linux-pm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] PM: hibernate: don't store zero pages in the image file.
@ 2023-01-13 19:30 Brian Geffon
  2023-01-14  0:15 ` [PATCH v2] " Brian Geffon
                   ` (5 more replies)
  0 siblings, 6 replies; 16+ messages in thread
From: Brian Geffon @ 2023-01-13 19:30 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Pavel Machek, Len Brown, linux-pm, linux-kernel, Brian Geffon

On ChromeOS we've observed a considerable number of in-use pages filled
with zeros. Today with hibernate it's entirely possible that saveable
pages are just zero filled. Since we're already copying pages
word-by-word in do_copy_page it becomes almost free to determine if a
page was completely filled with zeros.

This change introduces a new bitmap which will track these zero pages.
If a page is zero it will not be included in the saved image, instead to
track these zero pages in the image file we will introduce a new flag
which we will set on the packed PFN list. When reading back in the image
file we will detect these zero page PFNs and rebuild the zero page bitmap.

When the image is being loaded through calls to snapshot_write_next if we
encounter a zero page we will silently memset it to 0 and then continue on
to the next page. Given the implementation in snapshot_read_next and
snapshot_write_next this change  will be transparent to non-compressed,
compressed, and swsusp modes of operation.

To provide some concrete numbers from simple ad-hoc testing, on a device
which was lightly in use we saw that:

 PM: hibernation: Image created (964408 pages copied, 548304 zero pages)

Of the approximately 6.2GB of saveable pages 2.2GB (36%) were just zero
filled and could be tracked entirely within the packed PFN list. The
savings would obviously be much lower for lzo compressed images, but even
in the case of compression not copying pages across to the compression
threads will still speed things up. It's also possible that we would see
better overall compression ratios as larger regions of "real data" would
improve the compressibility.

Finally, such an approach could dramatically improve swsusp performance
as each one of those zero pages requires a write syscall to reload, by
handling it as part of the packed PFN list we're able to fully avoid
that.

Signed-off-by: Brian Geffon <bgeffon@google.com>
---
 kernel/power/snapshot.c | 127 ++++++++++++++++++++++++++++++----------
 1 file changed, 97 insertions(+), 30 deletions(-)

diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index cd8b7b35f1e8..9846de7f2d41 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -404,6 +404,7 @@ struct bm_position {
 	struct mem_zone_bm_rtree *zone;
 	struct rtree_node *node;
 	unsigned long node_pfn;
+	unsigned long cur_pfn;
 	int node_bit;
 };
 
@@ -589,6 +590,7 @@ static void memory_bm_position_reset(struct memory_bitmap *bm)
 	bm->cur.node = list_entry(bm->cur.zone->leaves.next,
 				  struct rtree_node, list);
 	bm->cur.node_pfn = 0;
+	bm->cur.cur_pfn = BM_END_OF_MAP;
 	bm->cur.node_bit = 0;
 }
 
@@ -850,6 +852,11 @@ static void memory_bm_clear_current(struct memory_bitmap *bm)
 	clear_bit(bit, bm->cur.node->data);
 }
 
+static unsigned long memory_bm_get_current(struct memory_bitmap *bm)
+{
+	return bm->cur.cur_pfn;
+}
+
 static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
 {
 	void *addr;
@@ -929,10 +936,12 @@ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
 		if (bit < bits) {
 			pfn = bm->cur.zone->start_pfn + bm->cur.node_pfn + bit;
 			bm->cur.node_bit = bit + 1;
+			bm->cur.cur_pfn = pfn;
 			return pfn;
 		}
 	} while (rtree_next_node(bm));
 
+	bm->cur.cur_pfn = BM_END_OF_MAP;
 	return BM_END_OF_MAP;
 }
 
@@ -1371,14 +1380,18 @@ static unsigned int count_data_pages(void)
 
 /*
  * This is needed, because copy_page and memcpy are not usable for copying
- * task structs.
+ * task structs. Returns 1 if a page was filled with only zeros, otherwise 0.
  */
-static inline void do_copy_page(long *dst, long *src)
+static inline int do_copy_page(long *dst, long *src)
 {
 	int n;
+	long z = 0;
 
-	for (n = PAGE_SIZE / sizeof(long); n; n--)
+	for (n = PAGE_SIZE / sizeof(long); n; n--) {
+		z |= *src;
 		*dst++ = *src++;
+	}
+	return !z;
 }
 
 /**
@@ -1389,15 +1402,17 @@ static inline void do_copy_page(long *dst, long *src)
  * CONFIG_ARCH_HAS_SET_DIRECT_MAP is not set. In that case kernel_page_present()
  * always returns 'true'.
  */
-static void safe_copy_page(void *dst, struct page *s_page)
+static int safe_copy_page(void *dst, struct page *s_page)
 {
+	int ret;
 	if (kernel_page_present(s_page)) {
-		do_copy_page(dst, page_address(s_page));
+		ret = do_copy_page(dst, page_address(s_page));
 	} else {
 		hibernate_map_page(s_page);
-		do_copy_page(dst, page_address(s_page));
+		ret = do_copy_page(dst, page_address(s_page));
 		hibernate_unmap_page(s_page);
 	}
+	return ret;
 }
 
 #ifdef CONFIG_HIGHMEM
@@ -1407,17 +1422,18 @@ static inline struct page *page_is_saveable(struct zone *zone, unsigned long pfn
 		saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
 }
 
-static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
+static int copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
 {
 	struct page *s_page, *d_page;
 	void *src, *dst;
+	int ret;
 
 	s_page = pfn_to_page(src_pfn);
 	d_page = pfn_to_page(dst_pfn);
 	if (PageHighMem(s_page)) {
 		src = kmap_atomic(s_page);
 		dst = kmap_atomic(d_page);
-		do_copy_page(dst, src);
+		ret = do_copy_page(dst, src);
 		kunmap_atomic(dst);
 		kunmap_atomic(src);
 	} else {
@@ -1426,30 +1442,32 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
 			 * The page pointed to by src may contain some kernel
 			 * data modified by kmap_atomic()
 			 */
-			safe_copy_page(buffer, s_page);
+			ret = safe_copy_page(buffer, s_page);
 			dst = kmap_atomic(d_page);
 			copy_page(dst, buffer);
 			kunmap_atomic(dst);
 		} else {
-			safe_copy_page(page_address(d_page), s_page);
+			ret = safe_copy_page(page_address(d_page), s_page);
 		}
 	}
+	return ret;
 }
 #else
 #define page_is_saveable(zone, pfn)	saveable_page(zone, pfn)
 
-static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
+static inline int copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
 {
-	safe_copy_page(page_address(pfn_to_page(dst_pfn)),
+	return safe_copy_page(page_address(pfn_to_page(dst_pfn)),
 				pfn_to_page(src_pfn));
 }
 #endif /* CONFIG_HIGHMEM */
 
 static void copy_data_pages(struct memory_bitmap *copy_bm,
-			    struct memory_bitmap *orig_bm)
+			    struct memory_bitmap *orig_bm,
+			    struct memory_bitmap *zero_bm)
 {
 	struct zone *zone;
-	unsigned long pfn;
+	unsigned long pfn, copy_pfn;
 
 	for_each_populated_zone(zone) {
 		unsigned long max_zone_pfn;
@@ -1462,11 +1480,18 @@ static void copy_data_pages(struct memory_bitmap *copy_bm,
 	}
 	memory_bm_position_reset(orig_bm);
 	memory_bm_position_reset(copy_bm);
+	copy_pfn = memory_bm_next_pfn(copy_bm);
 	for(;;) {
 		pfn = memory_bm_next_pfn(orig_bm);
 		if (unlikely(pfn == BM_END_OF_MAP))
 			break;
-		copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
+		if (copy_data_page(copy_pfn, pfn)) {
+			memory_bm_set_bit(zero_bm, pfn);
+
+			/* We will reuse this copy_pfn for a real 'nonzero' page. */
+			continue;
+		}
+		copy_pfn = memory_bm_next_pfn(copy_bm);
 	}
 }
 
@@ -1494,6 +1519,9 @@ static struct memory_bitmap orig_bm;
  */
 static struct memory_bitmap copy_bm;
 
+/* Memory bitmap which tracks which saveable pages were zero filled. */
+static struct memory_bitmap zero_bm;
+
 /**
  * swsusp_free - Free pages allocated for hibernation image.
  *
@@ -1756,6 +1784,12 @@ int hibernate_preallocate_memory(void)
 		goto err_out;
 	}
 
+	error = memory_bm_create(&zero_bm, GFP_IMAGE, PG_ANY);
+	if (error) {
+		pr_err("Cannot allocate zero bitmap\n");
+		goto err_out;
+	}
+
 	alloc_normal = 0;
 	alloc_highmem = 0;
 
@@ -2013,11 +2047,12 @@ static int swsusp_alloc(struct memory_bitmap *copy_bm,
 
 asmlinkage __visible int swsusp_save(void)
 {
-	unsigned int nr_pages, nr_highmem;
+	unsigned int nr_pages, nr_highmem, nr_zero_pages;
 
 	pr_info("Creating image:\n");
 
 	drain_local_pages(NULL);
+	nr_zero_pages = 0;
 	nr_pages = count_data_pages();
 	nr_highmem = count_highmem_pages();
 	pr_info("Need to copy %u pages\n", nr_pages + nr_highmem);
@@ -2037,19 +2072,23 @@ asmlinkage __visible int swsusp_save(void)
 	 * Kill them.
 	 */
 	drain_local_pages(NULL);
-	copy_data_pages(&copy_bm, &orig_bm);
+	copy_data_pages(&copy_bm, &orig_bm, &zero_bm);
 
 	/*
 	 * End of critical section. From now on, we can write to memory,
 	 * but we should not touch disk. This specially means we must _not_
 	 * touch swap space! Except we must write out our image of course.
 	 */
+	memory_bm_position_reset(&zero_bm);
+	while (memory_bm_next_pfn(&zero_bm) != BM_END_OF_MAP)
+		nr_zero_pages++;
 
 	nr_pages += nr_highmem;
-	nr_copy_pages = nr_pages;
+	/* We don't actually copy the zero pages */
+	nr_copy_pages = nr_pages - nr_zero_pages;
 	nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
 
-	pr_info("Image created (%d pages copied)\n", nr_pages);
+	pr_info("Image created (%d pages copied, %d zero pages)\n", nr_copy_pages, nr_zero_pages);
 
 	return 0;
 }
@@ -2094,15 +2133,22 @@ static int init_header(struct swsusp_info *info)
 	return init_header_complete(info);
 }
 
+#define ENCODED_PFN_ZERO_FLAG (1UL << (BITS_PER_LONG - 1))
+#define ENCODED_PFN_MASK (~ENCODED_PFN_ZERO_FLAG)
+
 /**
  * pack_pfns - Prepare PFNs for saving.
  * @bm: Memory bitmap.
  * @buf: Memory buffer to store the PFNs in.
+ * @zero_bm: Memory bitmap containing PFNs of zero pages.
  *
  * PFNs corresponding to set bits in @bm are stored in the area of memory
- * pointed to by @buf (1 page at a time).
+ * pointed to by @buf (1 page at a time). Pages which were filled with only
+ * zeros will have the highest bit set in the packed format to distinguish
+ * them from PFNs which will be contained in the image file.
  */
-static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
+static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm,
+		struct memory_bitmap *zero_bm)
 {
 	int j;
 
@@ -2110,6 +2156,8 @@ static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
 		buf[j] = memory_bm_next_pfn(bm);
 		if (unlikely(buf[j] == BM_END_OF_MAP))
 			break;
+		if (memory_bm_test_bit(zero_bm, buf[j]))
+			buf[j] |= ENCODED_PFN_ZERO_FLAG;
 	}
 }
 
@@ -2151,7 +2199,7 @@ int snapshot_read_next(struct snapshot_handle *handle)
 		memory_bm_position_reset(&copy_bm);
 	} else if (handle->cur <= nr_meta_pages) {
 		clear_page(buffer);
-		pack_pfns(buffer, &orig_bm);
+		pack_pfns(buffer, &orig_bm, &zero_bm);
 	} else {
 		struct page *page;
 
@@ -2247,24 +2295,30 @@ static int load_header(struct swsusp_info *info)
  * unpack_orig_pfns - Set bits corresponding to given PFNs in a memory bitmap.
  * @bm: Memory bitmap.
  * @buf: Area of memory containing the PFNs.
+ * @zero_bm: Memory bitmap which will be populated with the PFNs of zero pages.
  *
  * For each element of the array pointed to by @buf (1 page at a time), set the
- * corresponding bit in @bm.
+ * corresponding bit in @bm. If the page was originally populated with only
+ * zeros then a corresponding bit will also be set in @zero_bm.
  */
-static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
+static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm,
+		struct memory_bitmap *zero_bm)
 {
-	int j;
+	int j, zero;
+	unsigned long decoded_pfn;
 
 	for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
 		if (unlikely(buf[j] == BM_END_OF_MAP))
 			break;
 
-		if (pfn_valid(buf[j]) && memory_bm_pfn_present(bm, buf[j])) {
-			memory_bm_set_bit(bm, buf[j]);
+		zero = !!(buf[j] & ENCODED_PFN_ZERO_FLAG);
+		decoded_pfn = buf[j] & ENCODED_PFN_MASK;
+		if (pfn_valid(decoded_pfn) && memory_bm_pfn_present(bm, decoded_pfn)) {
+			memory_bm_set_bit(bm, decoded_pfn);
 		} else {
-			if (!pfn_valid(buf[j]))
+			if (!pfn_valid(decoded_pfn))
 				pr_err(FW_BUG "Memory map mismatch at 0x%llx after hibernation\n",
-				       (unsigned long long)PFN_PHYS(buf[j]));
+				       (unsigned long long)PFN_PHYS(decoded_pfn));
 			return -EFAULT;
 		}
 	}
@@ -2631,6 +2685,7 @@ int snapshot_write_next(struct snapshot_handle *handle)
 	static struct chain_allocator ca;
 	int error = 0;
 
+next:
 	/* Check if we have already loaded the entire image */
 	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages)
 		return 0;
@@ -2657,9 +2712,13 @@ int snapshot_write_next(struct snapshot_handle *handle)
 		if (error)
 			return error;
 
+		error = memory_bm_create(&zero_bm, GFP_ATOMIC, PG_ANY);
+		if (error)
+			return error;
+
 		hibernate_restore_protection_begin();
 	} else if (handle->cur <= nr_meta_pages + 1) {
-		error = unpack_orig_pfns(buffer, &copy_bm);
+		error = unpack_orig_pfns(buffer, &copy_bm, &zero_bm);
 		if (error)
 			return error;
 
@@ -2686,6 +2745,14 @@ int snapshot_write_next(struct snapshot_handle *handle)
 			handle->sync_read = 0;
 	}
 	handle->cur++;
+
+	/* Zero pages were not included in the image, memset it and move on. */
+	if ((handle->cur > nr_meta_pages + 1) &&
+			memory_bm_test_bit(&zero_bm, memory_bm_get_current(&orig_bm))) {
+		memset(handle->buffer, 0, PAGE_SIZE);
+		goto next;
+	}
+
 	return PAGE_SIZE;
 }
 
-- 
2.39.0.314.g84b9a713c41-goog


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v2] PM: hibernate: don't store zero pages in the image file.
  2023-01-13 19:30 [PATCH] PM: hibernate: don't store zero pages in the image file Brian Geffon
@ 2023-01-14  0:15 ` Brian Geffon
  2023-02-09 19:44   ` Rafael J. Wysocki
  2023-03-02 17:13 ` [PATCH v3] " Brian Geffon
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 16+ messages in thread
From: Brian Geffon @ 2023-01-14  0:15 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Pavel Machek, Len Brown, linux-pm, linux-kernel, Brian Geffon

On ChromeOS we've observed a considerable number of in-use pages filled
with zeros. Today with hibernate it's entirely possible that saveable
pages are just zero filled. Since we're already copying pages
word-by-word in do_copy_page it becomes almost free to determine if a
page was completely filled with zeros.

This change introduces a new bitmap which will track these zero pages.
If a page is zero it will not be included in the saved image, instead to
track these zero pages in the image file we will introduce a new flag
which we will set on the packed PFN list. When reading back in the image
file we will detect these zero page PFNs and rebuild the zero page bitmap.

When the image is being loaded through calls to snapshot_write_next if we
encounter a zero page we will silently memset it to 0 and then continue on
to the next page. Given the implementation in snapshot_read_next and
snapshot_write_next this change  will be transparent to non-compressed,
compressed, and swsusp modes of operation.

To provide some concrete numbers from simple ad-hoc testing, on a device
which was lightly in use we saw that:

 PM: hibernation: Image created (964408 pages copied, 548304 zero pages)

Of the approximately 6.2GB of saveable pages 2.2GB (36%) were just zero
filled and could be tracked entirely within the packed PFN list. The
savings would obviously be much lower for lzo compressed images, but even
in the case of compression not copying pages across to the compression
threads will still speed things up. It's also possible that we would see
better overall compression ratios as larger regions of "real data" would
improve the compressibility.

Finally, such an approach could dramatically improve swsusp performance
as each one of those zero pages requires a write syscall to reload, by
handling it as part of the packed PFN list we're able to fully avoid
that.

patch v2:
	- correct a minor issue when rebasing.

Signed-off-by: Brian Geffon <bgeffon@google.com>
---
 kernel/power/snapshot.c | 129 ++++++++++++++++++++++++++++++----------
 1 file changed, 99 insertions(+), 30 deletions(-)

diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index cd8b7b35f1e8..8d0ba36b0218 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -404,6 +404,7 @@ struct bm_position {
 	struct mem_zone_bm_rtree *zone;
 	struct rtree_node *node;
 	unsigned long node_pfn;
+	unsigned long cur_pfn;
 	int node_bit;
 };
 
@@ -589,6 +590,7 @@ static void memory_bm_position_reset(struct memory_bitmap *bm)
 	bm->cur.node = list_entry(bm->cur.zone->leaves.next,
 				  struct rtree_node, list);
 	bm->cur.node_pfn = 0;
+	bm->cur.cur_pfn = BM_END_OF_MAP;
 	bm->cur.node_bit = 0;
 }
 
@@ -850,6 +852,11 @@ static void memory_bm_clear_current(struct memory_bitmap *bm)
 	clear_bit(bit, bm->cur.node->data);
 }
 
+static unsigned long memory_bm_get_current(struct memory_bitmap *bm)
+{
+	return bm->cur.cur_pfn;
+}
+
 static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
 {
 	void *addr;
@@ -929,10 +936,12 @@ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
 		if (bit < bits) {
 			pfn = bm->cur.zone->start_pfn + bm->cur.node_pfn + bit;
 			bm->cur.node_bit = bit + 1;
+			bm->cur.cur_pfn = pfn;
 			return pfn;
 		}
 	} while (rtree_next_node(bm));
 
+	bm->cur.cur_pfn = BM_END_OF_MAP;
 	return BM_END_OF_MAP;
 }
 
@@ -1371,14 +1380,18 @@ static unsigned int count_data_pages(void)
 
 /*
  * This is needed, because copy_page and memcpy are not usable for copying
- * task structs.
+ * task structs. Returns 1 if a page was filled with only zeros, otherwise 0.
  */
-static inline void do_copy_page(long *dst, long *src)
+static inline int do_copy_page(long *dst, long *src)
 {
 	int n;
+	long z = 0;
 
-	for (n = PAGE_SIZE / sizeof(long); n; n--)
+	for (n = PAGE_SIZE / sizeof(long); n; n--) {
+		z |= *src;
 		*dst++ = *src++;
+	}
+	return !z;
 }
 
 /**
@@ -1389,15 +1402,17 @@ static inline void do_copy_page(long *dst, long *src)
  * CONFIG_ARCH_HAS_SET_DIRECT_MAP is not set. In that case kernel_page_present()
  * always returns 'true'.
  */
-static void safe_copy_page(void *dst, struct page *s_page)
+static int safe_copy_page(void *dst, struct page *s_page)
 {
+	int ret;
 	if (kernel_page_present(s_page)) {
-		do_copy_page(dst, page_address(s_page));
+		ret = do_copy_page(dst, page_address(s_page));
 	} else {
 		hibernate_map_page(s_page);
-		do_copy_page(dst, page_address(s_page));
+		ret = do_copy_page(dst, page_address(s_page));
 		hibernate_unmap_page(s_page);
 	}
+	return ret;
 }
 
 #ifdef CONFIG_HIGHMEM
@@ -1407,17 +1422,18 @@ static inline struct page *page_is_saveable(struct zone *zone, unsigned long pfn
 		saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
 }
 
-static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
+static int copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
 {
 	struct page *s_page, *d_page;
 	void *src, *dst;
+	int ret;
 
 	s_page = pfn_to_page(src_pfn);
 	d_page = pfn_to_page(dst_pfn);
 	if (PageHighMem(s_page)) {
 		src = kmap_atomic(s_page);
 		dst = kmap_atomic(d_page);
-		do_copy_page(dst, src);
+		ret = do_copy_page(dst, src);
 		kunmap_atomic(dst);
 		kunmap_atomic(src);
 	} else {
@@ -1426,30 +1442,32 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
 			 * The page pointed to by src may contain some kernel
 			 * data modified by kmap_atomic()
 			 */
-			safe_copy_page(buffer, s_page);
+			ret = safe_copy_page(buffer, s_page);
 			dst = kmap_atomic(d_page);
 			copy_page(dst, buffer);
 			kunmap_atomic(dst);
 		} else {
-			safe_copy_page(page_address(d_page), s_page);
+			ret = safe_copy_page(page_address(d_page), s_page);
 		}
 	}
+	return ret;
 }
 #else
 #define page_is_saveable(zone, pfn)	saveable_page(zone, pfn)
 
-static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
+static inline int copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
 {
-	safe_copy_page(page_address(pfn_to_page(dst_pfn)),
+	return safe_copy_page(page_address(pfn_to_page(dst_pfn)),
 				pfn_to_page(src_pfn));
 }
 #endif /* CONFIG_HIGHMEM */
 
 static void copy_data_pages(struct memory_bitmap *copy_bm,
-			    struct memory_bitmap *orig_bm)
+			    struct memory_bitmap *orig_bm,
+			    struct memory_bitmap *zero_bm)
 {
 	struct zone *zone;
-	unsigned long pfn;
+	unsigned long pfn, copy_pfn;
 
 	for_each_populated_zone(zone) {
 		unsigned long max_zone_pfn;
@@ -1462,11 +1480,18 @@ static void copy_data_pages(struct memory_bitmap *copy_bm,
 	}
 	memory_bm_position_reset(orig_bm);
 	memory_bm_position_reset(copy_bm);
+	copy_pfn = memory_bm_next_pfn(copy_bm);
 	for(;;) {
 		pfn = memory_bm_next_pfn(orig_bm);
 		if (unlikely(pfn == BM_END_OF_MAP))
 			break;
-		copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
+		if (copy_data_page(copy_pfn, pfn)) {
+			memory_bm_set_bit(zero_bm, pfn);
+
+			/* We will reuse this copy_pfn for a real 'nonzero' page. */
+			continue;
+		}
+		copy_pfn = memory_bm_next_pfn(copy_bm);
 	}
 }
 
@@ -1494,6 +1519,9 @@ static struct memory_bitmap orig_bm;
  */
 static struct memory_bitmap copy_bm;
 
+/* Memory bitmap which tracks which saveable pages were zero filled. */
+static struct memory_bitmap zero_bm;
+
 /**
  * swsusp_free - Free pages allocated for hibernation image.
  *
@@ -1756,6 +1784,12 @@ int hibernate_preallocate_memory(void)
 		goto err_out;
 	}
 
+	error = memory_bm_create(&zero_bm, GFP_IMAGE, PG_ANY);
+	if (error) {
+		pr_err("Cannot allocate zero bitmap\n");
+		goto err_out;
+	}
+
 	alloc_normal = 0;
 	alloc_highmem = 0;
 
@@ -2013,11 +2047,12 @@ static int swsusp_alloc(struct memory_bitmap *copy_bm,
 
 asmlinkage __visible int swsusp_save(void)
 {
-	unsigned int nr_pages, nr_highmem;
+	unsigned int nr_pages, nr_highmem, nr_zero_pages;
 
 	pr_info("Creating image:\n");
 
 	drain_local_pages(NULL);
+	nr_zero_pages = 0;
 	nr_pages = count_data_pages();
 	nr_highmem = count_highmem_pages();
 	pr_info("Need to copy %u pages\n", nr_pages + nr_highmem);
@@ -2037,19 +2072,23 @@ asmlinkage __visible int swsusp_save(void)
 	 * Kill them.
 	 */
 	drain_local_pages(NULL);
-	copy_data_pages(&copy_bm, &orig_bm);
+	copy_data_pages(&copy_bm, &orig_bm, &zero_bm);
 
 	/*
 	 * End of critical section. From now on, we can write to memory,
 	 * but we should not touch disk. This specially means we must _not_
 	 * touch swap space! Except we must write out our image of course.
 	 */
+	memory_bm_position_reset(&zero_bm);
+	while (memory_bm_next_pfn(&zero_bm) != BM_END_OF_MAP)
+		nr_zero_pages++;
 
 	nr_pages += nr_highmem;
-	nr_copy_pages = nr_pages;
+	/* We don't actually copy the zero pages */
+	nr_copy_pages = nr_pages - nr_zero_pages;
 	nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
 
-	pr_info("Image created (%d pages copied)\n", nr_pages);
+	pr_info("Image created (%d pages copied, %d zero pages)\n", nr_copy_pages, nr_zero_pages);
 
 	return 0;
 }
@@ -2094,15 +2133,22 @@ static int init_header(struct swsusp_info *info)
 	return init_header_complete(info);
 }
 
+#define ENCODED_PFN_ZERO_FLAG (1UL << (BITS_PER_LONG - 1))
+#define ENCODED_PFN_MASK (~ENCODED_PFN_ZERO_FLAG)
+
 /**
  * pack_pfns - Prepare PFNs for saving.
  * @bm: Memory bitmap.
  * @buf: Memory buffer to store the PFNs in.
+ * @zero_bm: Memory bitmap containing PFNs of zero pages.
  *
  * PFNs corresponding to set bits in @bm are stored in the area of memory
- * pointed to by @buf (1 page at a time).
+ * pointed to by @buf (1 page at a time). Pages which were filled with only
+ * zeros will have the highest bit set in the packed format to distinguish
+ * them from PFNs which will be contained in the image file.
  */
-static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
+static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm,
+		struct memory_bitmap *zero_bm)
 {
 	int j;
 
@@ -2110,6 +2156,8 @@ static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
 		buf[j] = memory_bm_next_pfn(bm);
 		if (unlikely(buf[j] == BM_END_OF_MAP))
 			break;
+		if (memory_bm_test_bit(zero_bm, buf[j]))
+			buf[j] |= ENCODED_PFN_ZERO_FLAG;
 	}
 }
 
@@ -2151,7 +2199,7 @@ int snapshot_read_next(struct snapshot_handle *handle)
 		memory_bm_position_reset(&copy_bm);
 	} else if (handle->cur <= nr_meta_pages) {
 		clear_page(buffer);
-		pack_pfns(buffer, &orig_bm);
+		pack_pfns(buffer, &orig_bm, &zero_bm);
 	} else {
 		struct page *page;
 
@@ -2247,24 +2295,32 @@ static int load_header(struct swsusp_info *info)
  * unpack_orig_pfns - Set bits corresponding to given PFNs in a memory bitmap.
  * @bm: Memory bitmap.
  * @buf: Area of memory containing the PFNs.
+ * @zero_bm: Memory bitmap which will be populated with the PFNs of zero pages.
  *
  * For each element of the array pointed to by @buf (1 page at a time), set the
- * corresponding bit in @bm.
+ * corresponding bit in @bm. If the page was originally populated with only
+ * zeros then a corresponding bit will also be set in @zero_bm.
  */
-static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
+static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm,
+		struct memory_bitmap *zero_bm)
 {
-	int j;
+	int j, zero;
+	unsigned long decoded_pfn;
 
 	for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
 		if (unlikely(buf[j] == BM_END_OF_MAP))
 			break;
 
-		if (pfn_valid(buf[j]) && memory_bm_pfn_present(bm, buf[j])) {
-			memory_bm_set_bit(bm, buf[j]);
+		zero = !!(buf[j] & ENCODED_PFN_ZERO_FLAG);
+		decoded_pfn = buf[j] & ENCODED_PFN_MASK;
+		if (pfn_valid(decoded_pfn) && memory_bm_pfn_present(bm, decoded_pfn)) {
+			memory_bm_set_bit(bm, decoded_pfn);
+			if (zero)
+				memory_bm_set_bit(zero_bm, decoded_pfn);
 		} else {
-			if (!pfn_valid(buf[j]))
+			if (!pfn_valid(decoded_pfn))
 				pr_err(FW_BUG "Memory map mismatch at 0x%llx after hibernation\n",
-				       (unsigned long long)PFN_PHYS(buf[j]));
+				       (unsigned long long)PFN_PHYS(decoded_pfn));
 			return -EFAULT;
 		}
 	}
@@ -2631,6 +2687,7 @@ int snapshot_write_next(struct snapshot_handle *handle)
 	static struct chain_allocator ca;
 	int error = 0;
 
+next:
 	/* Check if we have already loaded the entire image */
 	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages)
 		return 0;
@@ -2657,9 +2714,13 @@ int snapshot_write_next(struct snapshot_handle *handle)
 		if (error)
 			return error;
 
+		error = memory_bm_create(&zero_bm, GFP_ATOMIC, PG_ANY);
+		if (error)
+			return error;
+
 		hibernate_restore_protection_begin();
 	} else if (handle->cur <= nr_meta_pages + 1) {
-		error = unpack_orig_pfns(buffer, &copy_bm);
+		error = unpack_orig_pfns(buffer, &copy_bm, &zero_bm);
 		if (error)
 			return error;
 
@@ -2686,6 +2747,14 @@ int snapshot_write_next(struct snapshot_handle *handle)
 			handle->sync_read = 0;
 	}
 	handle->cur++;
+
+	/* Zero pages were not included in the image, memset it and move on. */
+	if ((handle->cur > nr_meta_pages + 1) &&
+			memory_bm_test_bit(&zero_bm, memory_bm_get_current(&orig_bm))) {
+		memset(handle->buffer, 0, PAGE_SIZE);
+		goto next;
+	}
+
 	return PAGE_SIZE;
 }
 
-- 
2.39.0.314.g84b9a713c41-goog


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] PM: hibernate: don't store zero pages in the image file.
  2023-01-14  0:15 ` [PATCH v2] " Brian Geffon
@ 2023-02-09 19:44   ` Rafael J. Wysocki
  2023-02-13 14:45     ` Brian Geffon
  2023-03-02 17:25     ` Brian Geffon
  0 siblings, 2 replies; 16+ messages in thread
From: Rafael J. Wysocki @ 2023-02-09 19:44 UTC (permalink / raw)
  To: Brian Geffon
  Cc: Rafael J. Wysocki, Pavel Machek, Len Brown, linux-pm,
	linux-kernel

On Sat, Jan 14, 2023 at 1:15 AM Brian Geffon <bgeffon@google.com> wrote:
>
> On ChromeOS we've observed a considerable number of in-use pages filled
> with zeros. Today with hibernate it's entirely possible that saveable
> pages are just zero filled. Since we're already copying pages
> word-by-word in do_copy_page it becomes almost free to determine if a
> page was completely filled with zeros.
>
> This change introduces a new bitmap which will track these zero pages.
> If a page is zero it will not be included in the saved image, instead to
> track these zero pages in the image file we will introduce a new flag
> which we will set on the packed PFN list. When reading back in the image
> file we will detect these zero page PFNs and rebuild the zero page bitmap.
>
> When the image is being loaded through calls to snapshot_write_next if we
> encounter a zero page we will silently memset it to 0 and then continue on
> to the next page. Given the implementation in snapshot_read_next and
> snapshot_write_next this change  will be transparent to non-compressed,
> compressed, and swsusp modes of operation.
>
> To provide some concrete numbers from simple ad-hoc testing, on a device
> which was lightly in use we saw that:
>
>  PM: hibernation: Image created (964408 pages copied, 548304 zero pages)
>
> Of the approximately 6.2GB of saveable pages 2.2GB (36%) were just zero
> filled and could be tracked entirely within the packed PFN list. The
> savings would obviously be much lower for lzo compressed images, but even
> in the case of compression not copying pages across to the compression
> threads will still speed things up. It's also possible that we would see
> better overall compression ratios as larger regions of "real data" would
> improve the compressibility.
>
> Finally, such an approach could dramatically improve swsusp performance
> as each one of those zero pages requires a write syscall to reload, by
> handling it as part of the packed PFN list we're able to fully avoid
> that.
>
> patch v2:
>         - correct a minor issue when rebasing.

I need some more time to go through this in more detail, so it is
likely to miss 6.3.  Sorry about that.

> Signed-off-by: Brian Geffon <bgeffon@google.com>
> ---
>  kernel/power/snapshot.c | 129 ++++++++++++++++++++++++++++++----------
>  1 file changed, 99 insertions(+), 30 deletions(-)
>
> diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
> index cd8b7b35f1e8..8d0ba36b0218 100644
> --- a/kernel/power/snapshot.c
> +++ b/kernel/power/snapshot.c
> @@ -404,6 +404,7 @@ struct bm_position {
>         struct mem_zone_bm_rtree *zone;
>         struct rtree_node *node;
>         unsigned long node_pfn;
> +       unsigned long cur_pfn;
>         int node_bit;
>  };
>
> @@ -589,6 +590,7 @@ static void memory_bm_position_reset(struct memory_bitmap *bm)
>         bm->cur.node = list_entry(bm->cur.zone->leaves.next,
>                                   struct rtree_node, list);
>         bm->cur.node_pfn = 0;
> +       bm->cur.cur_pfn = BM_END_OF_MAP;
>         bm->cur.node_bit = 0;
>  }
>
> @@ -850,6 +852,11 @@ static void memory_bm_clear_current(struct memory_bitmap *bm)
>         clear_bit(bit, bm->cur.node->data);
>  }
>
> +static unsigned long memory_bm_get_current(struct memory_bitmap *bm)
> +{
> +       return bm->cur.cur_pfn;
> +}
> +
>  static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
>  {
>         void *addr;
> @@ -929,10 +936,12 @@ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
>                 if (bit < bits) {
>                         pfn = bm->cur.zone->start_pfn + bm->cur.node_pfn + bit;
>                         bm->cur.node_bit = bit + 1;
> +                       bm->cur.cur_pfn = pfn;
>                         return pfn;
>                 }
>         } while (rtree_next_node(bm));
>
> +       bm->cur.cur_pfn = BM_END_OF_MAP;
>         return BM_END_OF_MAP;
>  }
>
> @@ -1371,14 +1380,18 @@ static unsigned int count_data_pages(void)
>
>  /*
>   * This is needed, because copy_page and memcpy are not usable for copying
> - * task structs.
> + * task structs. Returns 1 if a page was filled with only zeros, otherwise 0.
>   */
> -static inline void do_copy_page(long *dst, long *src)
> +static inline int do_copy_page(long *dst, long *src)
>  {
>         int n;
> +       long z = 0;
>
> -       for (n = PAGE_SIZE / sizeof(long); n; n--)
> +       for (n = PAGE_SIZE / sizeof(long); n; n--) {
> +               z |= *src;
>                 *dst++ = *src++;
> +       }
> +       return !z;
>  }
>
>  /**
> @@ -1389,15 +1402,17 @@ static inline void do_copy_page(long *dst, long *src)
>   * CONFIG_ARCH_HAS_SET_DIRECT_MAP is not set. In that case kernel_page_present()
>   * always returns 'true'.
>   */
> -static void safe_copy_page(void *dst, struct page *s_page)
> +static int safe_copy_page(void *dst, struct page *s_page)
>  {
> +       int ret;
>         if (kernel_page_present(s_page)) {
> -               do_copy_page(dst, page_address(s_page));
> +               ret = do_copy_page(dst, page_address(s_page));
>         } else {
>                 hibernate_map_page(s_page);
> -               do_copy_page(dst, page_address(s_page));
> +               ret = do_copy_page(dst, page_address(s_page));
>                 hibernate_unmap_page(s_page);
>         }
> +       return ret;
>  }
>
>  #ifdef CONFIG_HIGHMEM
> @@ -1407,17 +1422,18 @@ static inline struct page *page_is_saveable(struct zone *zone, unsigned long pfn
>                 saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
>  }
>
> -static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
> +static int copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
>  {
>         struct page *s_page, *d_page;
>         void *src, *dst;
> +       int ret;
>
>         s_page = pfn_to_page(src_pfn);
>         d_page = pfn_to_page(dst_pfn);
>         if (PageHighMem(s_page)) {
>                 src = kmap_atomic(s_page);
>                 dst = kmap_atomic(d_page);
> -               do_copy_page(dst, src);
> +               ret = do_copy_page(dst, src);
>                 kunmap_atomic(dst);
>                 kunmap_atomic(src);
>         } else {
> @@ -1426,30 +1442,32 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
>                          * The page pointed to by src may contain some kernel
>                          * data modified by kmap_atomic()
>                          */
> -                       safe_copy_page(buffer, s_page);
> +                       ret = safe_copy_page(buffer, s_page);
>                         dst = kmap_atomic(d_page);
>                         copy_page(dst, buffer);
>                         kunmap_atomic(dst);
>                 } else {
> -                       safe_copy_page(page_address(d_page), s_page);
> +                       ret = safe_copy_page(page_address(d_page), s_page);
>                 }
>         }
> +       return ret;
>  }
>  #else
>  #define page_is_saveable(zone, pfn)    saveable_page(zone, pfn)
>
> -static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
> +static inline int copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
>  {
> -       safe_copy_page(page_address(pfn_to_page(dst_pfn)),
> +       return safe_copy_page(page_address(pfn_to_page(dst_pfn)),
>                                 pfn_to_page(src_pfn));
>  }
>  #endif /* CONFIG_HIGHMEM */
>
>  static void copy_data_pages(struct memory_bitmap *copy_bm,
> -                           struct memory_bitmap *orig_bm)
> +                           struct memory_bitmap *orig_bm,
> +                           struct memory_bitmap *zero_bm)
>  {
>         struct zone *zone;
> -       unsigned long pfn;
> +       unsigned long pfn, copy_pfn;
>
>         for_each_populated_zone(zone) {
>                 unsigned long max_zone_pfn;
> @@ -1462,11 +1480,18 @@ static void copy_data_pages(struct memory_bitmap *copy_bm,
>         }
>         memory_bm_position_reset(orig_bm);
>         memory_bm_position_reset(copy_bm);
> +       copy_pfn = memory_bm_next_pfn(copy_bm);
>         for(;;) {
>                 pfn = memory_bm_next_pfn(orig_bm);
>                 if (unlikely(pfn == BM_END_OF_MAP))
>                         break;
> -               copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
> +               if (copy_data_page(copy_pfn, pfn)) {
> +                       memory_bm_set_bit(zero_bm, pfn);
> +
> +                       /* We will reuse this copy_pfn for a real 'nonzero' page. */
> +                       continue;
> +               }
> +               copy_pfn = memory_bm_next_pfn(copy_bm);
>         }
>  }
>
> @@ -1494,6 +1519,9 @@ static struct memory_bitmap orig_bm;
>   */
>  static struct memory_bitmap copy_bm;
>
> +/* Memory bitmap which tracks which saveable pages were zero filled. */
> +static struct memory_bitmap zero_bm;
> +
>  /**
>   * swsusp_free - Free pages allocated for hibernation image.
>   *
> @@ -1756,6 +1784,12 @@ int hibernate_preallocate_memory(void)
>                 goto err_out;
>         }
>
> +       error = memory_bm_create(&zero_bm, GFP_IMAGE, PG_ANY);
> +       if (error) {
> +               pr_err("Cannot allocate zero bitmap\n");
> +               goto err_out;
> +       }
> +
>         alloc_normal = 0;
>         alloc_highmem = 0;
>
> @@ -2013,11 +2047,12 @@ static int swsusp_alloc(struct memory_bitmap *copy_bm,
>
>  asmlinkage __visible int swsusp_save(void)
>  {
> -       unsigned int nr_pages, nr_highmem;
> +       unsigned int nr_pages, nr_highmem, nr_zero_pages;
>
>         pr_info("Creating image:\n");
>
>         drain_local_pages(NULL);
> +       nr_zero_pages = 0;
>         nr_pages = count_data_pages();
>         nr_highmem = count_highmem_pages();
>         pr_info("Need to copy %u pages\n", nr_pages + nr_highmem);
> @@ -2037,19 +2072,23 @@ asmlinkage __visible int swsusp_save(void)
>          * Kill them.
>          */
>         drain_local_pages(NULL);
> -       copy_data_pages(&copy_bm, &orig_bm);
> +       copy_data_pages(&copy_bm, &orig_bm, &zero_bm);
>
>         /*
>          * End of critical section. From now on, we can write to memory,
>          * but we should not touch disk. This specially means we must _not_
>          * touch swap space! Except we must write out our image of course.
>          */
> +       memory_bm_position_reset(&zero_bm);
> +       while (memory_bm_next_pfn(&zero_bm) != BM_END_OF_MAP)
> +               nr_zero_pages++;
>
>         nr_pages += nr_highmem;
> -       nr_copy_pages = nr_pages;
> +       /* We don't actually copy the zero pages */
> +       nr_copy_pages = nr_pages - nr_zero_pages;
>         nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
>
> -       pr_info("Image created (%d pages copied)\n", nr_pages);
> +       pr_info("Image created (%d pages copied, %d zero pages)\n", nr_copy_pages, nr_zero_pages);
>
>         return 0;
>  }
> @@ -2094,15 +2133,22 @@ static int init_header(struct swsusp_info *info)
>         return init_header_complete(info);
>  }
>
> +#define ENCODED_PFN_ZERO_FLAG (1UL << (BITS_PER_LONG - 1))
> +#define ENCODED_PFN_MASK (~ENCODED_PFN_ZERO_FLAG)
> +
>  /**
>   * pack_pfns - Prepare PFNs for saving.
>   * @bm: Memory bitmap.
>   * @buf: Memory buffer to store the PFNs in.
> + * @zero_bm: Memory bitmap containing PFNs of zero pages.
>   *
>   * PFNs corresponding to set bits in @bm are stored in the area of memory
> - * pointed to by @buf (1 page at a time).
> + * pointed to by @buf (1 page at a time). Pages which were filled with only
> + * zeros will have the highest bit set in the packed format to distinguish
> + * them from PFNs which will be contained in the image file.
>   */
> -static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
> +static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm,
> +               struct memory_bitmap *zero_bm)
>  {
>         int j;
>
> @@ -2110,6 +2156,8 @@ static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
>                 buf[j] = memory_bm_next_pfn(bm);
>                 if (unlikely(buf[j] == BM_END_OF_MAP))
>                         break;
> +               if (memory_bm_test_bit(zero_bm, buf[j]))
> +                       buf[j] |= ENCODED_PFN_ZERO_FLAG;
>         }
>  }
>
> @@ -2151,7 +2199,7 @@ int snapshot_read_next(struct snapshot_handle *handle)
>                 memory_bm_position_reset(&copy_bm);
>         } else if (handle->cur <= nr_meta_pages) {
>                 clear_page(buffer);
> -               pack_pfns(buffer, &orig_bm);
> +               pack_pfns(buffer, &orig_bm, &zero_bm);
>         } else {
>                 struct page *page;
>
> @@ -2247,24 +2295,32 @@ static int load_header(struct swsusp_info *info)
>   * unpack_orig_pfns - Set bits corresponding to given PFNs in a memory bitmap.
>   * @bm: Memory bitmap.
>   * @buf: Area of memory containing the PFNs.
> + * @zero_bm: Memory bitmap which will be populated with the PFNs of zero pages.
>   *
>   * For each element of the array pointed to by @buf (1 page at a time), set the
> - * corresponding bit in @bm.
> + * corresponding bit in @bm. If the page was originally populated with only
> + * zeros then a corresponding bit will also be set in @zero_bm.
>   */
> -static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
> +static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm,
> +               struct memory_bitmap *zero_bm)
>  {
> -       int j;
> +       int j, zero;
> +       unsigned long decoded_pfn;
>
>         for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
>                 if (unlikely(buf[j] == BM_END_OF_MAP))
>                         break;
>
> -               if (pfn_valid(buf[j]) && memory_bm_pfn_present(bm, buf[j])) {
> -                       memory_bm_set_bit(bm, buf[j]);
> +               zero = !!(buf[j] & ENCODED_PFN_ZERO_FLAG);
> +               decoded_pfn = buf[j] & ENCODED_PFN_MASK;
> +               if (pfn_valid(decoded_pfn) && memory_bm_pfn_present(bm, decoded_pfn)) {
> +                       memory_bm_set_bit(bm, decoded_pfn);
> +                       if (zero)
> +                               memory_bm_set_bit(zero_bm, decoded_pfn);
>                 } else {
> -                       if (!pfn_valid(buf[j]))
> +                       if (!pfn_valid(decoded_pfn))
>                                 pr_err(FW_BUG "Memory map mismatch at 0x%llx after hibernation\n",
> -                                      (unsigned long long)PFN_PHYS(buf[j]));
> +                                      (unsigned long long)PFN_PHYS(decoded_pfn));
>                         return -EFAULT;
>                 }
>         }
> @@ -2631,6 +2687,7 @@ int snapshot_write_next(struct snapshot_handle *handle)
>         static struct chain_allocator ca;
>         int error = 0;
>
> +next:
>         /* Check if we have already loaded the entire image */
>         if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages)
>                 return 0;
> @@ -2657,9 +2714,13 @@ int snapshot_write_next(struct snapshot_handle *handle)
>                 if (error)
>                         return error;
>
> +               error = memory_bm_create(&zero_bm, GFP_ATOMIC, PG_ANY);
> +               if (error)
> +                       return error;
> +
>                 hibernate_restore_protection_begin();
>         } else if (handle->cur <= nr_meta_pages + 1) {
> -               error = unpack_orig_pfns(buffer, &copy_bm);
> +               error = unpack_orig_pfns(buffer, &copy_bm, &zero_bm);
>                 if (error)
>                         return error;
>
> @@ -2686,6 +2747,14 @@ int snapshot_write_next(struct snapshot_handle *handle)
>                         handle->sync_read = 0;
>         }
>         handle->cur++;
> +
> +       /* Zero pages were not included in the image, memset it and move on. */
> +       if ((handle->cur > nr_meta_pages + 1) &&
> +                       memory_bm_test_bit(&zero_bm, memory_bm_get_current(&orig_bm))) {
> +               memset(handle->buffer, 0, PAGE_SIZE);
> +               goto next;
> +       }
> +
>         return PAGE_SIZE;
>  }
>
> --
> 2.39.0.314.g84b9a713c41-goog
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] PM: hibernate: don't store zero pages in the image file.
  2023-02-09 19:44   ` Rafael J. Wysocki
@ 2023-02-13 14:45     ` Brian Geffon
  2023-02-18 11:48       ` Pavel Machek
  2023-03-02 17:25     ` Brian Geffon
  1 sibling, 1 reply; 16+ messages in thread
From: Brian Geffon @ 2023-02-13 14:45 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: Pavel Machek, Len Brown, linux-pm, linux-kernel

On Thu, Feb 9, 2023 at 2:44 PM Rafael J. Wysocki <rafael@kernel.org> wrote:
>
> On Sat, Jan 14, 2023 at 1:15 AM Brian Geffon <bgeffon@google.com> wrote:
> >
> > On ChromeOS we've observed a considerable number of in-use pages filled
> > with zeros. Today with hibernate it's entirely possible that saveable
> > pages are just zero filled. Since we're already copying pages
> > word-by-word in do_copy_page it becomes almost free to determine if a
> > page was completely filled with zeros.
> >
> > This change introduces a new bitmap which will track these zero pages.
> > If a page is zero it will not be included in the saved image, instead to
> > track these zero pages in the image file we will introduce a new flag
> > which we will set on the packed PFN list. When reading back in the image
> > file we will detect these zero page PFNs and rebuild the zero page bitmap.
> >
> > When the image is being loaded through calls to snapshot_write_next if we
> > encounter a zero page we will silently memset it to 0 and then continue on
> > to the next page. Given the implementation in snapshot_read_next and
> > snapshot_write_next this change  will be transparent to non-compressed,
> > compressed, and swsusp modes of operation.
> >
> > To provide some concrete numbers from simple ad-hoc testing, on a device
> > which was lightly in use we saw that:
> >
> >  PM: hibernation: Image created (964408 pages copied, 548304 zero pages)
> >
> > Of the approximately 6.2GB of saveable pages 2.2GB (36%) were just zero
> > filled and could be tracked entirely within the packed PFN list. The
> > savings would obviously be much lower for lzo compressed images, but even
> > in the case of compression not copying pages across to the compression
> > threads will still speed things up. It's also possible that we would see
> > better overall compression ratios as larger regions of "real data" would
> > improve the compressibility.
> >
> > Finally, such an approach could dramatically improve swsusp performance
> > as each one of those zero pages requires a write syscall to reload, by
> > handling it as part of the packed PFN list we're able to fully avoid
> > that.
> >
> > patch v2:
> >         - correct a minor issue when rebasing.
>
> I need some more time to go through this in more detail, so it is
> likely to miss 6.3.  Sorry about that.

No problem, consider this more of an RFC, it seems like a
low-risk/low-cost way to reduce image sizes so I was just hoping to
get feedback. Take your time reviewing.

>
> > Signed-off-by: Brian Geffon <bgeffon@google.com>
> > ---
> >  kernel/power/snapshot.c | 129 ++++++++++++++++++++++++++++++----------
> >  1 file changed, 99 insertions(+), 30 deletions(-)
> >
> > diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
> > index cd8b7b35f1e8..8d0ba36b0218 100644
> > --- a/kernel/power/snapshot.c
> > +++ b/kernel/power/snapshot.c
> > @@ -404,6 +404,7 @@ struct bm_position {
> >         struct mem_zone_bm_rtree *zone;
> >         struct rtree_node *node;
> >         unsigned long node_pfn;
> > +       unsigned long cur_pfn;
> >         int node_bit;
> >  };
> >
> > @@ -589,6 +590,7 @@ static void memory_bm_position_reset(struct memory_bitmap *bm)
> >         bm->cur.node = list_entry(bm->cur.zone->leaves.next,
> >                                   struct rtree_node, list);
> >         bm->cur.node_pfn = 0;
> > +       bm->cur.cur_pfn = BM_END_OF_MAP;
> >         bm->cur.node_bit = 0;
> >  }
> >
> > @@ -850,6 +852,11 @@ static void memory_bm_clear_current(struct memory_bitmap *bm)
> >         clear_bit(bit, bm->cur.node->data);
> >  }
> >
> > +static unsigned long memory_bm_get_current(struct memory_bitmap *bm)
> > +{
> > +       return bm->cur.cur_pfn;
> > +}
> > +
> >  static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
> >  {
> >         void *addr;
> > @@ -929,10 +936,12 @@ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
> >                 if (bit < bits) {
> >                         pfn = bm->cur.zone->start_pfn + bm->cur.node_pfn + bit;
> >                         bm->cur.node_bit = bit + 1;
> > +                       bm->cur.cur_pfn = pfn;
> >                         return pfn;
> >                 }
> >         } while (rtree_next_node(bm));
> >
> > +       bm->cur.cur_pfn = BM_END_OF_MAP;
> >         return BM_END_OF_MAP;
> >  }
> >
> > @@ -1371,14 +1380,18 @@ static unsigned int count_data_pages(void)
> >
> >  /*
> >   * This is needed, because copy_page and memcpy are not usable for copying
> > - * task structs.
> > + * task structs. Returns 1 if a page was filled with only zeros, otherwise 0.
> >   */
> > -static inline void do_copy_page(long *dst, long *src)
> > +static inline int do_copy_page(long *dst, long *src)
> >  {
> >         int n;
> > +       long z = 0;
> >
> > -       for (n = PAGE_SIZE / sizeof(long); n; n--)
> > +       for (n = PAGE_SIZE / sizeof(long); n; n--) {
> > +               z |= *src;
> >                 *dst++ = *src++;
> > +       }
> > +       return !z;
> >  }
> >
> >  /**
> > @@ -1389,15 +1402,17 @@ static inline void do_copy_page(long *dst, long *src)
> >   * CONFIG_ARCH_HAS_SET_DIRECT_MAP is not set. In that case kernel_page_present()
> >   * always returns 'true'.
> >   */
> > -static void safe_copy_page(void *dst, struct page *s_page)
> > +static int safe_copy_page(void *dst, struct page *s_page)
> >  {
> > +       int ret;
> >         if (kernel_page_present(s_page)) {
> > -               do_copy_page(dst, page_address(s_page));
> > +               ret = do_copy_page(dst, page_address(s_page));
> >         } else {
> >                 hibernate_map_page(s_page);
> > -               do_copy_page(dst, page_address(s_page));
> > +               ret = do_copy_page(dst, page_address(s_page));
> >                 hibernate_unmap_page(s_page);
> >         }
> > +       return ret;
> >  }
> >
> >  #ifdef CONFIG_HIGHMEM
> > @@ -1407,17 +1422,18 @@ static inline struct page *page_is_saveable(struct zone *zone, unsigned long pfn
> >                 saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
> >  }
> >
> > -static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
> > +static int copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
> >  {
> >         struct page *s_page, *d_page;
> >         void *src, *dst;
> > +       int ret;
> >
> >         s_page = pfn_to_page(src_pfn);
> >         d_page = pfn_to_page(dst_pfn);
> >         if (PageHighMem(s_page)) {
> >                 src = kmap_atomic(s_page);
> >                 dst = kmap_atomic(d_page);
> > -               do_copy_page(dst, src);
> > +               ret = do_copy_page(dst, src);
> >                 kunmap_atomic(dst);
> >                 kunmap_atomic(src);
> >         } else {
> > @@ -1426,30 +1442,32 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
> >                          * The page pointed to by src may contain some kernel
> >                          * data modified by kmap_atomic()
> >                          */
> > -                       safe_copy_page(buffer, s_page);
> > +                       ret = safe_copy_page(buffer, s_page);
> >                         dst = kmap_atomic(d_page);
> >                         copy_page(dst, buffer);
> >                         kunmap_atomic(dst);
> >                 } else {
> > -                       safe_copy_page(page_address(d_page), s_page);
> > +                       ret = safe_copy_page(page_address(d_page), s_page);
> >                 }
> >         }
> > +       return ret;
> >  }
> >  #else
> >  #define page_is_saveable(zone, pfn)    saveable_page(zone, pfn)
> >
> > -static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
> > +static inline int copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
> >  {
> > -       safe_copy_page(page_address(pfn_to_page(dst_pfn)),
> > +       return safe_copy_page(page_address(pfn_to_page(dst_pfn)),
> >                                 pfn_to_page(src_pfn));
> >  }
> >  #endif /* CONFIG_HIGHMEM */
> >
> >  static void copy_data_pages(struct memory_bitmap *copy_bm,
> > -                           struct memory_bitmap *orig_bm)
> > +                           struct memory_bitmap *orig_bm,
> > +                           struct memory_bitmap *zero_bm)
> >  {
> >         struct zone *zone;
> > -       unsigned long pfn;
> > +       unsigned long pfn, copy_pfn;
> >
> >         for_each_populated_zone(zone) {
> >                 unsigned long max_zone_pfn;
> > @@ -1462,11 +1480,18 @@ static void copy_data_pages(struct memory_bitmap *copy_bm,
> >         }
> >         memory_bm_position_reset(orig_bm);
> >         memory_bm_position_reset(copy_bm);
> > +       copy_pfn = memory_bm_next_pfn(copy_bm);
> >         for(;;) {
> >                 pfn = memory_bm_next_pfn(orig_bm);
> >                 if (unlikely(pfn == BM_END_OF_MAP))
> >                         break;
> > -               copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
> > +               if (copy_data_page(copy_pfn, pfn)) {
> > +                       memory_bm_set_bit(zero_bm, pfn);
> > +
> > +                       /* We will reuse this copy_pfn for a real 'nonzero' page. */
> > +                       continue;
> > +               }
> > +               copy_pfn = memory_bm_next_pfn(copy_bm);
> >         }
> >  }
> >
> > @@ -1494,6 +1519,9 @@ static struct memory_bitmap orig_bm;
> >   */
> >  static struct memory_bitmap copy_bm;
> >
> > +/* Memory bitmap which tracks which saveable pages were zero filled. */
> > +static struct memory_bitmap zero_bm;
> > +
> >  /**
> >   * swsusp_free - Free pages allocated for hibernation image.
> >   *
> > @@ -1756,6 +1784,12 @@ int hibernate_preallocate_memory(void)
> >                 goto err_out;
> >         }
> >
> > +       error = memory_bm_create(&zero_bm, GFP_IMAGE, PG_ANY);
> > +       if (error) {
> > +               pr_err("Cannot allocate zero bitmap\n");
> > +               goto err_out;
> > +       }
> > +
> >         alloc_normal = 0;
> >         alloc_highmem = 0;
> >
> > @@ -2013,11 +2047,12 @@ static int swsusp_alloc(struct memory_bitmap *copy_bm,
> >
> >  asmlinkage __visible int swsusp_save(void)
> >  {
> > -       unsigned int nr_pages, nr_highmem;
> > +       unsigned int nr_pages, nr_highmem, nr_zero_pages;
> >
> >         pr_info("Creating image:\n");
> >
> >         drain_local_pages(NULL);
> > +       nr_zero_pages = 0;
> >         nr_pages = count_data_pages();
> >         nr_highmem = count_highmem_pages();
> >         pr_info("Need to copy %u pages\n", nr_pages + nr_highmem);
> > @@ -2037,19 +2072,23 @@ asmlinkage __visible int swsusp_save(void)
> >          * Kill them.
> >          */
> >         drain_local_pages(NULL);
> > -       copy_data_pages(&copy_bm, &orig_bm);
> > +       copy_data_pages(&copy_bm, &orig_bm, &zero_bm);
> >
> >         /*
> >          * End of critical section. From now on, we can write to memory,
> >          * but we should not touch disk. This specially means we must _not_
> >          * touch swap space! Except we must write out our image of course.
> >          */
> > +       memory_bm_position_reset(&zero_bm);
> > +       while (memory_bm_next_pfn(&zero_bm) != BM_END_OF_MAP)
> > +               nr_zero_pages++;
> >
> >         nr_pages += nr_highmem;
> > -       nr_copy_pages = nr_pages;
> > +       /* We don't actually copy the zero pages */
> > +       nr_copy_pages = nr_pages - nr_zero_pages;
> >         nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
> >
> > -       pr_info("Image created (%d pages copied)\n", nr_pages);
> > +       pr_info("Image created (%d pages copied, %d zero pages)\n", nr_copy_pages, nr_zero_pages);
> >
> >         return 0;
> >  }
> > @@ -2094,15 +2133,22 @@ static int init_header(struct swsusp_info *info)
> >         return init_header_complete(info);
> >  }
> >
> > +#define ENCODED_PFN_ZERO_FLAG (1UL << (BITS_PER_LONG - 1))
> > +#define ENCODED_PFN_MASK (~ENCODED_PFN_ZERO_FLAG)
> > +
> >  /**
> >   * pack_pfns - Prepare PFNs for saving.
> >   * @bm: Memory bitmap.
> >   * @buf: Memory buffer to store the PFNs in.
> > + * @zero_bm: Memory bitmap containing PFNs of zero pages.
> >   *
> >   * PFNs corresponding to set bits in @bm are stored in the area of memory
> > - * pointed to by @buf (1 page at a time).
> > + * pointed to by @buf (1 page at a time). Pages which were filled with only
> > + * zeros will have the highest bit set in the packed format to distinguish
> > + * them from PFNs which will be contained in the image file.
> >   */
> > -static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
> > +static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm,
> > +               struct memory_bitmap *zero_bm)
> >  {
> >         int j;
> >
> > @@ -2110,6 +2156,8 @@ static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
> >                 buf[j] = memory_bm_next_pfn(bm);
> >                 if (unlikely(buf[j] == BM_END_OF_MAP))
> >                         break;
> > +               if (memory_bm_test_bit(zero_bm, buf[j]))
> > +                       buf[j] |= ENCODED_PFN_ZERO_FLAG;
> >         }
> >  }
> >
> > @@ -2151,7 +2199,7 @@ int snapshot_read_next(struct snapshot_handle *handle)
> >                 memory_bm_position_reset(&copy_bm);
> >         } else if (handle->cur <= nr_meta_pages) {
> >                 clear_page(buffer);
> > -               pack_pfns(buffer, &orig_bm);
> > +               pack_pfns(buffer, &orig_bm, &zero_bm);
> >         } else {
> >                 struct page *page;
> >
> > @@ -2247,24 +2295,32 @@ static int load_header(struct swsusp_info *info)
> >   * unpack_orig_pfns - Set bits corresponding to given PFNs in a memory bitmap.
> >   * @bm: Memory bitmap.
> >   * @buf: Area of memory containing the PFNs.
> > + * @zero_bm: Memory bitmap which will be populated with the PFNs of zero pages.
> >   *
> >   * For each element of the array pointed to by @buf (1 page at a time), set the
> > - * corresponding bit in @bm.
> > + * corresponding bit in @bm. If the page was originally populated with only
> > + * zeros then a corresponding bit will also be set in @zero_bm.
> >   */
> > -static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
> > +static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm,
> > +               struct memory_bitmap *zero_bm)
> >  {
> > -       int j;
> > +       int j, zero;
> > +       unsigned long decoded_pfn;
> >
> >         for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
> >                 if (unlikely(buf[j] == BM_END_OF_MAP))
> >                         break;
> >
> > -               if (pfn_valid(buf[j]) && memory_bm_pfn_present(bm, buf[j])) {
> > -                       memory_bm_set_bit(bm, buf[j]);
> > +               zero = !!(buf[j] & ENCODED_PFN_ZERO_FLAG);
> > +               decoded_pfn = buf[j] & ENCODED_PFN_MASK;
> > +               if (pfn_valid(decoded_pfn) && memory_bm_pfn_present(bm, decoded_pfn)) {
> > +                       memory_bm_set_bit(bm, decoded_pfn);
> > +                       if (zero)
> > +                               memory_bm_set_bit(zero_bm, decoded_pfn);
> >                 } else {
> > -                       if (!pfn_valid(buf[j]))
> > +                       if (!pfn_valid(decoded_pfn))
> >                                 pr_err(FW_BUG "Memory map mismatch at 0x%llx after hibernation\n",
> > -                                      (unsigned long long)PFN_PHYS(buf[j]));
> > +                                      (unsigned long long)PFN_PHYS(decoded_pfn));
> >                         return -EFAULT;
> >                 }
> >         }
> > @@ -2631,6 +2687,7 @@ int snapshot_write_next(struct snapshot_handle *handle)
> >         static struct chain_allocator ca;
> >         int error = 0;
> >
> > +next:
> >         /* Check if we have already loaded the entire image */
> >         if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages)
> >                 return 0;
> > @@ -2657,9 +2714,13 @@ int snapshot_write_next(struct snapshot_handle *handle)
> >                 if (error)
> >                         return error;
> >
> > +               error = memory_bm_create(&zero_bm, GFP_ATOMIC, PG_ANY);
> > +               if (error)
> > +                       return error;
> > +
> >                 hibernate_restore_protection_begin();
> >         } else if (handle->cur <= nr_meta_pages + 1) {
> > -               error = unpack_orig_pfns(buffer, &copy_bm);
> > +               error = unpack_orig_pfns(buffer, &copy_bm, &zero_bm);
> >                 if (error)
> >                         return error;
> >
> > @@ -2686,6 +2747,14 @@ int snapshot_write_next(struct snapshot_handle *handle)
> >                         handle->sync_read = 0;
> >         }
> >         handle->cur++;
> > +
> > +       /* Zero pages were not included in the image, memset it and move on. */
> > +       if ((handle->cur > nr_meta_pages + 1) &&
> > +                       memory_bm_test_bit(&zero_bm, memory_bm_get_current(&orig_bm))) {
> > +               memset(handle->buffer, 0, PAGE_SIZE);
> > +               goto next;
> > +       }
> > +
> >         return PAGE_SIZE;
> >  }
> >
> > --
> > 2.39.0.314.g84b9a713c41-goog
> >

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] PM: hibernate: don't store zero pages in the image file.
  2023-02-13 14:45     ` Brian Geffon
@ 2023-02-18 11:48       ` Pavel Machek
  2023-02-19 18:56         ` Brian Geffon
  0 siblings, 1 reply; 16+ messages in thread
From: Pavel Machek @ 2023-02-18 11:48 UTC (permalink / raw)
  To: Brian Geffon; +Cc: Rafael J. Wysocki, Len Brown, linux-pm, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 633 bytes --]

Hi!

> > I need some more time to go through this in more detail, so it is
> > likely to miss 6.3.  Sorry about that.
> 
> No problem, consider this more of an RFC, it seems like a
> low-risk/low-cost way to reduce image sizes so I was just hoping to
> get feedback. Take your time reviewing.

Seems like special case of compression, really, it might be better to
leave it to compression algorithm to solve that.

Not sure if lzo is optimal if you want max speed, but things like gzip
should be "almost free", too.

Best regards,
								Pavel
-- 
People of Russia, stop Putin before his war on Ukraine escalates.

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] PM: hibernate: don't store zero pages in the image file.
  2023-02-18 11:48       ` Pavel Machek
@ 2023-02-19 18:56         ` Brian Geffon
  0 siblings, 0 replies; 16+ messages in thread
From: Brian Geffon @ 2023-02-19 18:56 UTC (permalink / raw)
  To: Pavel Machek; +Cc: Rafael J. Wysocki, Len Brown, linux-pm, linux-kernel

On Sat, Feb 18, 2023 at 6:49 AM Pavel Machek <pavel@ucw.cz> wrote:
>
> Hi!

Hey Pavel,

>
> > > I need some more time to go through this in more detail, so it is
> > > likely to miss 6.3.  Sorry about that.
> >
> > No problem, consider this more of an RFC, it seems like a
> > low-risk/low-cost way to reduce image sizes so I was just hoping to
> > get feedback. Take your time reviewing.
>
> Seems like special case of compression, really, it might be better to
> leave it to compression algorithm to solve that.

It's true that it seems like an optimization that really does resemble
compression. But there are a few distinctions I'd like to call out;
firstly, this optimization has effectively an infinite compression
ratio as the zero pages never have to be included at all for that
reason this benefits both the compressed and non-compressed use-cases.
Additionally, this is basically free for both the swsusp and uswsusp
paths as it's transparently handled in the
snapshot_read_next/snapshot_write_next paths.

>
> Not sure if lzo is optimal if you want max speed, but things like gzip
> should be "almost free", too.

While certain compression algorithms are cheap they are definitely not
free. This specific optimization ultimately results in a single OR
after testing the zero flag from a word copy we were already
performing, now that's pretty much free :)

Another final thing I'd like to note about this is that when
compressing today, the default is to compress in 32 page chunks
(iirc). By including the zero pages in those 32 pages you're limiting
your ability to better compress the "real" data, if that makes sense.
Why bother copying those zero pages over to the compressor kthreads
anyway? With all that being said, the code itself is certainly not
free in the sense that it does add some complexity to both the read
and write paths, do you guys think that additional complexity is worth
the benefits we might see?

>
> Best regards,
>                                                                 Pavel

Thanks for taking the time to discuss this!


Brian

> --
> People of Russia, stop Putin before his war on Ukraine escalates.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v3] PM: hibernate: don't store zero pages in the image file.
  2023-01-13 19:30 [PATCH] PM: hibernate: don't store zero pages in the image file Brian Geffon
  2023-01-14  0:15 ` [PATCH v2] " Brian Geffon
@ 2023-03-02 17:13 ` Brian Geffon
  2023-03-23 16:19   ` Matthias Kaehlcke
  2023-04-06 18:20 ` [PATCH v4] " Brian Geffon
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 16+ messages in thread
From: Brian Geffon @ 2023-03-02 17:13 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Pavel Machek, Len Brown, linux-pm, linux-kernel, Brian Geffon

On ChromeOS we've observed a considerable number of in-use pages filled with
zeros. Today with hibernate it's entirely possible that saveable pages are just
zero filled. Since we're already copying pages word-by-word in do_copy_page it
becomes almost free to determine if a page was completely filled with zeros.

This change introduces a new bitmap which will track these zero pages. If a page
is zero it will not be included in the saved image, instead to track these zero
pages in the image file we will introduce a new flag which we will set on the
packed PFN list. When reading back in the image file we will detect these zero
page PFNs and rebuild the zero page bitmap.

When the image is being loaded through calls to write_next_page if we encounter
a zero page we will silently memset it to 0 and then continue on to the next
page. Given the implementation in snapshot_read_next/snapshot_write_next this
change  will be transparent to non-compressed/compressed and swsusp modes of
operation.

To provide some concrete numbers from simple ad-hoc testing, on a device which
was lightly in use we saw that:

  PM: hibernation: Image created (964408 pages copied, 548304 zero pages)

Of the approximately 6.2GB of saveable pages 2.2GB (36%) were just zero filled
and could be tracked entirely within the packed PFN list. The savings would
obviously be much lower for lzo compressed images, but even in the case of
compression not copying pages across to the compression threads will still
speed things up. It's also possible that we would see better overall compression
ratios as larger regions of "real data" would improve the compressibility.

Finally, such an approach could dramatically improve swsusp performance
as each one of those zero pages requires a write syscall to reload, by
handling it as part of the packed PFN list we're able to fully avoid
that.

 Patch v2 -> v3:
   - Use nr_zero_pages rather than walking each pfn to count.
   - Make sure zero_bm is allocated in safe pages on resume.
     When reading in the pfn list and building the zero page bm
     we don't know which pages are unsafe yet so we will need to
     copy this bm to safe pages after the metadata has been read.

 Patch v1 -> v2:
   - minor code mistake from rebasing corrected.

Signed-off-by: Brian Geffon <bgeffon@google.com>
---
 kernel/power/snapshot.c | 169 +++++++++++++++++++++++++++++++---------
 1 file changed, 133 insertions(+), 36 deletions(-)

diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index cd8b7b35f1e8b..a2c4fe17f9067 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -404,6 +404,7 @@ struct bm_position {
 	struct mem_zone_bm_rtree *zone;
 	struct rtree_node *node;
 	unsigned long node_pfn;
+	unsigned long cur_pfn;
 	int node_bit;
 };
 
@@ -589,6 +590,7 @@ static void memory_bm_position_reset(struct memory_bitmap *bm)
 	bm->cur.node = list_entry(bm->cur.zone->leaves.next,
 				  struct rtree_node, list);
 	bm->cur.node_pfn = 0;
+	bm->cur.cur_pfn = BM_END_OF_MAP;
 	bm->cur.node_bit = 0;
 }
 
@@ -799,6 +801,7 @@ static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
 	bm->cur.zone = zone;
 	bm->cur.node = node;
 	bm->cur.node_pfn = (pfn - zone->start_pfn) & ~BM_BLOCK_MASK;
+	bm->cur.cur_pfn = pfn;
 
 	/* Set return values */
 	*addr = node->data;
@@ -850,6 +853,11 @@ static void memory_bm_clear_current(struct memory_bitmap *bm)
 	clear_bit(bit, bm->cur.node->data);
 }
 
+static unsigned long memory_bm_get_current(struct memory_bitmap *bm)
+{
+	return bm->cur.cur_pfn;
+}
+
 static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
 {
 	void *addr;
@@ -929,10 +937,12 @@ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
 		if (bit < bits) {
 			pfn = bm->cur.zone->start_pfn + bm->cur.node_pfn + bit;
 			bm->cur.node_bit = bit + 1;
+			bm->cur.cur_pfn = pfn;
 			return pfn;
 		}
 	} while (rtree_next_node(bm));
 
+	bm->cur.cur_pfn = BM_END_OF_MAP;
 	return BM_END_OF_MAP;
 }
 
@@ -1371,14 +1381,18 @@ static unsigned int count_data_pages(void)
 
 /*
  * This is needed, because copy_page and memcpy are not usable for copying
- * task structs.
+ * task structs. Returns 1 if a page was filled with only zeros, otherwise 0.
  */
-static inline void do_copy_page(long *dst, long *src)
+static inline int do_copy_page(long *dst, long *src)
 {
 	int n;
+	long z = 0;
 
-	for (n = PAGE_SIZE / sizeof(long); n; n--)
+	for (n = PAGE_SIZE / sizeof(long); n; n--) {
+		z |= *src;
 		*dst++ = *src++;
+	}
+	return !z;
 }
 
 /**
@@ -1389,15 +1403,17 @@ static inline void do_copy_page(long *dst, long *src)
  * CONFIG_ARCH_HAS_SET_DIRECT_MAP is not set. In that case kernel_page_present()
  * always returns 'true'.
  */
-static void safe_copy_page(void *dst, struct page *s_page)
+static int safe_copy_page(void *dst, struct page *s_page)
 {
+	int ret;
 	if (kernel_page_present(s_page)) {
-		do_copy_page(dst, page_address(s_page));
+		ret = do_copy_page(dst, page_address(s_page));
 	} else {
 		hibernate_map_page(s_page);
-		do_copy_page(dst, page_address(s_page));
+		ret = do_copy_page(dst, page_address(s_page));
 		hibernate_unmap_page(s_page);
 	}
+	return ret;
 }
 
 #ifdef CONFIG_HIGHMEM
@@ -1407,17 +1423,18 @@ static inline struct page *page_is_saveable(struct zone *zone, unsigned long pfn
 		saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
 }
 
-static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
+static int copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
 {
 	struct page *s_page, *d_page;
 	void *src, *dst;
+	int ret;
 
 	s_page = pfn_to_page(src_pfn);
 	d_page = pfn_to_page(dst_pfn);
 	if (PageHighMem(s_page)) {
 		src = kmap_atomic(s_page);
 		dst = kmap_atomic(d_page);
-		do_copy_page(dst, src);
+		ret = do_copy_page(dst, src);
 		kunmap_atomic(dst);
 		kunmap_atomic(src);
 	} else {
@@ -1426,30 +1443,33 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
 			 * The page pointed to by src may contain some kernel
 			 * data modified by kmap_atomic()
 			 */
-			safe_copy_page(buffer, s_page);
+			ret = safe_copy_page(buffer, s_page);
 			dst = kmap_atomic(d_page);
 			copy_page(dst, buffer);
 			kunmap_atomic(dst);
 		} else {
-			safe_copy_page(page_address(d_page), s_page);
+			ret = safe_copy_page(page_address(d_page), s_page);
 		}
 	}
+	return ret;
 }
 #else
 #define page_is_saveable(zone, pfn)	saveable_page(zone, pfn)
 
-static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
+static inline int copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
 {
-	safe_copy_page(page_address(pfn_to_page(dst_pfn)),
+	return safe_copy_page(page_address(pfn_to_page(dst_pfn)),
 				pfn_to_page(src_pfn));
 }
 #endif /* CONFIG_HIGHMEM */
 
 static void copy_data_pages(struct memory_bitmap *copy_bm,
-			    struct memory_bitmap *orig_bm)
+			    struct memory_bitmap *orig_bm,
+			    struct memory_bitmap *zero_bm,
+			    unsigned int *zero_count)
 {
 	struct zone *zone;
-	unsigned long pfn;
+	unsigned long pfn, copy_pfn;
 
 	for_each_populated_zone(zone) {
 		unsigned long max_zone_pfn;
@@ -1462,11 +1482,20 @@ static void copy_data_pages(struct memory_bitmap *copy_bm,
 	}
 	memory_bm_position_reset(orig_bm);
 	memory_bm_position_reset(copy_bm);
+	copy_pfn = memory_bm_next_pfn(copy_bm);
 	for(;;) {
 		pfn = memory_bm_next_pfn(orig_bm);
 		if (unlikely(pfn == BM_END_OF_MAP))
 			break;
-		copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
+		if (copy_data_page(copy_pfn, pfn)) {
+			memory_bm_set_bit(zero_bm, pfn);
+			if (zero_count)
+				(*zero_count)++;
+
+			/* We will reuse this copy_pfn for a real 'nonzero' page. */
+			continue;
+		}
+		copy_pfn = memory_bm_next_pfn(copy_bm);
 	}
 }
 
@@ -1474,6 +1503,9 @@ static void copy_data_pages(struct memory_bitmap *copy_bm,
 static unsigned int nr_copy_pages;
 /* Number of pages needed for saving the original pfns of the image pages */
 static unsigned int nr_meta_pages;
+/* Number of zero pages */
+static unsigned int nr_zero_pages;
+
 /*
  * Numbers of normal and highmem page frames allocated for hibernation image
  * before suspending devices.
@@ -1494,6 +1526,9 @@ static struct memory_bitmap orig_bm;
  */
 static struct memory_bitmap copy_bm;
 
+/* Memory bitmap which tracks which saveable pages were zero filled. */
+static struct memory_bitmap zero_bm;
+
 /**
  * swsusp_free - Free pages allocated for hibernation image.
  *
@@ -1538,6 +1573,7 @@ void swsusp_free(void)
 out:
 	nr_copy_pages = 0;
 	nr_meta_pages = 0;
+	nr_zero_pages = 0;
 	restore_pblist = NULL;
 	buffer = NULL;
 	alloc_normal = 0;
@@ -1756,8 +1792,15 @@ int hibernate_preallocate_memory(void)
 		goto err_out;
 	}
 
+	error = memory_bm_create(&zero_bm, GFP_IMAGE, PG_ANY);
+	if (error) {
+		pr_err("Cannot allocate zero bitmap\n");
+		goto err_out;
+	}
+
 	alloc_normal = 0;
 	alloc_highmem = 0;
+	nr_zero_pages = 0;
 
 	/* Count the number of saveable data pages. */
 	save_highmem = count_highmem_pages();
@@ -2037,19 +2080,19 @@ asmlinkage __visible int swsusp_save(void)
 	 * Kill them.
 	 */
 	drain_local_pages(NULL);
-	copy_data_pages(&copy_bm, &orig_bm);
+	copy_data_pages(&copy_bm, &orig_bm, &zero_bm, &nr_zero_pages);
 
 	/*
 	 * End of critical section. From now on, we can write to memory,
 	 * but we should not touch disk. This specially means we must _not_
 	 * touch swap space! Except we must write out our image of course.
 	 */
-
 	nr_pages += nr_highmem;
-	nr_copy_pages = nr_pages;
+	/* We don't actually copy the zero pages */
+	nr_copy_pages = nr_pages - nr_zero_pages;
 	nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
 
-	pr_info("Image created (%d pages copied)\n", nr_pages);
+	pr_info("Image created (%d pages copied, %d zero pages)\n", nr_copy_pages, nr_zero_pages);
 
 	return 0;
 }
@@ -2094,15 +2137,22 @@ static int init_header(struct swsusp_info *info)
 	return init_header_complete(info);
 }
 
+#define ENCODED_PFN_ZERO_FLAG ((unsigned long)1 << (BITS_PER_LONG - 1))
+#define ENCODED_PFN_MASK (~ENCODED_PFN_ZERO_FLAG)
+
 /**
  * pack_pfns - Prepare PFNs for saving.
  * @bm: Memory bitmap.
  * @buf: Memory buffer to store the PFNs in.
+ * @zero_bm: Memory bitmap containing PFNs of zero pages.
  *
  * PFNs corresponding to set bits in @bm are stored in the area of memory
- * pointed to by @buf (1 page at a time).
+ * pointed to by @buf (1 page at a time). Pages which were filled with only
+ * zeros will have the highest bit set in the packed format to distinguish
+ * them from PFNs which will be contained in the image file.
  */
-static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
+static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm,
+		struct memory_bitmap *zero_bm)
 {
 	int j;
 
@@ -2110,6 +2160,8 @@ static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
 		buf[j] = memory_bm_next_pfn(bm);
 		if (unlikely(buf[j] == BM_END_OF_MAP))
 			break;
+		if (memory_bm_test_bit(zero_bm, buf[j]))
+			buf[j] |= ENCODED_PFN_ZERO_FLAG;
 	}
 }
 
@@ -2151,7 +2203,7 @@ int snapshot_read_next(struct snapshot_handle *handle)
 		memory_bm_position_reset(&copy_bm);
 	} else if (handle->cur <= nr_meta_pages) {
 		clear_page(buffer);
-		pack_pfns(buffer, &orig_bm);
+		pack_pfns(buffer, &orig_bm, &zero_bm);
 	} else {
 		struct page *page;
 
@@ -2247,24 +2299,34 @@ static int load_header(struct swsusp_info *info)
  * unpack_orig_pfns - Set bits corresponding to given PFNs in a memory bitmap.
  * @bm: Memory bitmap.
  * @buf: Area of memory containing the PFNs.
+ * @zero_bm: Memory bitmap which will be populated with the PFNs of zero pages.
  *
  * For each element of the array pointed to by @buf (1 page at a time), set the
- * corresponding bit in @bm.
+ * corresponding bit in @bm. If the the page was originally populated with only
+ * zeros then a corresponding bit will also be set in @zero_bm.
  */
-static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
+static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm,
+		struct memory_bitmap *zero_bm)
 {
-	int j;
+	int j, zero;
+	unsigned long decoded_pfn;
 
 	for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
 		if (unlikely(buf[j] == BM_END_OF_MAP))
 			break;
 
-		if (pfn_valid(buf[j]) && memory_bm_pfn_present(bm, buf[j])) {
-			memory_bm_set_bit(bm, buf[j]);
+		zero = !!(buf[j] & ENCODED_PFN_ZERO_FLAG);
+		decoded_pfn = buf[j] & ENCODED_PFN_MASK;
+		if (pfn_valid(decoded_pfn) && memory_bm_pfn_present(bm, decoded_pfn)) {
+			memory_bm_set_bit(bm, decoded_pfn);
+			if (zero) {
+				memory_bm_set_bit(zero_bm, decoded_pfn);
+				nr_zero_pages++;
+			}
 		} else {
-			if (!pfn_valid(buf[j]))
+			if (!pfn_valid(decoded_pfn))
 				pr_err(FW_BUG "Memory map mismatch at 0x%llx after hibernation\n",
-				       (unsigned long long)PFN_PHYS(buf[j]));
+				       (unsigned long long)PFN_PHYS(decoded_pfn));
 			return -EFAULT;
 		}
 	}
@@ -2486,6 +2548,7 @@ static inline void free_highmem_data(void) {}
  * prepare_image - Make room for loading hibernation image.
  * @new_bm: Uninitialized memory bitmap structure.
  * @bm: Memory bitmap with unsafe pages marked.
+ * @zero_bm: Memory bitmap containing the zero pages.
  *
  * Use @bm to mark the pages that will be overwritten in the process of
  * restoring the system memory state from the suspend image ("unsafe" pages)
@@ -2496,8 +2559,12 @@ static inline void free_highmem_data(void) {}
  * pages will be used for just yet.  Instead, we mark them all as allocated and
  * create a lists of "safe" pages to be used later.  On systems with high
  * memory a list of "safe" highmem pages is created too.
+ *
+ * Because we didn't know which pages were unsafe when we created the zero bm we
+ * will make a copy of it and recreate it within safe pages.
  */
-static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
+static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm,
+		struct memory_bitmap *zero_bm)
 {
 	unsigned int nr_pages, nr_highmem;
 	struct linked_page *lp;
@@ -2516,6 +2583,20 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
 
 	duplicate_memory_bitmap(new_bm, bm);
 	memory_bm_free(bm, PG_UNSAFE_KEEP);
+	error = memory_bm_create(bm, GFP_ATOMIC, PG_ANY);
+	if (error)
+		goto Free;
+
+	/* use bm as storage while we rebuild zero_bm using safe pages */
+	duplicate_memory_bitmap(bm, zero_bm);
+	memory_bm_free(zero_bm, PG_UNSAFE_KEEP);
+	error = memory_bm_create(zero_bm, GFP_ATOMIC, PG_SAFE);
+	if (error)
+		goto Free;
+	duplicate_memory_bitmap(zero_bm, bm);
+	memory_bm_free(bm, PG_UNSAFE_KEEP);
+	/* at this point zero_bm is in safe pages and we can use it while restoring */
+
 	if (nr_highmem > 0) {
 		error = prepare_highmem_image(bm, &nr_highmem);
 		if (error)
@@ -2530,7 +2611,7 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
 	 *
 	 * nr_copy_pages cannot be less than allocated_unsafe_pages too.
 	 */
-	nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
+	nr_pages = (nr_zero_pages + nr_copy_pages) - nr_highmem - allocated_unsafe_pages;
 	nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
 	while (nr_pages > 0) {
 		lp = get_image_page(GFP_ATOMIC, PG_SAFE);
@@ -2543,7 +2624,7 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
 		nr_pages--;
 	}
 	/* Preallocate memory for the image */
-	nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
+	nr_pages = (nr_zero_pages + nr_copy_pages) - nr_highmem - allocated_unsafe_pages;
 	while (nr_pages > 0) {
 		lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
 		if (!lp) {
@@ -2631,8 +2712,9 @@ int snapshot_write_next(struct snapshot_handle *handle)
 	static struct chain_allocator ca;
 	int error = 0;
 
+next:
 	/* Check if we have already loaded the entire image */
-	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages)
+	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages + nr_zero_pages)
 		return 0;
 
 	handle->sync_read = 1;
@@ -2657,19 +2739,26 @@ int snapshot_write_next(struct snapshot_handle *handle)
 		if (error)
 			return error;
 
+		error = memory_bm_create(&zero_bm, GFP_ATOMIC, PG_ANY);
+		if (error)
+			return error;
+
+		nr_zero_pages = 0;
+
 		hibernate_restore_protection_begin();
 	} else if (handle->cur <= nr_meta_pages + 1) {
-		error = unpack_orig_pfns(buffer, &copy_bm);
+		error = unpack_orig_pfns(buffer, &copy_bm, &zero_bm);
 		if (error)
 			return error;
 
 		if (handle->cur == nr_meta_pages + 1) {
-			error = prepare_image(&orig_bm, &copy_bm);
+			error = prepare_image(&orig_bm, &copy_bm, &zero_bm);
 			if (error)
 				return error;
 
 			chain_init(&ca, GFP_ATOMIC, PG_SAFE);
 			memory_bm_position_reset(&orig_bm);
+			memory_bm_position_reset(&zero_bm);
 			restore_pblist = NULL;
 			handle->buffer = get_buffer(&orig_bm, &ca);
 			handle->sync_read = 0;
@@ -2686,6 +2775,14 @@ int snapshot_write_next(struct snapshot_handle *handle)
 			handle->sync_read = 0;
 	}
 	handle->cur++;
+
+	/* Zero pages were not included in the image, memset it and move on. */
+	if ((handle->cur > (nr_meta_pages + 1)) &&
+			memory_bm_test_bit(&zero_bm, memory_bm_get_current(&orig_bm))) {
+		memset(handle->buffer, 0, PAGE_SIZE);
+		goto next;
+	}
+
 	return PAGE_SIZE;
 }
 
@@ -2702,7 +2799,7 @@ void snapshot_write_finalize(struct snapshot_handle *handle)
 	copy_last_highmem_page();
 	hibernate_restore_protect_page(handle->buffer);
 	/* Do that only if we have loaded the image entirely */
-	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) {
+	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages + nr_zero_pages) {
 		memory_bm_recycle(&orig_bm);
 		free_highmem_data();
 	}
-- 
2.39.2.722.g9855ee24e9-goog


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH v2] PM: hibernate: don't store zero pages in the image file.
  2023-02-09 19:44   ` Rafael J. Wysocki
  2023-02-13 14:45     ` Brian Geffon
@ 2023-03-02 17:25     ` Brian Geffon
  1 sibling, 0 replies; 16+ messages in thread
From: Brian Geffon @ 2023-03-02 17:25 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: Pavel Machek, Len Brown, linux-pm, linux-kernel

On Thu, Feb 9, 2023 at 2:44 PM Rafael J. Wysocki <rafael@kernel.org> wrote:
>
> On Sat, Jan 14, 2023 at 1:15 AM Brian Geffon <bgeffon@google.com> wrote:
> >
> > On ChromeOS we've observed a considerable number of in-use pages filled
> > with zeros. Today with hibernate it's entirely possible that saveable
> > pages are just zero filled. Since we're already copying pages
> > word-by-word in do_copy_page it becomes almost free to determine if a
> > page was completely filled with zeros.
> >
> > This change introduces a new bitmap which will track these zero pages.
> > If a page is zero it will not be included in the saved image, instead to
> > track these zero pages in the image file we will introduce a new flag
> > which we will set on the packed PFN list. When reading back in the image
> > file we will detect these zero page PFNs and rebuild the zero page bitmap.
> >
> > When the image is being loaded through calls to snapshot_write_next if we
> > encounter a zero page we will silently memset it to 0 and then continue on
> > to the next page. Given the implementation in snapshot_read_next and
> > snapshot_write_next this change  will be transparent to non-compressed,
> > compressed, and swsusp modes of operation.
> >
> > To provide some concrete numbers from simple ad-hoc testing, on a device
> > which was lightly in use we saw that:
> >
> >  PM: hibernation: Image created (964408 pages copied, 548304 zero pages)
> >
> > Of the approximately 6.2GB of saveable pages 2.2GB (36%) were just zero
> > filled and could be tracked entirely within the packed PFN list. The
> > savings would obviously be much lower for lzo compressed images, but even
> > in the case of compression not copying pages across to the compression
> > threads will still speed things up. It's also possible that we would see
> > better overall compression ratios as larger regions of "real data" would
> > improve the compressibility.
> >
> > Finally, such an approach could dramatically improve swsusp performance
> > as each one of those zero pages requires a write syscall to reload, by
> > handling it as part of the packed PFN list we're able to fully avoid
> > that.
> >
> > patch v2:
> >         - correct a minor issue when rebasing.
>
> I need some more time to go through this in more detail, so it is
> likely to miss 6.3.  Sorry about that.

Hi Rafael,
I mailed a v3 patch which fixed an issue where the zero bm wasn't
being allocated in safe pages
on resume. This patch now appears to be pretty stable, I haven't seen
any issues so far. If you would
be so kind,  I'd appreciate any feedback you have.

Thanks in advance!

>
> > Signed-off-by: Brian Geffon <bgeffon@google.com>
> > ---
> >  kernel/power/snapshot.c | 129 ++++++++++++++++++++++++++++++----------
> >  1 file changed, 99 insertions(+), 30 deletions(-)
> >
> > diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
> > index cd8b7b35f1e8..8d0ba36b0218 100644
> > --- a/kernel/power/snapshot.c
> > +++ b/kernel/power/snapshot.c
> > @@ -404,6 +404,7 @@ struct bm_position {
> >         struct mem_zone_bm_rtree *zone;
> >         struct rtree_node *node;
> >         unsigned long node_pfn;
> > +       unsigned long cur_pfn;
> >         int node_bit;
> >  };
> >
> > @@ -589,6 +590,7 @@ static void memory_bm_position_reset(struct memory_bitmap *bm)
> >         bm->cur.node = list_entry(bm->cur.zone->leaves.next,
> >                                   struct rtree_node, list);
> >         bm->cur.node_pfn = 0;
> > +       bm->cur.cur_pfn = BM_END_OF_MAP;
> >         bm->cur.node_bit = 0;
> >  }
> >
> > @@ -850,6 +852,11 @@ static void memory_bm_clear_current(struct memory_bitmap *bm)
> >         clear_bit(bit, bm->cur.node->data);
> >  }
> >
> > +static unsigned long memory_bm_get_current(struct memory_bitmap *bm)
> > +{
> > +       return bm->cur.cur_pfn;
> > +}
> > +
> >  static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
> >  {
> >         void *addr;
> > @@ -929,10 +936,12 @@ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
> >                 if (bit < bits) {
> >                         pfn = bm->cur.zone->start_pfn + bm->cur.node_pfn + bit;
> >                         bm->cur.node_bit = bit + 1;
> > +                       bm->cur.cur_pfn = pfn;
> >                         return pfn;
> >                 }
> >         } while (rtree_next_node(bm));
> >
> > +       bm->cur.cur_pfn = BM_END_OF_MAP;
> >         return BM_END_OF_MAP;
> >  }
> >
> > @@ -1371,14 +1380,18 @@ static unsigned int count_data_pages(void)
> >
> >  /*
> >   * This is needed, because copy_page and memcpy are not usable for copying
> > - * task structs.
> > + * task structs. Returns 1 if a page was filled with only zeros, otherwise 0.
> >   */
> > -static inline void do_copy_page(long *dst, long *src)
> > +static inline int do_copy_page(long *dst, long *src)
> >  {
> >         int n;
> > +       long z = 0;
> >
> > -       for (n = PAGE_SIZE / sizeof(long); n; n--)
> > +       for (n = PAGE_SIZE / sizeof(long); n; n--) {
> > +               z |= *src;
> >                 *dst++ = *src++;
> > +       }
> > +       return !z;
> >  }
> >
> >  /**
> > @@ -1389,15 +1402,17 @@ static inline void do_copy_page(long *dst, long *src)
> >   * CONFIG_ARCH_HAS_SET_DIRECT_MAP is not set. In that case kernel_page_present()
> >   * always returns 'true'.
> >   */
> > -static void safe_copy_page(void *dst, struct page *s_page)
> > +static int safe_copy_page(void *dst, struct page *s_page)
> >  {
> > +       int ret;
> >         if (kernel_page_present(s_page)) {
> > -               do_copy_page(dst, page_address(s_page));
> > +               ret = do_copy_page(dst, page_address(s_page));
> >         } else {
> >                 hibernate_map_page(s_page);
> > -               do_copy_page(dst, page_address(s_page));
> > +               ret = do_copy_page(dst, page_address(s_page));
> >                 hibernate_unmap_page(s_page);
> >         }
> > +       return ret;
> >  }
> >
> >  #ifdef CONFIG_HIGHMEM
> > @@ -1407,17 +1422,18 @@ static inline struct page *page_is_saveable(struct zone *zone, unsigned long pfn
> >                 saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
> >  }
> >
> > -static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
> > +static int copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
> >  {
> >         struct page *s_page, *d_page;
> >         void *src, *dst;
> > +       int ret;
> >
> >         s_page = pfn_to_page(src_pfn);
> >         d_page = pfn_to_page(dst_pfn);
> >         if (PageHighMem(s_page)) {
> >                 src = kmap_atomic(s_page);
> >                 dst = kmap_atomic(d_page);
> > -               do_copy_page(dst, src);
> > +               ret = do_copy_page(dst, src);
> >                 kunmap_atomic(dst);
> >                 kunmap_atomic(src);
> >         } else {
> > @@ -1426,30 +1442,32 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
> >                          * The page pointed to by src may contain some kernel
> >                          * data modified by kmap_atomic()
> >                          */
> > -                       safe_copy_page(buffer, s_page);
> > +                       ret = safe_copy_page(buffer, s_page);
> >                         dst = kmap_atomic(d_page);
> >                         copy_page(dst, buffer);
> >                         kunmap_atomic(dst);
> >                 } else {
> > -                       safe_copy_page(page_address(d_page), s_page);
> > +                       ret = safe_copy_page(page_address(d_page), s_page);
> >                 }
> >         }
> > +       return ret;
> >  }
> >  #else
> >  #define page_is_saveable(zone, pfn)    saveable_page(zone, pfn)
> >
> > -static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
> > +static inline int copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
> >  {
> > -       safe_copy_page(page_address(pfn_to_page(dst_pfn)),
> > +       return safe_copy_page(page_address(pfn_to_page(dst_pfn)),
> >                                 pfn_to_page(src_pfn));
> >  }
> >  #endif /* CONFIG_HIGHMEM */
> >
> >  static void copy_data_pages(struct memory_bitmap *copy_bm,
> > -                           struct memory_bitmap *orig_bm)
> > +                           struct memory_bitmap *orig_bm,
> > +                           struct memory_bitmap *zero_bm)
> >  {
> >         struct zone *zone;
> > -       unsigned long pfn;
> > +       unsigned long pfn, copy_pfn;
> >
> >         for_each_populated_zone(zone) {
> >                 unsigned long max_zone_pfn;
> > @@ -1462,11 +1480,18 @@ static void copy_data_pages(struct memory_bitmap *copy_bm,
> >         }
> >         memory_bm_position_reset(orig_bm);
> >         memory_bm_position_reset(copy_bm);
> > +       copy_pfn = memory_bm_next_pfn(copy_bm);
> >         for(;;) {
> >                 pfn = memory_bm_next_pfn(orig_bm);
> >                 if (unlikely(pfn == BM_END_OF_MAP))
> >                         break;
> > -               copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
> > +               if (copy_data_page(copy_pfn, pfn)) {
> > +                       memory_bm_set_bit(zero_bm, pfn);
> > +
> > +                       /* We will reuse this copy_pfn for a real 'nonzero' page. */
> > +                       continue;
> > +               }
> > +               copy_pfn = memory_bm_next_pfn(copy_bm);
> >         }
> >  }
> >
> > @@ -1494,6 +1519,9 @@ static struct memory_bitmap orig_bm;
> >   */
> >  static struct memory_bitmap copy_bm;
> >
> > +/* Memory bitmap which tracks which saveable pages were zero filled. */
> > +static struct memory_bitmap zero_bm;
> > +
> >  /**
> >   * swsusp_free - Free pages allocated for hibernation image.
> >   *
> > @@ -1756,6 +1784,12 @@ int hibernate_preallocate_memory(void)
> >                 goto err_out;
> >         }
> >
> > +       error = memory_bm_create(&zero_bm, GFP_IMAGE, PG_ANY);
> > +       if (error) {
> > +               pr_err("Cannot allocate zero bitmap\n");
> > +               goto err_out;
> > +       }
> > +
> >         alloc_normal = 0;
> >         alloc_highmem = 0;
> >
> > @@ -2013,11 +2047,12 @@ static int swsusp_alloc(struct memory_bitmap *copy_bm,
> >
> >  asmlinkage __visible int swsusp_save(void)
> >  {
> > -       unsigned int nr_pages, nr_highmem;
> > +       unsigned int nr_pages, nr_highmem, nr_zero_pages;
> >
> >         pr_info("Creating image:\n");
> >
> >         drain_local_pages(NULL);
> > +       nr_zero_pages = 0;
> >         nr_pages = count_data_pages();
> >         nr_highmem = count_highmem_pages();
> >         pr_info("Need to copy %u pages\n", nr_pages + nr_highmem);
> > @@ -2037,19 +2072,23 @@ asmlinkage __visible int swsusp_save(void)
> >          * Kill them.
> >          */
> >         drain_local_pages(NULL);
> > -       copy_data_pages(&copy_bm, &orig_bm);
> > +       copy_data_pages(&copy_bm, &orig_bm, &zero_bm);
> >
> >         /*
> >          * End of critical section. From now on, we can write to memory,
> >          * but we should not touch disk. This specially means we must _not_
> >          * touch swap space! Except we must write out our image of course.
> >          */
> > +       memory_bm_position_reset(&zero_bm);
> > +       while (memory_bm_next_pfn(&zero_bm) != BM_END_OF_MAP)
> > +               nr_zero_pages++;
> >
> >         nr_pages += nr_highmem;
> > -       nr_copy_pages = nr_pages;
> > +       /* We don't actually copy the zero pages */
> > +       nr_copy_pages = nr_pages - nr_zero_pages;
> >         nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
> >
> > -       pr_info("Image created (%d pages copied)\n", nr_pages);
> > +       pr_info("Image created (%d pages copied, %d zero pages)\n", nr_copy_pages, nr_zero_pages);
> >
> >         return 0;
> >  }
> > @@ -2094,15 +2133,22 @@ static int init_header(struct swsusp_info *info)
> >         return init_header_complete(info);
> >  }
> >
> > +#define ENCODED_PFN_ZERO_FLAG (1UL << (BITS_PER_LONG - 1))
> > +#define ENCODED_PFN_MASK (~ENCODED_PFN_ZERO_FLAG)
> > +
> >  /**
> >   * pack_pfns - Prepare PFNs for saving.
> >   * @bm: Memory bitmap.
> >   * @buf: Memory buffer to store the PFNs in.
> > + * @zero_bm: Memory bitmap containing PFNs of zero pages.
> >   *
> >   * PFNs corresponding to set bits in @bm are stored in the area of memory
> > - * pointed to by @buf (1 page at a time).
> > + * pointed to by @buf (1 page at a time). Pages which were filled with only
> > + * zeros will have the highest bit set in the packed format to distinguish
> > + * them from PFNs which will be contained in the image file.
> >   */
> > -static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
> > +static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm,
> > +               struct memory_bitmap *zero_bm)
> >  {
> >         int j;
> >
> > @@ -2110,6 +2156,8 @@ static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
> >                 buf[j] = memory_bm_next_pfn(bm);
> >                 if (unlikely(buf[j] == BM_END_OF_MAP))
> >                         break;
> > +               if (memory_bm_test_bit(zero_bm, buf[j]))
> > +                       buf[j] |= ENCODED_PFN_ZERO_FLAG;
> >         }
> >  }
> >
> > @@ -2151,7 +2199,7 @@ int snapshot_read_next(struct snapshot_handle *handle)
> >                 memory_bm_position_reset(&copy_bm);
> >         } else if (handle->cur <= nr_meta_pages) {
> >                 clear_page(buffer);
> > -               pack_pfns(buffer, &orig_bm);
> > +               pack_pfns(buffer, &orig_bm, &zero_bm);
> >         } else {
> >                 struct page *page;
> >
> > @@ -2247,24 +2295,32 @@ static int load_header(struct swsusp_info *info)
> >   * unpack_orig_pfns - Set bits corresponding to given PFNs in a memory bitmap.
> >   * @bm: Memory bitmap.
> >   * @buf: Area of memory containing the PFNs.
> > + * @zero_bm: Memory bitmap which will be populated with the PFNs of zero pages.
> >   *
> >   * For each element of the array pointed to by @buf (1 page at a time), set the
> > - * corresponding bit in @bm.
> > + * corresponding bit in @bm. If the page was originally populated with only
> > + * zeros then a corresponding bit will also be set in @zero_bm.
> >   */
> > -static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
> > +static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm,
> > +               struct memory_bitmap *zero_bm)
> >  {
> > -       int j;
> > +       int j, zero;
> > +       unsigned long decoded_pfn;
> >
> >         for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
> >                 if (unlikely(buf[j] == BM_END_OF_MAP))
> >                         break;
> >
> > -               if (pfn_valid(buf[j]) && memory_bm_pfn_present(bm, buf[j])) {
> > -                       memory_bm_set_bit(bm, buf[j]);
> > +               zero = !!(buf[j] & ENCODED_PFN_ZERO_FLAG);
> > +               decoded_pfn = buf[j] & ENCODED_PFN_MASK;
> > +               if (pfn_valid(decoded_pfn) && memory_bm_pfn_present(bm, decoded_pfn)) {
> > +                       memory_bm_set_bit(bm, decoded_pfn);
> > +                       if (zero)
> > +                               memory_bm_set_bit(zero_bm, decoded_pfn);
> >                 } else {
> > -                       if (!pfn_valid(buf[j]))
> > +                       if (!pfn_valid(decoded_pfn))
> >                                 pr_err(FW_BUG "Memory map mismatch at 0x%llx after hibernation\n",
> > -                                      (unsigned long long)PFN_PHYS(buf[j]));
> > +                                      (unsigned long long)PFN_PHYS(decoded_pfn));
> >                         return -EFAULT;
> >                 }
> >         }
> > @@ -2631,6 +2687,7 @@ int snapshot_write_next(struct snapshot_handle *handle)
> >         static struct chain_allocator ca;
> >         int error = 0;
> >
> > +next:
> >         /* Check if we have already loaded the entire image */
> >         if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages)
> >                 return 0;
> > @@ -2657,9 +2714,13 @@ int snapshot_write_next(struct snapshot_handle *handle)
> >                 if (error)
> >                         return error;
> >
> > +               error = memory_bm_create(&zero_bm, GFP_ATOMIC, PG_ANY);
> > +               if (error)
> > +                       return error;
> > +
> >                 hibernate_restore_protection_begin();
> >         } else if (handle->cur <= nr_meta_pages + 1) {
> > -               error = unpack_orig_pfns(buffer, &copy_bm);
> > +               error = unpack_orig_pfns(buffer, &copy_bm, &zero_bm);
> >                 if (error)
> >                         return error;
> >
> > @@ -2686,6 +2747,14 @@ int snapshot_write_next(struct snapshot_handle *handle)
> >                         handle->sync_read = 0;
> >         }
> >         handle->cur++;
> > +
> > +       /* Zero pages were not included in the image, memset it and move on. */
> > +       if ((handle->cur > nr_meta_pages + 1) &&
> > +                       memory_bm_test_bit(&zero_bm, memory_bm_get_current(&orig_bm))) {
> > +               memset(handle->buffer, 0, PAGE_SIZE);
> > +               goto next;
> > +       }
> > +
> >         return PAGE_SIZE;
> >  }
> >
> > --
> > 2.39.0.314.g84b9a713c41-goog
> >

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3] PM: hibernate: don't store zero pages in the image file.
  2023-03-02 17:13 ` [PATCH v3] " Brian Geffon
@ 2023-03-23 16:19   ` Matthias Kaehlcke
  2023-03-27 14:20     ` Brian Geffon
  0 siblings, 1 reply; 16+ messages in thread
From: Matthias Kaehlcke @ 2023-03-23 16:19 UTC (permalink / raw)
  To: Brian Geffon
  Cc: Rafael J. Wysocki, Pavel Machek, Len Brown, linux-pm,
	linux-kernel

On Thu, Mar 02, 2023 at 12:13:48PM -0500, Brian Geffon wrote:
> On ChromeOS we've observed a considerable number of in-use pages filled with
> zeros. Today with hibernate it's entirely possible that saveable pages are just
> zero filled. Since we're already copying pages word-by-word in do_copy_page it
> becomes almost free to determine if a page was completely filled with zeros.
> 
> This change introduces a new bitmap which will track these zero pages. If a page
> is zero it will not be included in the saved image, instead to track these zero
> pages in the image file we will introduce a new flag which we will set on the
> packed PFN list. When reading back in the image file we will detect these zero
> page PFNs and rebuild the zero page bitmap.
> 
> When the image is being loaded through calls to write_next_page if we encounter
> a zero page we will silently memset it to 0 and then continue on to the next
> page. Given the implementation in snapshot_read_next/snapshot_write_next this
> change  will be transparent to non-compressed/compressed and swsusp modes of
> operation.
> 
> To provide some concrete numbers from simple ad-hoc testing, on a device which
> was lightly in use we saw that:
> 
>   PM: hibernation: Image created (964408 pages copied, 548304 zero pages)
> 
> Of the approximately 6.2GB of saveable pages 2.2GB (36%) were just zero filled
> and could be tracked entirely within the packed PFN list. The savings would
> obviously be much lower for lzo compressed images, but even in the case of
> compression not copying pages across to the compression threads will still
> speed things up. It's also possible that we would see better overall compression
> ratios as larger regions of "real data" would improve the compressibility.
> 
> Finally, such an approach could dramatically improve swsusp performance
> as each one of those zero pages requires a write syscall to reload, by
> handling it as part of the packed PFN list we're able to fully avoid
> that.
> 
>  Patch v2 -> v3:
>    - Use nr_zero_pages rather than walking each pfn to count.
>    - Make sure zero_bm is allocated in safe pages on resume.
>      When reading in the pfn list and building the zero page bm
>      we don't know which pages are unsafe yet so we will need to
>      copy this bm to safe pages after the metadata has been read.
> 
>  Patch v1 -> v2:
>    - minor code mistake from rebasing corrected.
> 
> Signed-off-by: Brian Geffon <bgeffon@google.com>
> ---
>  kernel/power/snapshot.c | 169 +++++++++++++++++++++++++++++++---------
>  1 file changed, 133 insertions(+), 36 deletions(-)
> 
> diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
> index cd8b7b35f1e8b..a2c4fe17f9067 100644
> --- a/kernel/power/snapshot.c
> +++ b/kernel/power/snapshot.c

...

> @@ -1371,14 +1381,18 @@ static unsigned int count_data_pages(void)
>  
>  /*
>   * This is needed, because copy_page and memcpy are not usable for copying
> - * task structs.
> + * task structs. Returns 1 if a page was filled with only zeros, otherwise 0.

nit: s/a page/the page/

>   */
> -static inline void do_copy_page(long *dst, long *src)
> +static inline int do_copy_page(long *dst, long *src)
>  {
>  	int n;
> +	long z = 0;
>  
> -	for (n = PAGE_SIZE / sizeof(long); n; n--)
> +	for (n = PAGE_SIZE / sizeof(long); n; n--) {
> +		z |= *src;
>  		*dst++ = *src++;
> +	}
> +	return !z;
>  }

...

> -static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
> +static inline int copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
>  {
> -	safe_copy_page(page_address(pfn_to_page(dst_pfn)),
> +	return safe_copy_page(page_address(pfn_to_page(dst_pfn)),
>  				pfn_to_page(src_pfn));
>  }
>  #endif /* CONFIG_HIGHMEM */
>  
>  static void copy_data_pages(struct memory_bitmap *copy_bm,
> -			    struct memory_bitmap *orig_bm)
> +			    struct memory_bitmap *orig_bm,
> +			    struct memory_bitmap *zero_bm,
> +			    unsigned int *zero_count)
>  {
>  	struct zone *zone;
> -	unsigned long pfn;
> +	unsigned long pfn, copy_pfn;
>  
>  	for_each_populated_zone(zone) {
>  		unsigned long max_zone_pfn;
> @@ -1462,11 +1482,20 @@ static void copy_data_pages(struct memory_bitmap *copy_bm,
>  	}
>  	memory_bm_position_reset(orig_bm);
>  	memory_bm_position_reset(copy_bm);
> +	copy_pfn = memory_bm_next_pfn(copy_bm);
>  	for(;;) {
>  		pfn = memory_bm_next_pfn(orig_bm);
>  		if (unlikely(pfn == BM_END_OF_MAP))
>  			break;
> -		copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
> +		if (copy_data_page(copy_pfn, pfn)) {
> +			memory_bm_set_bit(zero_bm, pfn);
> +			if (zero_count)

This check is not needed. The function is called only once, with a pointer. The kernel
trusts itself if the pointer is supposed to be always != NULL.

Or better: use a local counter and have copy_data_pages() return the number of pages
that were actually copied, which is what the caller is interested in.

> +				(*zero_count)++;
> +
> +			/* We will reuse this copy_pfn for a real 'nonzero' page. */
> +			continue;
> +		}
> +		copy_pfn = memory_bm_next_pfn(copy_bm);
>  	}
>  }

...

> @@ -2247,24 +2299,34 @@ static int load_header(struct swsusp_info *info)
>   * unpack_orig_pfns - Set bits corresponding to given PFNs in a memory bitmap.
>   * @bm: Memory bitmap.
>   * @buf: Area of memory containing the PFNs.
> + * @zero_bm: Memory bitmap which will be populated with the PFNs of zero pages.
>   *
>   * For each element of the array pointed to by @buf (1 page at a time), set the
> - * corresponding bit in @bm.
> + * corresponding bit in @bm. If the the page was originally populated with only
> + * zeros then a corresponding bit will also be set in @zero_bm.

s/the the/the/

...

> @@ -2486,6 +2548,7 @@ static inline void free_highmem_data(void) {}
>   * prepare_image - Make room for loading hibernation image.
>   * @new_bm: Uninitialized memory bitmap structure.
>   * @bm: Memory bitmap with unsafe pages marked.
> + * @zero_bm: Memory bitmap containing the zero pages.

That sounds as if the memory bitmap actually contained zero pages. I suggest to
change it to something like the comment for 'bm' above, i.e. "... with zero
pages marked"

>   *
>   * Use @bm to mark the pages that will be overwritten in the process of
>   * restoring the system memory state from the suspend image ("unsafe" pages)
> @@ -2496,8 +2559,12 @@ static inline void free_highmem_data(void) {}
>   * pages will be used for just yet.  Instead, we mark them all as allocated and
>   * create a lists of "safe" pages to be used later.  On systems with high
>   * memory a list of "safe" highmem pages is created too.
> + *
> + * Because we didn't know which pages were unsafe when we created the zero bm we
> + * will make a copy of it and recreate it within safe pages.

nit: s/we will make/we make/

>   */
> -static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
> +static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm,
> +		struct memory_bitmap *zero_bm)
>  {
>  	unsigned int nr_pages, nr_highmem;
>  	struct linked_page *lp;
> @@ -2516,6 +2583,20 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
>  
>  	duplicate_memory_bitmap(new_bm, bm);
>  	memory_bm_free(bm, PG_UNSAFE_KEEP);
> +	error = memory_bm_create(bm, GFP_ATOMIC, PG_ANY);
> +	if (error)
> +		goto Free;
> +
> +	/* use bm as storage while we rebuild zero_bm using safe pages */

Re-using the 'bm' parameter is confusing, it should be avoided IMO unless there
is a real benefit. struct memory_bitmap isn't that big, why not create a local
variable 'zero_mb_tmp' or similar as a temporary store for the zero page bitmap?

> +	duplicate_memory_bitmap(bm, zero_bm);
> +	memory_bm_free(zero_bm, PG_UNSAFE_KEEP);
> +	error = memory_bm_create(zero_bm, GFP_ATOMIC, PG_SAFE);
> +	if (error)
> +		goto Free;
> +	duplicate_memory_bitmap(zero_bm, bm);
> +	memory_bm_free(bm, PG_UNSAFE_KEEP);
> +	/* at this point zero_bm is in safe pages and we can use it while restoring */
> +
>  	if (nr_highmem > 0) {
>  		error = prepare_highmem_image(bm, &nr_highmem);
>  		if (error)

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3] PM: hibernate: don't store zero pages in the image file.
  2023-03-23 16:19   ` Matthias Kaehlcke
@ 2023-03-27 14:20     ` Brian Geffon
  0 siblings, 0 replies; 16+ messages in thread
From: Brian Geffon @ 2023-03-27 14:20 UTC (permalink / raw)
  To: Matthias Kaehlcke
  Cc: Rafael J. Wysocki, Pavel Machek, Len Brown, linux-pm,
	linux-kernel

On Thu, Mar 23, 2023 at 12:19 PM Matthias Kaehlcke <mka@chromium.org> wrote:
>

Hi Matthias,
Thanks for taking a look.

> On Thu, Mar 02, 2023 at 12:13:48PM -0500, Brian Geffon wrote:
> > On ChromeOS we've observed a considerable number of in-use pages filled with
> > zeros. Today with hibernate it's entirely possible that saveable pages are just
> > zero filled. Since we're already copying pages word-by-word in do_copy_page it
> > becomes almost free to determine if a page was completely filled with zeros.
> >
> > This change introduces a new bitmap which will track these zero pages. If a page
> > is zero it will not be included in the saved image, instead to track these zero
> > pages in the image file we will introduce a new flag which we will set on the
> > packed PFN list. When reading back in the image file we will detect these zero
> > page PFNs and rebuild the zero page bitmap.
> >
> > When the image is being loaded through calls to write_next_page if we encounter
> > a zero page we will silently memset it to 0 and then continue on to the next
> > page. Given the implementation in snapshot_read_next/snapshot_write_next this
> > change  will be transparent to non-compressed/compressed and swsusp modes of
> > operation.
> >
> > To provide some concrete numbers from simple ad-hoc testing, on a device which
> > was lightly in use we saw that:
> >
> >   PM: hibernation: Image created (964408 pages copied, 548304 zero pages)
> >
> > Of the approximately 6.2GB of saveable pages 2.2GB (36%) were just zero filled
> > and could be tracked entirely within the packed PFN list. The savings would
> > obviously be much lower for lzo compressed images, but even in the case of
> > compression not copying pages across to the compression threads will still
> > speed things up. It's also possible that we would see better overall compression
> > ratios as larger regions of "real data" would improve the compressibility.
> >
> > Finally, such an approach could dramatically improve swsusp performance
> > as each one of those zero pages requires a write syscall to reload, by
> > handling it as part of the packed PFN list we're able to fully avoid
> > that.
> >
> >  Patch v2 -> v3:
> >    - Use nr_zero_pages rather than walking each pfn to count.
> >    - Make sure zero_bm is allocated in safe pages on resume.
> >      When reading in the pfn list and building the zero page bm
> >      we don't know which pages are unsafe yet so we will need to
> >      copy this bm to safe pages after the metadata has been read.
> >
> >  Patch v1 -> v2:
> >    - minor code mistake from rebasing corrected.
> >
> > Signed-off-by: Brian Geffon <bgeffon@google.com>
> > ---
> >  kernel/power/snapshot.c | 169 +++++++++++++++++++++++++++++++---------
> >  1 file changed, 133 insertions(+), 36 deletions(-)
> >
> > diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
> > index cd8b7b35f1e8b..a2c4fe17f9067 100644
> > --- a/kernel/power/snapshot.c
> > +++ b/kernel/power/snapshot.c
>
> ...
>
> > @@ -1371,14 +1381,18 @@ static unsigned int count_data_pages(void)
> >
> >  /*
> >   * This is needed, because copy_page and memcpy are not usable for copying
> > - * task structs.
> > + * task structs. Returns 1 if a page was filled with only zeros, otherwise 0.
>
> nit: s/a page/the page/

Ack. will fix in follow up version.

>
> >   */
> > -static inline void do_copy_page(long *dst, long *src)
> > +static inline int do_copy_page(long *dst, long *src)
> >  {
> >       int n;
> > +     long z = 0;
> >
> > -     for (n = PAGE_SIZE / sizeof(long); n; n--)
> > +     for (n = PAGE_SIZE / sizeof(long); n; n--) {
> > +             z |= *src;
> >               *dst++ = *src++;
> > +     }
> > +     return !z;
> >  }
>
> ...
>
> > -static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
> > +static inline int copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
> >  {
> > -     safe_copy_page(page_address(pfn_to_page(dst_pfn)),
> > +     return safe_copy_page(page_address(pfn_to_page(dst_pfn)),
> >                               pfn_to_page(src_pfn));
> >  }
> >  #endif /* CONFIG_HIGHMEM */
> >
> >  static void copy_data_pages(struct memory_bitmap *copy_bm,
> > -                         struct memory_bitmap *orig_bm)
> > +                         struct memory_bitmap *orig_bm,
> > +                         struct memory_bitmap *zero_bm,
> > +                         unsigned int *zero_count)
> >  {
> >       struct zone *zone;
> > -     unsigned long pfn;
> > +     unsigned long pfn, copy_pfn;
> >
> >       for_each_populated_zone(zone) {
> >               unsigned long max_zone_pfn;
> > @@ -1462,11 +1482,20 @@ static void copy_data_pages(struct memory_bitmap *copy_bm,
> >       }
> >       memory_bm_position_reset(orig_bm);
> >       memory_bm_position_reset(copy_bm);
> > +     copy_pfn = memory_bm_next_pfn(copy_bm);
> >       for(;;) {
> >               pfn = memory_bm_next_pfn(orig_bm);
> >               if (unlikely(pfn == BM_END_OF_MAP))
> >                       break;
> > -             copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
> > +             if (copy_data_page(copy_pfn, pfn)) {
> > +                     memory_bm_set_bit(zero_bm, pfn);
> > +                     if (zero_count)
>
> This check is not needed. The function is called only once, with a pointer. The kernel
> trusts itself if the pointer is supposed to be always != NULL.
>
> Or better: use a local counter and have copy_data_pages() return the number of pages
> that were actually copied, which is what the caller is interested in.

That makes a lot of sense, I'll switch to this approach in next iteration.

>
> > +                             (*zero_count)++;
> > +
> > +                     /* We will reuse this copy_pfn for a real 'nonzero' page. */
> > +                     continue;
> > +             }
> > +             copy_pfn = memory_bm_next_pfn(copy_bm);
> >       }
> >  }
>
> ...
>
> > @@ -2247,24 +2299,34 @@ static int load_header(struct swsusp_info *info)
> >   * unpack_orig_pfns - Set bits corresponding to given PFNs in a memory bitmap.
> >   * @bm: Memory bitmap.
> >   * @buf: Area of memory containing the PFNs.
> > + * @zero_bm: Memory bitmap which will be populated with the PFNs of zero pages.
> >   *
> >   * For each element of the array pointed to by @buf (1 page at a time), set the
> > - * corresponding bit in @bm.
> > + * corresponding bit in @bm. If the the page was originally populated with only
> > + * zeros then a corresponding bit will also be set in @zero_bm.
>
> s/the the/the/

Ack.

>
> ...
>
> > @@ -2486,6 +2548,7 @@ static inline void free_highmem_data(void) {}
> >   * prepare_image - Make room for loading hibernation image.
> >   * @new_bm: Uninitialized memory bitmap structure.
> >   * @bm: Memory bitmap with unsafe pages marked.
> > + * @zero_bm: Memory bitmap containing the zero pages.
>
> That sounds as if the memory bitmap actually contained zero pages. I suggest to
> change it to something like the comment for 'bm' above, i.e. "... with zero
> pages marked"

Sure that makes sense, will reword it.

>
> >   *
> >   * Use @bm to mark the pages that will be overwritten in the process of
> >   * restoring the system memory state from the suspend image ("unsafe" pages)
> > @@ -2496,8 +2559,12 @@ static inline void free_highmem_data(void) {}
> >   * pages will be used for just yet.  Instead, we mark them all as allocated and
> >   * create a lists of "safe" pages to be used later.  On systems with high
> >   * memory a list of "safe" highmem pages is created too.
> > + *
> > + * Because we didn't know which pages were unsafe when we created the zero bm we
> > + * will make a copy of it and recreate it within safe pages.
>
> nit: s/we will make/we make/

Ack.

>
> >   */
> > -static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
> > +static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm,
> > +             struct memory_bitmap *zero_bm)
> >  {
> >       unsigned int nr_pages, nr_highmem;
> >       struct linked_page *lp;
> > @@ -2516,6 +2583,20 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
> >
> >       duplicate_memory_bitmap(new_bm, bm);
> >       memory_bm_free(bm, PG_UNSAFE_KEEP);
> > +     error = memory_bm_create(bm, GFP_ATOMIC, PG_ANY);
> > +     if (error)
> > +             goto Free;
> > +
> > +     /* use bm as storage while we rebuild zero_bm using safe pages */
>
> Re-using the 'bm' parameter is confusing, it should be avoided IMO unless there
> is a real benefit. struct memory_bitmap isn't that big, why not create a local
> variable 'zero_mb_tmp' or similar as a temporary store for the zero page bitmap?

Sure, I'll send an updated patch which does that.

>
> > +     duplicate_memory_bitmap(bm, zero_bm);
> > +     memory_bm_free(zero_bm, PG_UNSAFE_KEEP);
> > +     error = memory_bm_create(zero_bm, GFP_ATOMIC, PG_SAFE);
> > +     if (error)
> > +             goto Free;
> > +     duplicate_memory_bitmap(zero_bm, bm);
> > +     memory_bm_free(bm, PG_UNSAFE_KEEP);
> > +     /* at this point zero_bm is in safe pages and we can use it while restoring */
> > +
> >       if (nr_highmem > 0) {
> >               error = prepare_highmem_image(bm, &nr_highmem);
> >               if (error)

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v4] PM: hibernate: don't store zero pages in the image file.
  2023-01-13 19:30 [PATCH] PM: hibernate: don't store zero pages in the image file Brian Geffon
  2023-01-14  0:15 ` [PATCH v2] " Brian Geffon
  2023-03-02 17:13 ` [PATCH v3] " Brian Geffon
@ 2023-04-06 18:20 ` Brian Geffon
  2023-06-22 16:22   ` Rafael J. Wysocki
  2023-06-26 15:44 ` [PATCH] " Brian Geffon
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 16+ messages in thread
From: Brian Geffon @ 2023-04-06 18:20 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Matthias Kaehlcke, Pavel Machek, Len Brown, linux-pm,
	linux-kernel, Brian Geffon

On ChromeOS we've observed a considerable number of in-use pages filled with
zeros. Today with hibernate it's entirely possible that saveable pages are just
zero filled. Since we're already copying pages word-by-word in do_copy_page it
becomes almost free to determine if a page was completely filled with zeros.

This change introduces a new bitmap which will track these zero pages. If a page
is zero it will not be included in the saved image, instead to track these zero
pages in the image file we will introduce a new flag which we will set on the
packed PFN list. When reading back in the image file we will detect these zero
page PFNs and rebuild the zero page bitmap.

When the image is being loaded through calls to write_next_page if we encounter
a zero page we will silently memset it to 0 and then continue on to the next
page. Given the implementation in snapshot_read_next/snapshot_write_next this
change  will be transparent to non-compressed/compressed and swsusp modes of
operation.

To provide some concrete numbers from simple ad-hoc testing, on a device which
was lightly in use we saw that:

PM: hibernation: Image created (964408 pages copied, 548304 zero pages)

Of the approximately 6.2GB of saveable pages 2.2GB (36%) were just zero filled
and could be tracked entirely within the packed PFN list. The savings would
obviously be much lower for lzo compressed images, but even in the case of
compression not copying pages across to the compression threads will still
speed things up. It's also possible that we would see better overall compression
ratios as larger regions of "real data" would improve the compressibility.

Finally, such an approach could dramatically improve swsusp performance
as each one of those zero pages requires a write syscall to reload, by
handling it as part of the packed PFN list we're able to fully avoid
that.

Patch v3 -> v4:
- Suggestions from Matthias Kaehlcke:
 - Return number of copy pages from copy_data_pages
 - Use an explicit temporary bitmap while moving the zerm_bm
   to safe pages.

Patch v2 -> v3:
- Use nr_zero_pages rather than walking each pfn to count.
- Make sure zero_bm is allocated in safe pages on resume.
 When reading in the pfn list and building the zero page bm
 we don't know which pages are unsafe yet so we will need to
 copy this bm to safe pages after the metadata has been read.

Patch v1 -> v2:
- minor code mistake from rebasing corrected.

Signed-off-by: Brian Geffon <bgeffon@google.com>
---
 kernel/power/snapshot.c | 180 +++++++++++++++++++++++++++++++---------
 1 file changed, 142 insertions(+), 38 deletions(-)

diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index cd8b7b35f1e8..43d0c45fa05c 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -404,6 +404,7 @@ struct bm_position {
 	struct mem_zone_bm_rtree *zone;
 	struct rtree_node *node;
 	unsigned long node_pfn;
+	unsigned long cur_pfn;
 	int node_bit;
 };
 
@@ -589,6 +590,7 @@ static void memory_bm_position_reset(struct memory_bitmap *bm)
 	bm->cur.node = list_entry(bm->cur.zone->leaves.next,
 				  struct rtree_node, list);
 	bm->cur.node_pfn = 0;
+	bm->cur.cur_pfn = BM_END_OF_MAP;
 	bm->cur.node_bit = 0;
 }
 
@@ -799,6 +801,7 @@ static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
 	bm->cur.zone = zone;
 	bm->cur.node = node;
 	bm->cur.node_pfn = (pfn - zone->start_pfn) & ~BM_BLOCK_MASK;
+	bm->cur.cur_pfn = pfn;
 
 	/* Set return values */
 	*addr = node->data;
@@ -850,6 +853,11 @@ static void memory_bm_clear_current(struct memory_bitmap *bm)
 	clear_bit(bit, bm->cur.node->data);
 }
 
+static unsigned long memory_bm_get_current(struct memory_bitmap *bm)
+{
+	return bm->cur.cur_pfn;
+}
+
 static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
 {
 	void *addr;
@@ -929,10 +937,12 @@ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
 		if (bit < bits) {
 			pfn = bm->cur.zone->start_pfn + bm->cur.node_pfn + bit;
 			bm->cur.node_bit = bit + 1;
+			bm->cur.cur_pfn = pfn;
 			return pfn;
 		}
 	} while (rtree_next_node(bm));
 
+	bm->cur.cur_pfn = BM_END_OF_MAP;
 	return BM_END_OF_MAP;
 }
 
@@ -1371,14 +1381,18 @@ static unsigned int count_data_pages(void)
 
 /*
  * This is needed, because copy_page and memcpy are not usable for copying
- * task structs.
+ * task structs. Returns 1 if the page was filled with only zeros, otherwise 0.
  */
-static inline void do_copy_page(long *dst, long *src)
+static inline int do_copy_page(long *dst, long *src)
 {
 	int n;
+	long z = 0;
 
-	for (n = PAGE_SIZE / sizeof(long); n; n--)
+	for (n = PAGE_SIZE / sizeof(long); n; n--) {
+		z |= *src;
 		*dst++ = *src++;
+	}
+	return !z;
 }
 
 /**
@@ -1389,15 +1403,17 @@ static inline void do_copy_page(long *dst, long *src)
  * CONFIG_ARCH_HAS_SET_DIRECT_MAP is not set. In that case kernel_page_present()
  * always returns 'true'.
  */
-static void safe_copy_page(void *dst, struct page *s_page)
+static int safe_copy_page(void *dst, struct page *s_page)
 {
+	int ret;
 	if (kernel_page_present(s_page)) {
-		do_copy_page(dst, page_address(s_page));
+		ret = do_copy_page(dst, page_address(s_page));
 	} else {
 		hibernate_map_page(s_page);
-		do_copy_page(dst, page_address(s_page));
+		ret = do_copy_page(dst, page_address(s_page));
 		hibernate_unmap_page(s_page);
 	}
+	return ret;
 }
 
 #ifdef CONFIG_HIGHMEM
@@ -1407,17 +1423,18 @@ static inline struct page *page_is_saveable(struct zone *zone, unsigned long pfn
 		saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
 }
 
-static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
+static int copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
 {
 	struct page *s_page, *d_page;
 	void *src, *dst;
+	int ret;
 
 	s_page = pfn_to_page(src_pfn);
 	d_page = pfn_to_page(dst_pfn);
 	if (PageHighMem(s_page)) {
 		src = kmap_atomic(s_page);
 		dst = kmap_atomic(d_page);
-		do_copy_page(dst, src);
+		ret = do_copy_page(dst, src);
 		kunmap_atomic(dst);
 		kunmap_atomic(src);
 	} else {
@@ -1426,30 +1443,38 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
 			 * The page pointed to by src may contain some kernel
 			 * data modified by kmap_atomic()
 			 */
-			safe_copy_page(buffer, s_page);
+			ret = safe_copy_page(buffer, s_page);
 			dst = kmap_atomic(d_page);
 			copy_page(dst, buffer);
 			kunmap_atomic(dst);
 		} else {
-			safe_copy_page(page_address(d_page), s_page);
+			ret = safe_copy_page(page_address(d_page), s_page);
 		}
 	}
+	return ret;
 }
 #else
 #define page_is_saveable(zone, pfn)	saveable_page(zone, pfn)
 
-static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
+static inline int copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
 {
-	safe_copy_page(page_address(pfn_to_page(dst_pfn)),
+	return safe_copy_page(page_address(pfn_to_page(dst_pfn)),
 				pfn_to_page(src_pfn));
 }
 #endif /* CONFIG_HIGHMEM */
 
-static void copy_data_pages(struct memory_bitmap *copy_bm,
-			    struct memory_bitmap *orig_bm)
+/*
+ * Copy data pages will copy all pages into pages pulled from the copy_bm.
+ * If a page was entirely filled with zeros it will be marked in the zero_bm.
+ *
+ * Returns the number of pages copied.
+ */
+static unsigned long copy_data_pages(struct memory_bitmap *copy_bm,
+			    struct memory_bitmap *orig_bm,
+			    struct memory_bitmap *zero_bm)
 {
 	struct zone *zone;
-	unsigned long pfn;
+	unsigned long pfn, copy_pfn, copied_pages = 0;
 
 	for_each_populated_zone(zone) {
 		unsigned long max_zone_pfn;
@@ -1462,18 +1487,30 @@ static void copy_data_pages(struct memory_bitmap *copy_bm,
 	}
 	memory_bm_position_reset(orig_bm);
 	memory_bm_position_reset(copy_bm);
+	copy_pfn = memory_bm_next_pfn(copy_bm);
 	for(;;) {
 		pfn = memory_bm_next_pfn(orig_bm);
 		if (unlikely(pfn == BM_END_OF_MAP))
 			break;
-		copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
+		if (copy_data_page(copy_pfn, pfn)) {
+			memory_bm_set_bit(zero_bm, pfn);
+
+			/* We will reuse this copy_pfn for a real 'nonzero' page. */
+			continue;
+		}
+		copied_pages++;
+		copy_pfn = memory_bm_next_pfn(copy_bm);
 	}
+	return copied_pages;
 }
 
 /* Total number of image pages */
 static unsigned int nr_copy_pages;
 /* Number of pages needed for saving the original pfns of the image pages */
 static unsigned int nr_meta_pages;
+/* Number of zero pages */
+static unsigned int nr_zero_pages;
+
 /*
  * Numbers of normal and highmem page frames allocated for hibernation image
  * before suspending devices.
@@ -1494,6 +1531,9 @@ static struct memory_bitmap orig_bm;
  */
 static struct memory_bitmap copy_bm;
 
+/* Memory bitmap which tracks which saveable pages were zero filled. */
+static struct memory_bitmap zero_bm;
+
 /**
  * swsusp_free - Free pages allocated for hibernation image.
  *
@@ -1538,6 +1578,7 @@ void swsusp_free(void)
 out:
 	nr_copy_pages = 0;
 	nr_meta_pages = 0;
+	nr_zero_pages = 0;
 	restore_pblist = NULL;
 	buffer = NULL;
 	alloc_normal = 0;
@@ -1756,8 +1797,15 @@ int hibernate_preallocate_memory(void)
 		goto err_out;
 	}
 
+	error = memory_bm_create(&zero_bm, GFP_IMAGE, PG_ANY);
+	if (error) {
+		pr_err("Cannot allocate zero bitmap\n");
+		goto err_out;
+	}
+
 	alloc_normal = 0;
 	alloc_highmem = 0;
+	nr_zero_pages = 0;
 
 	/* Count the number of saveable data pages. */
 	save_highmem = count_highmem_pages();
@@ -2014,7 +2062,6 @@ static int swsusp_alloc(struct memory_bitmap *copy_bm,
 asmlinkage __visible int swsusp_save(void)
 {
 	unsigned int nr_pages, nr_highmem;
-
 	pr_info("Creating image:\n");
 
 	drain_local_pages(NULL);
@@ -2037,19 +2084,19 @@ asmlinkage __visible int swsusp_save(void)
 	 * Kill them.
 	 */
 	drain_local_pages(NULL);
-	copy_data_pages(&copy_bm, &orig_bm);
+	nr_copy_pages = copy_data_pages(&copy_bm, &orig_bm, &zero_bm);
 
 	/*
 	 * End of critical section. From now on, we can write to memory,
 	 * but we should not touch disk. This specially means we must _not_
 	 * touch swap space! Except we must write out our image of course.
 	 */
-
 	nr_pages += nr_highmem;
-	nr_copy_pages = nr_pages;
+	/* We don't actually copy the zero pages */
+	nr_zero_pages = nr_pages - nr_copy_pages;
 	nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
 
-	pr_info("Image created (%d pages copied)\n", nr_pages);
+	pr_info("Image created (%d pages copied, %d zero pages)\n", nr_copy_pages, nr_zero_pages);
 
 	return 0;
 }
@@ -2094,15 +2141,22 @@ static int init_header(struct swsusp_info *info)
 	return init_header_complete(info);
 }
 
+#define ENCODED_PFN_ZERO_FLAG ((unsigned long)1 << (BITS_PER_LONG - 1))
+#define ENCODED_PFN_MASK (~ENCODED_PFN_ZERO_FLAG)
+
 /**
  * pack_pfns - Prepare PFNs for saving.
  * @bm: Memory bitmap.
  * @buf: Memory buffer to store the PFNs in.
+ * @zero_bm: Memory bitmap containing PFNs of zero pages.
  *
  * PFNs corresponding to set bits in @bm are stored in the area of memory
- * pointed to by @buf (1 page at a time).
+ * pointed to by @buf (1 page at a time). Pages which were filled with only
+ * zeros will have the highest bit set in the packed format to distinguish
+ * them from PFNs which will be contained in the image file.
  */
-static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
+static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm,
+		struct memory_bitmap *zero_bm)
 {
 	int j;
 
@@ -2110,6 +2164,8 @@ static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
 		buf[j] = memory_bm_next_pfn(bm);
 		if (unlikely(buf[j] == BM_END_OF_MAP))
 			break;
+		if (memory_bm_test_bit(zero_bm, buf[j]))
+			buf[j] |= ENCODED_PFN_ZERO_FLAG;
 	}
 }
 
@@ -2151,7 +2207,7 @@ int snapshot_read_next(struct snapshot_handle *handle)
 		memory_bm_position_reset(&copy_bm);
 	} else if (handle->cur <= nr_meta_pages) {
 		clear_page(buffer);
-		pack_pfns(buffer, &orig_bm);
+		pack_pfns(buffer, &orig_bm, &zero_bm);
 	} else {
 		struct page *page;
 
@@ -2247,24 +2303,34 @@ static int load_header(struct swsusp_info *info)
  * unpack_orig_pfns - Set bits corresponding to given PFNs in a memory bitmap.
  * @bm: Memory bitmap.
  * @buf: Area of memory containing the PFNs.
+ * @zero_bm: Memory bitmap with the zero PFNs marked.
  *
  * For each element of the array pointed to by @buf (1 page at a time), set the
- * corresponding bit in @bm.
+ * corresponding bit in @bm. If the page was originally populated with only
+ * zeros then a corresponding bit will also be set in @zero_bm.
  */
-static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
+static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm,
+		struct memory_bitmap *zero_bm)
 {
-	int j;
+	int j, zero;
+	unsigned long decoded_pfn;
 
 	for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
 		if (unlikely(buf[j] == BM_END_OF_MAP))
 			break;
 
-		if (pfn_valid(buf[j]) && memory_bm_pfn_present(bm, buf[j])) {
-			memory_bm_set_bit(bm, buf[j]);
+		zero = !!(buf[j] & ENCODED_PFN_ZERO_FLAG);
+		decoded_pfn = buf[j] & ENCODED_PFN_MASK;
+		if (pfn_valid(decoded_pfn) && memory_bm_pfn_present(bm, decoded_pfn)) {
+			memory_bm_set_bit(bm, decoded_pfn);
+			if (zero) {
+				memory_bm_set_bit(zero_bm, decoded_pfn);
+				nr_zero_pages++;
+			}
 		} else {
-			if (!pfn_valid(buf[j]))
+			if (!pfn_valid(decoded_pfn))
 				pr_err(FW_BUG "Memory map mismatch at 0x%llx after hibernation\n",
-				       (unsigned long long)PFN_PHYS(buf[j]));
+				       (unsigned long long)PFN_PHYS(decoded_pfn));
 			return -EFAULT;
 		}
 	}
@@ -2486,6 +2552,7 @@ static inline void free_highmem_data(void) {}
  * prepare_image - Make room for loading hibernation image.
  * @new_bm: Uninitialized memory bitmap structure.
  * @bm: Memory bitmap with unsafe pages marked.
+ * @zero_bm: Memory bitmap containing the zero pages.
  *
  * Use @bm to mark the pages that will be overwritten in the process of
  * restoring the system memory state from the suspend image ("unsafe" pages)
@@ -2496,11 +2563,16 @@ static inline void free_highmem_data(void) {}
  * pages will be used for just yet.  Instead, we mark them all as allocated and
  * create a lists of "safe" pages to be used later.  On systems with high
  * memory a list of "safe" highmem pages is created too.
+ *
+ * Because we didn't know which pages were unsafe when we created the zero bm we
+ * make a copy of it and recreate it within safe pages.
  */
-static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
+static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm,
+		struct memory_bitmap *zero_bm)
 {
 	unsigned int nr_pages, nr_highmem;
 	struct linked_page *lp;
+	struct memory_bitmap tmp;
 	int error;
 
 	/* If there is no highmem, the buffer will not be necessary */
@@ -2516,6 +2588,22 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
 
 	duplicate_memory_bitmap(new_bm, bm);
 	memory_bm_free(bm, PG_UNSAFE_KEEP);
+
+	/* Make a copy of the zero bm so it can be created in safe pages */
+	error = memory_bm_create(&tmp, GFP_ATOMIC, PG_ANY);
+	if (error)
+		goto Free;
+	duplicate_memory_bitmap(&tmp, zero_bm);
+	memory_bm_free(zero_bm, PG_UNSAFE_KEEP);
+
+	/* Recreate zero_bm in safe pages */
+	error = memory_bm_create(zero_bm, GFP_ATOMIC, PG_SAFE);
+	if (error)
+		goto Free;
+	duplicate_memory_bitmap(zero_bm, &tmp);
+	memory_bm_free(&tmp, PG_UNSAFE_KEEP);
+	/* at this point zero_bm is in safe pages and we can use it while restoring */
+
 	if (nr_highmem > 0) {
 		error = prepare_highmem_image(bm, &nr_highmem);
 		if (error)
@@ -2530,7 +2618,7 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
 	 *
 	 * nr_copy_pages cannot be less than allocated_unsafe_pages too.
 	 */
-	nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
+	nr_pages = (nr_zero_pages + nr_copy_pages) - nr_highmem - allocated_unsafe_pages;
 	nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
 	while (nr_pages > 0) {
 		lp = get_image_page(GFP_ATOMIC, PG_SAFE);
@@ -2543,7 +2631,7 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
 		nr_pages--;
 	}
 	/* Preallocate memory for the image */
-	nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
+	nr_pages = (nr_zero_pages + nr_copy_pages) - nr_highmem - allocated_unsafe_pages;
 	while (nr_pages > 0) {
 		lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
 		if (!lp) {
@@ -2631,8 +2719,9 @@ int snapshot_write_next(struct snapshot_handle *handle)
 	static struct chain_allocator ca;
 	int error = 0;
 
+next:
 	/* Check if we have already loaded the entire image */
-	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages)
+	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages + nr_zero_pages)
 		return 0;
 
 	handle->sync_read = 1;
@@ -2657,19 +2746,26 @@ int snapshot_write_next(struct snapshot_handle *handle)
 		if (error)
 			return error;
 
+		error = memory_bm_create(&zero_bm, GFP_ATOMIC, PG_ANY);
+		if (error)
+			return error;
+
+		nr_zero_pages = 0;
+
 		hibernate_restore_protection_begin();
 	} else if (handle->cur <= nr_meta_pages + 1) {
-		error = unpack_orig_pfns(buffer, &copy_bm);
+		error = unpack_orig_pfns(buffer, &copy_bm, &zero_bm);
 		if (error)
 			return error;
 
 		if (handle->cur == nr_meta_pages + 1) {
-			error = prepare_image(&orig_bm, &copy_bm);
+			error = prepare_image(&orig_bm, &copy_bm, &zero_bm);
 			if (error)
 				return error;
 
 			chain_init(&ca, GFP_ATOMIC, PG_SAFE);
 			memory_bm_position_reset(&orig_bm);
+			memory_bm_position_reset(&zero_bm);
 			restore_pblist = NULL;
 			handle->buffer = get_buffer(&orig_bm, &ca);
 			handle->sync_read = 0;
@@ -2686,6 +2782,14 @@ int snapshot_write_next(struct snapshot_handle *handle)
 			handle->sync_read = 0;
 	}
 	handle->cur++;
+
+	/* Zero pages were not included in the image, memset it and move on. */
+	if ((handle->cur > (nr_meta_pages + 1)) &&
+			memory_bm_test_bit(&zero_bm, memory_bm_get_current(&orig_bm))) {
+		memset(handle->buffer, 0, PAGE_SIZE);
+		goto next;
+	}
+
 	return PAGE_SIZE;
 }
 
@@ -2702,7 +2806,7 @@ void snapshot_write_finalize(struct snapshot_handle *handle)
 	copy_last_highmem_page();
 	hibernate_restore_protect_page(handle->buffer);
 	/* Do that only if we have loaded the image entirely */
-	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) {
+	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages + nr_zero_pages) {
 		memory_bm_recycle(&orig_bm);
 		free_highmem_data();
 	}
-- 
2.40.0.577.gac1e443424-goog


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH v4] PM: hibernate: don't store zero pages in the image file.
  2023-04-06 18:20 ` [PATCH v4] " Brian Geffon
@ 2023-06-22 16:22   ` Rafael J. Wysocki
  0 siblings, 0 replies; 16+ messages in thread
From: Rafael J. Wysocki @ 2023-06-22 16:22 UTC (permalink / raw)
  To: Brian Geffon
  Cc: Rafael J. Wysocki, Matthias Kaehlcke, Pavel Machek, Len Brown,
	linux-pm, linux-kernel

On Thu, Apr 6, 2023 at 8:20 PM Brian Geffon <bgeffon@google.com> wrote:
>
> On ChromeOS we've observed a considerable number of in-use pages filled with
> zeros. Today with hibernate it's entirely possible that saveable pages are just
> zero filled. Since we're already copying pages word-by-word in do_copy_page it
> becomes almost free to determine if a page was completely filled with zeros.
>
> This change introduces a new bitmap which will track these zero pages. If a page
> is zero it will not be included in the saved image, instead to track these zero
> pages in the image file we will introduce a new flag which we will set on the
> packed PFN list. When reading back in the image file we will detect these zero
> page PFNs and rebuild the zero page bitmap.
>
> When the image is being loaded through calls to write_next_page if we encounter
> a zero page we will silently memset it to 0 and then continue on to the next
> page. Given the implementation in snapshot_read_next/snapshot_write_next this
> change  will be transparent to non-compressed/compressed and swsusp modes of
> operation.
>
> To provide some concrete numbers from simple ad-hoc testing, on a device which
> was lightly in use we saw that:
>
> PM: hibernation: Image created (964408 pages copied, 548304 zero pages)
>
> Of the approximately 6.2GB of saveable pages 2.2GB (36%) were just zero filled
> and could be tracked entirely within the packed PFN list. The savings would
> obviously be much lower for lzo compressed images, but even in the case of
> compression not copying pages across to the compression threads will still
> speed things up. It's also possible that we would see better overall compression
> ratios as larger regions of "real data" would improve the compressibility.
>
> Finally, such an approach could dramatically improve swsusp performance
> as each one of those zero pages requires a write syscall to reload, by
> handling it as part of the packed PFN list we're able to fully avoid
> that.
>
> Patch v3 -> v4:
> - Suggestions from Matthias Kaehlcke:
>  - Return number of copy pages from copy_data_pages
>  - Use an explicit temporary bitmap while moving the zerm_bm
>    to safe pages.
>
> Patch v2 -> v3:
> - Use nr_zero_pages rather than walking each pfn to count.
> - Make sure zero_bm is allocated in safe pages on resume.
>  When reading in the pfn list and building the zero page bm
>  we don't know which pages are unsafe yet so we will need to
>  copy this bm to safe pages after the metadata has been read.
>
> Patch v1 -> v2:
> - minor code mistake from rebasing corrected.
>
> Signed-off-by: Brian Geffon <bgeffon@google.com>
> ---
>  kernel/power/snapshot.c | 180 +++++++++++++++++++++++++++++++---------
>  1 file changed, 142 insertions(+), 38 deletions(-)
>
> diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
> index cd8b7b35f1e8..43d0c45fa05c 100644
> --- a/kernel/power/snapshot.c
> +++ b/kernel/power/snapshot.c
> @@ -404,6 +404,7 @@ struct bm_position {
>         struct mem_zone_bm_rtree *zone;
>         struct rtree_node *node;
>         unsigned long node_pfn;
> +       unsigned long cur_pfn;
>         int node_bit;
>  };
>
> @@ -589,6 +590,7 @@ static void memory_bm_position_reset(struct memory_bitmap *bm)
>         bm->cur.node = list_entry(bm->cur.zone->leaves.next,
>                                   struct rtree_node, list);
>         bm->cur.node_pfn = 0;
> +       bm->cur.cur_pfn = BM_END_OF_MAP;
>         bm->cur.node_bit = 0;
>  }
>
> @@ -799,6 +801,7 @@ static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
>         bm->cur.zone = zone;
>         bm->cur.node = node;
>         bm->cur.node_pfn = (pfn - zone->start_pfn) & ~BM_BLOCK_MASK;
> +       bm->cur.cur_pfn = pfn;
>
>         /* Set return values */
>         *addr = node->data;
> @@ -850,6 +853,11 @@ static void memory_bm_clear_current(struct memory_bitmap *bm)
>         clear_bit(bit, bm->cur.node->data);
>  }
>
> +static unsigned long memory_bm_get_current(struct memory_bitmap *bm)
> +{
> +       return bm->cur.cur_pfn;
> +}
> +
>  static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
>  {
>         void *addr;
> @@ -929,10 +937,12 @@ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
>                 if (bit < bits) {
>                         pfn = bm->cur.zone->start_pfn + bm->cur.node_pfn + bit;
>                         bm->cur.node_bit = bit + 1;
> +                       bm->cur.cur_pfn = pfn;
>                         return pfn;
>                 }
>         } while (rtree_next_node(bm));
>
> +       bm->cur.cur_pfn = BM_END_OF_MAP;
>         return BM_END_OF_MAP;
>  }
>
> @@ -1371,14 +1381,18 @@ static unsigned int count_data_pages(void)
>
>  /*
>   * This is needed, because copy_page and memcpy are not usable for copying
> - * task structs.
> + * task structs. Returns 1 if the page was filled with only zeros, otherwise 0.
>   */
> -static inline void do_copy_page(long *dst, long *src)
> +static inline int do_copy_page(long *dst, long *src)

Make it bool, please.

>  {
>         int n;
> +       long z = 0;

Please put this above the other declaration.

>
> -       for (n = PAGE_SIZE / sizeof(long); n; n--)
> +       for (n = PAGE_SIZE / sizeof(long); n; n--) {
> +               z |= *src;
>                 *dst++ = *src++;
> +       }
> +       return !z;
>  }
>
>  /**
> @@ -1389,15 +1403,17 @@ static inline void do_copy_page(long *dst, long *src)
>   * CONFIG_ARCH_HAS_SET_DIRECT_MAP is not set. In that case kernel_page_present()
>   * always returns 'true'.
>   */
> -static void safe_copy_page(void *dst, struct page *s_page)
> +static int safe_copy_page(void *dst, struct page *s_page)

This can be bool too.

>  {
> +       int ret;

I'd call this "zeros_only" or similar and it can be bool.

>         if (kernel_page_present(s_page)) {
> -               do_copy_page(dst, page_address(s_page));
> +               ret = do_copy_page(dst, page_address(s_page));
>         } else {
>                 hibernate_map_page(s_page);
> -               do_copy_page(dst, page_address(s_page));
> +               ret = do_copy_page(dst, page_address(s_page));
>                 hibernate_unmap_page(s_page);
>         }
> +       return ret;
>  }
>
>  #ifdef CONFIG_HIGHMEM
> @@ -1407,17 +1423,18 @@ static inline struct page *page_is_saveable(struct zone *zone, unsigned long pfn
>                 saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
>  }
>
> -static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
> +static int copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)

This can be bool.

>  {
>         struct page *s_page, *d_page;
>         void *src, *dst;
> +       int ret;

And this can be bool and please rename it to something more meaningful
like above.

>
>         s_page = pfn_to_page(src_pfn);
>         d_page = pfn_to_page(dst_pfn);
>         if (PageHighMem(s_page)) {
>                 src = kmap_atomic(s_page);
>                 dst = kmap_atomic(d_page);
> -               do_copy_page(dst, src);
> +               ret = do_copy_page(dst, src);
>                 kunmap_atomic(dst);
>                 kunmap_atomic(src);
>         } else {
> @@ -1426,30 +1443,38 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
>                          * The page pointed to by src may contain some kernel
>                          * data modified by kmap_atomic()
>                          */
> -                       safe_copy_page(buffer, s_page);
> +                       ret = safe_copy_page(buffer, s_page);
>                         dst = kmap_atomic(d_page);
>                         copy_page(dst, buffer);
>                         kunmap_atomic(dst);
>                 } else {
> -                       safe_copy_page(page_address(d_page), s_page);
> +                       ret = safe_copy_page(page_address(d_page), s_page);
>                 }
>         }
> +       return ret;
>  }
>  #else
>  #define page_is_saveable(zone, pfn)    saveable_page(zone, pfn)
>
> -static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
> +static inline int copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)

This can be bool.

>  {
> -       safe_copy_page(page_address(pfn_to_page(dst_pfn)),
> +       return safe_copy_page(page_address(pfn_to_page(dst_pfn)),
>                                 pfn_to_page(src_pfn));
>  }
>  #endif /* CONFIG_HIGHMEM */
>
> -static void copy_data_pages(struct memory_bitmap *copy_bm,
> -                           struct memory_bitmap *orig_bm)
> +/*
> + * Copy data pages will copy all pages into pages pulled from the copy_bm.
> + * If a page was entirely filled with zeros it will be marked in the zero_bm.
> + *
> + * Returns the number of pages copied.
> + */
> +static unsigned long copy_data_pages(struct memory_bitmap *copy_bm,
> +                           struct memory_bitmap *orig_bm,
> +                           struct memory_bitmap *zero_bm)
>  {
>         struct zone *zone;
> -       unsigned long pfn;
> +       unsigned long pfn, copy_pfn, copied_pages = 0;

Please put this above the other declaration.

>
>         for_each_populated_zone(zone) {
>                 unsigned long max_zone_pfn;
> @@ -1462,18 +1487,30 @@ static void copy_data_pages(struct memory_bitmap *copy_bm,
>         }
>         memory_bm_position_reset(orig_bm);
>         memory_bm_position_reset(copy_bm);
> +       copy_pfn = memory_bm_next_pfn(copy_bm);
>         for(;;) {
>                 pfn = memory_bm_next_pfn(orig_bm);
>                 if (unlikely(pfn == BM_END_OF_MAP))
>                         break;
> -               copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
> +               if (copy_data_page(copy_pfn, pfn)) {
> +                       memory_bm_set_bit(zero_bm, pfn);
> +
> +                       /* We will reuse this copy_pfn for a real 'nonzero' page. */

I'd prefer something like "This copy_pfn will be reused for a page
that is not full of zeros."

> +                       continue;
> +               }
> +               copied_pages++;
> +               copy_pfn = memory_bm_next_pfn(copy_bm);
>         }
> +       return copied_pages;
>  }
>
>  /* Total number of image pages */
>  static unsigned int nr_copy_pages;
>  /* Number of pages needed for saving the original pfns of the image pages */
>  static unsigned int nr_meta_pages;
> +/* Number of zero pages */
> +static unsigned int nr_zero_pages;
> +
>  /*
>   * Numbers of normal and highmem page frames allocated for hibernation image
>   * before suspending devices.
> @@ -1494,6 +1531,9 @@ static struct memory_bitmap orig_bm;
>   */
>  static struct memory_bitmap copy_bm;
>
> +/* Memory bitmap which tracks which saveable pages were zero filled. */
> +static struct memory_bitmap zero_bm;
> +
>  /**
>   * swsusp_free - Free pages allocated for hibernation image.
>   *
> @@ -1538,6 +1578,7 @@ void swsusp_free(void)
>  out:
>         nr_copy_pages = 0;
>         nr_meta_pages = 0;
> +       nr_zero_pages = 0;
>         restore_pblist = NULL;
>         buffer = NULL;
>         alloc_normal = 0;
> @@ -1756,8 +1797,15 @@ int hibernate_preallocate_memory(void)
>                 goto err_out;
>         }
>
> +       error = memory_bm_create(&zero_bm, GFP_IMAGE, PG_ANY);
> +       if (error) {
> +               pr_err("Cannot allocate zero bitmap\n");
> +               goto err_out;
> +       }
> +
>         alloc_normal = 0;
>         alloc_highmem = 0;
> +       nr_zero_pages = 0;
>
>         /* Count the number of saveable data pages. */
>         save_highmem = count_highmem_pages();
> @@ -2014,7 +2062,6 @@ static int swsusp_alloc(struct memory_bitmap *copy_bm,
>  asmlinkage __visible int swsusp_save(void)
>  {
>         unsigned int nr_pages, nr_highmem;
> -

Unrelated change and not needed AFAICS.

>         pr_info("Creating image:\n");
>
>         drain_local_pages(NULL);
> @@ -2037,19 +2084,19 @@ asmlinkage __visible int swsusp_save(void)
>          * Kill them.
>          */
>         drain_local_pages(NULL);
> -       copy_data_pages(&copy_bm, &orig_bm);
> +       nr_copy_pages = copy_data_pages(&copy_bm, &orig_bm, &zero_bm);
>
>         /*
>          * End of critical section. From now on, we can write to memory,
>          * but we should not touch disk. This specially means we must _not_
>          * touch swap space! Except we must write out our image of course.
>          */
> -
>         nr_pages += nr_highmem;
> -       nr_copy_pages = nr_pages;
> +       /* We don't actually copy the zero pages */
> +       nr_zero_pages = nr_pages - nr_copy_pages;
>         nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
>
> -       pr_info("Image created (%d pages copied)\n", nr_pages);
> +       pr_info("Image created (%d pages copied, %d zero pages)\n", nr_copy_pages, nr_zero_pages);
>
>         return 0;
>  }
> @@ -2094,15 +2141,22 @@ static int init_header(struct swsusp_info *info)
>         return init_header_complete(info);
>  }
>
> +#define ENCODED_PFN_ZERO_FLAG ((unsigned long)1 << (BITS_PER_LONG - 1))
> +#define ENCODED_PFN_MASK (~ENCODED_PFN_ZERO_FLAG)
> +
>  /**
>   * pack_pfns - Prepare PFNs for saving.
>   * @bm: Memory bitmap.
>   * @buf: Memory buffer to store the PFNs in.
> + * @zero_bm: Memory bitmap containing PFNs of zero pages.
>   *
>   * PFNs corresponding to set bits in @bm are stored in the area of memory
> - * pointed to by @buf (1 page at a time).
> + * pointed to by @buf (1 page at a time). Pages which were filled with only
> + * zeros will have the highest bit set in the packed format to distinguish
> + * them from PFNs which will be contained in the image file.
>   */
> -static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
> +static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm,
> +               struct memory_bitmap *zero_bm)
>  {
>         int j;
>
> @@ -2110,6 +2164,8 @@ static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
>                 buf[j] = memory_bm_next_pfn(bm);
>                 if (unlikely(buf[j] == BM_END_OF_MAP))
>                         break;
> +               if (memory_bm_test_bit(zero_bm, buf[j]))
> +                       buf[j] |= ENCODED_PFN_ZERO_FLAG;
>         }
>  }
>
> @@ -2151,7 +2207,7 @@ int snapshot_read_next(struct snapshot_handle *handle)
>                 memory_bm_position_reset(&copy_bm);
>         } else if (handle->cur <= nr_meta_pages) {
>                 clear_page(buffer);
> -               pack_pfns(buffer, &orig_bm);
> +               pack_pfns(buffer, &orig_bm, &zero_bm);
>         } else {
>                 struct page *page;
>
> @@ -2247,24 +2303,34 @@ static int load_header(struct swsusp_info *info)
>   * unpack_orig_pfns - Set bits corresponding to given PFNs in a memory bitmap.
>   * @bm: Memory bitmap.
>   * @buf: Area of memory containing the PFNs.
> + * @zero_bm: Memory bitmap with the zero PFNs marked.
>   *
>   * For each element of the array pointed to by @buf (1 page at a time), set the
> - * corresponding bit in @bm.
> + * corresponding bit in @bm. If the page was originally populated with only
> + * zeros then a corresponding bit will also be set in @zero_bm.
>   */
> -static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
> +static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm,
> +               struct memory_bitmap *zero_bm)
>  {
> -       int j;
> +       int j, zero;

"zero" is a bool variable.

> +       unsigned long decoded_pfn;

Please put this above the other declaration.

>
>         for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
>                 if (unlikely(buf[j] == BM_END_OF_MAP))
>                         break;
>
> -               if (pfn_valid(buf[j]) && memory_bm_pfn_present(bm, buf[j])) {
> -                       memory_bm_set_bit(bm, buf[j]);
> +               zero = !!(buf[j] & ENCODED_PFN_ZERO_FLAG);
> +               decoded_pfn = buf[j] & ENCODED_PFN_MASK;
> +               if (pfn_valid(decoded_pfn) && memory_bm_pfn_present(bm, decoded_pfn)) {
> +                       memory_bm_set_bit(bm, decoded_pfn);
> +                       if (zero) {
> +                               memory_bm_set_bit(zero_bm, decoded_pfn);
> +                               nr_zero_pages++;
> +                       }
>                 } else {
> -                       if (!pfn_valid(buf[j]))
> +                       if (!pfn_valid(decoded_pfn))
>                                 pr_err(FW_BUG "Memory map mismatch at 0x%llx after hibernation\n",
> -                                      (unsigned long long)PFN_PHYS(buf[j]));
> +                                      (unsigned long long)PFN_PHYS(decoded_pfn));
>                         return -EFAULT;
>                 }
>         }
> @@ -2486,6 +2552,7 @@ static inline void free_highmem_data(void) {}
>   * prepare_image - Make room for loading hibernation image.
>   * @new_bm: Uninitialized memory bitmap structure.
>   * @bm: Memory bitmap with unsafe pages marked.
> + * @zero_bm: Memory bitmap containing the zero pages.
>   *
>   * Use @bm to mark the pages that will be overwritten in the process of
>   * restoring the system memory state from the suspend image ("unsafe" pages)
> @@ -2496,11 +2563,16 @@ static inline void free_highmem_data(void) {}
>   * pages will be used for just yet.  Instead, we mark them all as allocated and
>   * create a lists of "safe" pages to be used later.  On systems with high
>   * memory a list of "safe" highmem pages is created too.
> + *
> + * Because we didn't know which pages were unsafe when we created the zero bm we
> + * make a copy of it and recreate it within safe pages.

I would prefer something like "Because it was not known which pages
were unsafe when @zero_bm was created, make a copy of it and recreate
it within safe pages."

>   */
> -static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
> +static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm,
> +               struct memory_bitmap *zero_bm)
>  {
>         unsigned int nr_pages, nr_highmem;
>         struct linked_page *lp;
> +       struct memory_bitmap tmp;

Please put this above the previous declaration.

>         int error;
>
>         /* If there is no highmem, the buffer will not be necessary */
> @@ -2516,6 +2588,22 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
>
>         duplicate_memory_bitmap(new_bm, bm);
>         memory_bm_free(bm, PG_UNSAFE_KEEP);
> +
> +       /* Make a copy of the zero bm so it can be created in safe pages */

s/the zero bm/zero_bm/

> +       error = memory_bm_create(&tmp, GFP_ATOMIC, PG_ANY);
> +       if (error)
> +               goto Free;

Please add empty lines after an if () without a block in all places.

> +       duplicate_memory_bitmap(&tmp, zero_bm);
> +       memory_bm_free(zero_bm, PG_UNSAFE_KEEP);
> +
> +       /* Recreate zero_bm in safe pages */
> +       error = memory_bm_create(zero_bm, GFP_ATOMIC, PG_SAFE);
> +       if (error)
> +               goto Free;
> +       duplicate_memory_bitmap(zero_bm, &tmp);
> +       memory_bm_free(&tmp, PG_UNSAFE_KEEP);
> +       /* at this point zero_bm is in safe pages and we can use it while restoring */

"At this point zero_bm is in safe pages and it can be used for restoring."

> +
>         if (nr_highmem > 0) {
>                 error = prepare_highmem_image(bm, &nr_highmem);
>                 if (error)
> @@ -2530,7 +2618,7 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
>          *
>          * nr_copy_pages cannot be less than allocated_unsafe_pages too.
>          */
> -       nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
> +       nr_pages = (nr_zero_pages + nr_copy_pages) - nr_highmem - allocated_unsafe_pages;
>         nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
>         while (nr_pages > 0) {
>                 lp = get_image_page(GFP_ATOMIC, PG_SAFE);
> @@ -2543,7 +2631,7 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
>                 nr_pages--;
>         }
>         /* Preallocate memory for the image */
> -       nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
> +       nr_pages = (nr_zero_pages + nr_copy_pages) - nr_highmem - allocated_unsafe_pages;
>         while (nr_pages > 0) {
>                 lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
>                 if (!lp) {
> @@ -2631,8 +2719,9 @@ int snapshot_write_next(struct snapshot_handle *handle)
>         static struct chain_allocator ca;
>         int error = 0;
>
> +next:
>         /* Check if we have already loaded the entire image */
> -       if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages)
> +       if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages + nr_zero_pages)
>                 return 0;
>
>         handle->sync_read = 1;
> @@ -2657,19 +2746,26 @@ int snapshot_write_next(struct snapshot_handle *handle)
>                 if (error)
>                         return error;
>
> +               error = memory_bm_create(&zero_bm, GFP_ATOMIC, PG_ANY);
> +               if (error)
> +                       return error;
> +
> +               nr_zero_pages = 0;
> +
>                 hibernate_restore_protection_begin();
>         } else if (handle->cur <= nr_meta_pages + 1) {
> -               error = unpack_orig_pfns(buffer, &copy_bm);
> +               error = unpack_orig_pfns(buffer, &copy_bm, &zero_bm);
>                 if (error)
>                         return error;
>
>                 if (handle->cur == nr_meta_pages + 1) {
> -                       error = prepare_image(&orig_bm, &copy_bm);
> +                       error = prepare_image(&orig_bm, &copy_bm, &zero_bm);
>                         if (error)
>                                 return error;
>
>                         chain_init(&ca, GFP_ATOMIC, PG_SAFE);
>                         memory_bm_position_reset(&orig_bm);
> +                       memory_bm_position_reset(&zero_bm);
>                         restore_pblist = NULL;
>                         handle->buffer = get_buffer(&orig_bm, &ca);
>                         handle->sync_read = 0;
> @@ -2686,6 +2782,14 @@ int snapshot_write_next(struct snapshot_handle *handle)
>                         handle->sync_read = 0;
>         }
>         handle->cur++;
> +
> +       /* Zero pages were not included in the image, memset it and move on. */
> +       if ((handle->cur > (nr_meta_pages + 1)) &&
> +                       memory_bm_test_bit(&zero_bm, memory_bm_get_current(&orig_bm))) {
> +               memset(handle->buffer, 0, PAGE_SIZE);
> +               goto next;
> +       }
> +
>         return PAGE_SIZE;
>  }
>
> @@ -2702,7 +2806,7 @@ void snapshot_write_finalize(struct snapshot_handle *handle)
>         copy_last_highmem_page();
>         hibernate_restore_protect_page(handle->buffer);
>         /* Do that only if we have loaded the image entirely */
> -       if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) {
> +       if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages + nr_zero_pages) {
>                 memory_bm_recycle(&orig_bm);
>                 free_highmem_data();
>         }
> --

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH] PM: hibernate: don't store zero pages in the image file.
  2023-01-13 19:30 [PATCH] PM: hibernate: don't store zero pages in the image file Brian Geffon
                   ` (2 preceding siblings ...)
  2023-04-06 18:20 ` [PATCH v4] " Brian Geffon
@ 2023-06-26 15:44 ` Brian Geffon
  2023-06-26 15:47 ` [PATCH v6] " Brian Geffon
  2023-07-14 17:55 ` [PATCH v7] " Brian Geffon
  5 siblings, 0 replies; 16+ messages in thread
From: Brian Geffon @ 2023-06-26 15:44 UTC (permalink / raw)
  To: Rafael J . Wysocki
  Cc: Pavel Machek, Len Brown, linux-pm, linux-kernel,
	Matthias Kaehlcke, Brian Geffon

On ChromeOS we've observed a considerable number of in-use pages filled with
zeros. Today with hibernate it's entirely possible that saveable pages are just
zero filled. Since we're already copying pages word-by-word in do_copy_page it
becomes almost free to determine if a page was completely filled with zeros.

This change introduces a new bitmap which will track these zero pages. If a page
is zero it will not be included in the saved image, instead to track these zero
pages in the image file we will introduce a new flag which we will set on the
packed PFN list. When reading back in the image file we will detect these zero
page PFNs and rebuild the zero page bitmap.

When the image is being loaded through calls to write_next_page if we encounter
a zero page we will silently memset it to 0 and then continue on to the next
page. Given the implementation in snapshot_read_next/snapshot_write_next this
change  will be transparent to non-compressed/compressed and swsusp modes of
operation.

To provide some concrete numbers from simple ad-hoc testing, on a device which
was lightly in use we saw that:

PM: hibernation: Image created (964408 pages copied, 548304 zero pages)

Of the approximately 6.2GB of saveable pages 2.2GB (36%) were just zero filled
and could be tracked entirely within the packed PFN list. The savings would
obviously be much lower for lzo compressed images, but even in the case of
compression not copying pages across to the compression threads will still
speed things up. It's also possible that we would see better overall compression
ratios as larger regions of "real data" would improve the compressibility.

Finally, such an approach could dramatically improve swsusp performance
as each one of those zero pages requires a write syscall to reload, by
handling it as part of the packed PFN list we're able to fully avoid
that.

Patch v4 -> v5:
- Addressed numerous style comments from Rafael J. Wysocki.

Patch v3 -> v4:
- Suggestions from Matthias Kaehlcke:
 - Return number of copy pages from copy_data_pages
 - Use an explicit temporary bitmap while moving the zerm_bm
   to safe pages.

Patch v2 -> v3:
- Use nr_zero_pages rather than walking each pfn to count.
- Make sure zero_bm is allocated in safe pages on resume.
 When reading in the pfn list and building the zero page bm
 we don't know which pages are unsafe yet so we will need to
 copy this bm to safe pages after the metadata has been read.

Patch v1 -> v2:
- minor code mistake from rebasing corrected.

Signed-off-by: Brian Geffon <bgeffon@google.com>
---
 kernel/power/snapshot.c | 184 ++++++++++++++++++++++++++++++++--------
 1 file changed, 147 insertions(+), 37 deletions(-)

diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index cd8b7b35f1e8..4da53d470c82 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -404,6 +404,7 @@ struct bm_position {
 	struct mem_zone_bm_rtree *zone;
 	struct rtree_node *node;
 	unsigned long node_pfn;
+	unsigned long cur_pfn;
 	int node_bit;
 };
 
@@ -589,6 +590,7 @@ static void memory_bm_position_reset(struct memory_bitmap *bm)
 	bm->cur.node = list_entry(bm->cur.zone->leaves.next,
 				  struct rtree_node, list);
 	bm->cur.node_pfn = 0;
+	bm->cur.cur_pfn = BM_END_OF_MAP;
 	bm->cur.node_bit = 0;
 }
 
@@ -799,6 +801,7 @@ static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
 	bm->cur.zone = zone;
 	bm->cur.node = node;
 	bm->cur.node_pfn = (pfn - zone->start_pfn) & ~BM_BLOCK_MASK;
+	bm->cur.cur_pfn = pfn;
 
 	/* Set return values */
 	*addr = node->data;
@@ -850,6 +853,11 @@ static void memory_bm_clear_current(struct memory_bitmap *bm)
 	clear_bit(bit, bm->cur.node->data);
 }
 
+static unsigned long memory_bm_get_current(struct memory_bitmap *bm)
+{
+	return bm->cur.cur_pfn;
+}
+
 static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
 {
 	void *addr;
@@ -929,10 +937,12 @@ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
 		if (bit < bits) {
 			pfn = bm->cur.zone->start_pfn + bm->cur.node_pfn + bit;
 			bm->cur.node_bit = bit + 1;
+			bm->cur.cur_pfn = pfn;
 			return pfn;
 		}
 	} while (rtree_next_node(bm));
 
+	bm->cur.cur_pfn = BM_END_OF_MAP;
 	return BM_END_OF_MAP;
 }
 
@@ -1371,14 +1381,18 @@ static unsigned int count_data_pages(void)
 
 /*
  * This is needed, because copy_page and memcpy are not usable for copying
- * task structs.
+ * task structs. Returns true if the page was filled with only zeros, otherwise false.
  */
-static inline void do_copy_page(long *dst, long *src)
+static inline bool do_copy_page(long *dst, long *src)
 {
+	long z = 0;
 	int n;
 
-	for (n = PAGE_SIZE / sizeof(long); n; n--)
+	for (n = PAGE_SIZE / sizeof(long); n; n--) {
+		z |= *src;
 		*dst++ = *src++;
+	}
+	return !z;
 }
 
 /**
@@ -1387,17 +1401,20 @@ static inline void do_copy_page(long *dst, long *src)
  * Check if the page we are going to copy is marked as present in the kernel
  * page tables. This always is the case if CONFIG_DEBUG_PAGEALLOC or
  * CONFIG_ARCH_HAS_SET_DIRECT_MAP is not set. In that case kernel_page_present()
- * always returns 'true'.
+ * always returns 'true'. Returns true if the page was entirely composed of zeros
+ * otherwise it will return false.
  */
-static void safe_copy_page(void *dst, struct page *s_page)
+static bool safe_copy_page(void *dst, struct page *s_page)
 {
+	bool zeros_only;
 	if (kernel_page_present(s_page)) {
-		do_copy_page(dst, page_address(s_page));
+		zeros_only = do_copy_page(dst, page_address(s_page));
 	} else {
 		hibernate_map_page(s_page);
-		do_copy_page(dst, page_address(s_page));
+		zeros_only = do_copy_page(dst, page_address(s_page));
 		hibernate_unmap_page(s_page);
 	}
+	return zeros_only;
 }
 
 #ifdef CONFIG_HIGHMEM
@@ -1407,17 +1424,18 @@ static inline struct page *page_is_saveable(struct zone *zone, unsigned long pfn
 		saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
 }
 
-static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
+static bool copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
 {
 	struct page *s_page, *d_page;
 	void *src, *dst;
+	bool zeros_only;
 
 	s_page = pfn_to_page(src_pfn);
 	d_page = pfn_to_page(dst_pfn);
 	if (PageHighMem(s_page)) {
 		src = kmap_atomic(s_page);
 		dst = kmap_atomic(d_page);
-		do_copy_page(dst, src);
+		zeros_only = do_copy_page(dst, src);
 		kunmap_atomic(dst);
 		kunmap_atomic(src);
 	} else {
@@ -1426,30 +1444,39 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
 			 * The page pointed to by src may contain some kernel
 			 * data modified by kmap_atomic()
 			 */
-			safe_copy_page(buffer, s_page);
+			zeros_only = safe_copy_page(buffer, s_page);
 			dst = kmap_atomic(d_page);
 			copy_page(dst, buffer);
 			kunmap_atomic(dst);
 		} else {
-			safe_copy_page(page_address(d_page), s_page);
+			zeros_only = safe_copy_page(page_address(d_page), s_page);
 		}
 	}
+	return ret;
 }
 #else
 #define page_is_saveable(zone, pfn)	saveable_page(zone, pfn)
 
-static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
+static inline int copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
 {
-	safe_copy_page(page_address(pfn_to_page(dst_pfn)),
+	return safe_copy_page(page_address(pfn_to_page(dst_pfn)),
 				pfn_to_page(src_pfn));
 }
 #endif /* CONFIG_HIGHMEM */
 
-static void copy_data_pages(struct memory_bitmap *copy_bm,
-			    struct memory_bitmap *orig_bm)
+/*
+ * Copy data pages will copy all pages into pages pulled from the copy_bm.
+ * If a page was entirely filled with zeros it will be marked in the zero_bm.
+ *
+ * Returns the number of pages copied.
+ */
+static unsigned long copy_data_pages(struct memory_bitmap *copy_bm,
+			    struct memory_bitmap *orig_bm,
+			    struct memory_bitmap *zero_bm)
 {
+	unsigned long copied_pages = 0;
 	struct zone *zone;
-	unsigned long pfn;
+	unsigned long pfn, copy_pfn;
 
 	for_each_populated_zone(zone) {
 		unsigned long max_zone_pfn;
@@ -1462,18 +1489,30 @@ static void copy_data_pages(struct memory_bitmap *copy_bm,
 	}
 	memory_bm_position_reset(orig_bm);
 	memory_bm_position_reset(copy_bm);
+	copy_pfn = memory_bm_next_pfn(copy_bm);
 	for(;;) {
 		pfn = memory_bm_next_pfn(orig_bm);
 		if (unlikely(pfn == BM_END_OF_MAP))
 			break;
-		copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
+		if (copy_data_page(copy_pfn, pfn)) {
+			memory_bm_set_bit(zero_bm, pfn);
+
+			/* Use this copy_pfn for a page that is not full of zeros */
+			continue;
+		}
+		copied_pages++;
+		copy_pfn = memory_bm_next_pfn(copy_bm);
 	}
+	return copied_pages;
 }
 
 /* Total number of image pages */
 static unsigned int nr_copy_pages;
 /* Number of pages needed for saving the original pfns of the image pages */
 static unsigned int nr_meta_pages;
+/* Number of zero pages */
+static unsigned int nr_zero_pages;
+
 /*
  * Numbers of normal and highmem page frames allocated for hibernation image
  * before suspending devices.
@@ -1494,6 +1533,9 @@ static struct memory_bitmap orig_bm;
  */
 static struct memory_bitmap copy_bm;
 
+/* Memory bitmap which tracks which saveable pages were zero filled. */
+static struct memory_bitmap zero_bm;
+
 /**
  * swsusp_free - Free pages allocated for hibernation image.
  *
@@ -1538,6 +1580,7 @@ void swsusp_free(void)
 out:
 	nr_copy_pages = 0;
 	nr_meta_pages = 0;
+	nr_zero_pages = 0;
 	restore_pblist = NULL;
 	buffer = NULL;
 	alloc_normal = 0;
@@ -1756,8 +1799,15 @@ int hibernate_preallocate_memory(void)
 		goto err_out;
 	}
 
+	error = memory_bm_create(&zero_bm, GFP_IMAGE, PG_ANY);
+	if (error) {
+		pr_err("Cannot allocate zero bitmap\n");
+		goto err_out;
+	}
+
 	alloc_normal = 0;
 	alloc_highmem = 0;
+	nr_zero_pages = 0;
 
 	/* Count the number of saveable data pages. */
 	save_highmem = count_highmem_pages();
@@ -2037,19 +2087,19 @@ asmlinkage __visible int swsusp_save(void)
 	 * Kill them.
 	 */
 	drain_local_pages(NULL);
-	copy_data_pages(&copy_bm, &orig_bm);
+	nr_copy_pages = copy_data_pages(&copy_bm, &orig_bm, &zero_bm);
 
 	/*
 	 * End of critical section. From now on, we can write to memory,
 	 * but we should not touch disk. This specially means we must _not_
 	 * touch swap space! Except we must write out our image of course.
 	 */
-
 	nr_pages += nr_highmem;
-	nr_copy_pages = nr_pages;
+	/* We don't actually copy the zero pages */
+	nr_zero_pages = nr_pages - nr_copy_pages;
 	nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
 
-	pr_info("Image created (%d pages copied)\n", nr_pages);
+	pr_info("Image created (%d pages copied, %d zero pages)\n", nr_copy_pages, nr_zero_pages);
 
 	return 0;
 }
@@ -2094,15 +2144,22 @@ static int init_header(struct swsusp_info *info)
 	return init_header_complete(info);
 }
 
+#define ENCODED_PFN_ZERO_FLAG ((unsigned long)1 << (BITS_PER_LONG - 1))
+#define ENCODED_PFN_MASK (~ENCODED_PFN_ZERO_FLAG)
+
 /**
  * pack_pfns - Prepare PFNs for saving.
  * @bm: Memory bitmap.
  * @buf: Memory buffer to store the PFNs in.
+ * @zero_bm: Memory bitmap containing PFNs of zero pages.
  *
  * PFNs corresponding to set bits in @bm are stored in the area of memory
- * pointed to by @buf (1 page at a time).
+ * pointed to by @buf (1 page at a time). Pages which were filled with only
+ * zeros will have the highest bit set in the packed format to distinguish
+ * them from PFNs which will be contained in the image file.
  */
-static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
+static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm,
+		struct memory_bitmap *zero_bm)
 {
 	int j;
 
@@ -2110,6 +2167,8 @@ static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
 		buf[j] = memory_bm_next_pfn(bm);
 		if (unlikely(buf[j] == BM_END_OF_MAP))
 			break;
+		if (memory_bm_test_bit(zero_bm, buf[j]))
+			buf[j] |= ENCODED_PFN_ZERO_FLAG;
 	}
 }
 
@@ -2151,7 +2210,7 @@ int snapshot_read_next(struct snapshot_handle *handle)
 		memory_bm_position_reset(&copy_bm);
 	} else if (handle->cur <= nr_meta_pages) {
 		clear_page(buffer);
-		pack_pfns(buffer, &orig_bm);
+		pack_pfns(buffer, &orig_bm, &zero_bm);
 	} else {
 		struct page *page;
 
@@ -2247,24 +2306,35 @@ static int load_header(struct swsusp_info *info)
  * unpack_orig_pfns - Set bits corresponding to given PFNs in a memory bitmap.
  * @bm: Memory bitmap.
  * @buf: Area of memory containing the PFNs.
+ * @zero_bm: Memory bitmap with the zero PFNs marked.
  *
  * For each element of the array pointed to by @buf (1 page at a time), set the
- * corresponding bit in @bm.
+ * corresponding bit in @bm. If the page was originally populated with only
+ * zeros then a corresponding bit will also be set in @zero_bm.
  */
-static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
+static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm,
+		struct memory_bitmap *zero_bm)
 {
+	unsigned long decoded_pfn;
+        bool zero;
 	int j;
 
 	for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
 		if (unlikely(buf[j] == BM_END_OF_MAP))
 			break;
 
-		if (pfn_valid(buf[j]) && memory_bm_pfn_present(bm, buf[j])) {
-			memory_bm_set_bit(bm, buf[j]);
+		zero = !!(buf[j] & ENCODED_PFN_ZERO_FLAG);
+		decoded_pfn = buf[j] & ENCODED_PFN_MASK;
+		if (pfn_valid(decoded_pfn) && memory_bm_pfn_present(bm, decoded_pfn)) {
+			memory_bm_set_bit(bm, decoded_pfn);
+			if (zero) {
+				memory_bm_set_bit(zero_bm, decoded_pfn);
+				nr_zero_pages++;
+			}
 		} else {
-			if (!pfn_valid(buf[j]))
+			if (!pfn_valid(decoded_pfn))
 				pr_err(FW_BUG "Memory map mismatch at 0x%llx after hibernation\n",
-				       (unsigned long long)PFN_PHYS(buf[j]));
+				       (unsigned long long)PFN_PHYS(decoded_pfn));
 			return -EFAULT;
 		}
 	}
@@ -2486,6 +2556,7 @@ static inline void free_highmem_data(void) {}
  * prepare_image - Make room for loading hibernation image.
  * @new_bm: Uninitialized memory bitmap structure.
  * @bm: Memory bitmap with unsafe pages marked.
+ * @zero_bm: Memory bitmap containing the zero pages.
  *
  * Use @bm to mark the pages that will be overwritten in the process of
  * restoring the system memory state from the suspend image ("unsafe" pages)
@@ -2496,10 +2567,15 @@ static inline void free_highmem_data(void) {}
  * pages will be used for just yet.  Instead, we mark them all as allocated and
  * create a lists of "safe" pages to be used later.  On systems with high
  * memory a list of "safe" highmem pages is created too.
+ *
+ * Because it was not known which pages were unsafe when @zero_bm was created,
+ * make a copy of it and recreate it within safe pages.
  */
-static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
+static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm,
+		struct memory_bitmap *zero_bm)
 {
 	unsigned int nr_pages, nr_highmem;
+	struct memory_bitmap tmp;
 	struct linked_page *lp;
 	int error;
 
@@ -2516,6 +2592,24 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
 
 	duplicate_memory_bitmap(new_bm, bm);
 	memory_bm_free(bm, PG_UNSAFE_KEEP);
+
+	/* Make a copy of zero_bm so it can be created in safe pages */
+	error = memory_bm_create(&tmp, GFP_ATOMIC, PG_ANY);
+	if (error)
+		goto Free;
+
+	duplicate_memory_bitmap(&tmp, zero_bm);
+	memory_bm_free(zero_bm, PG_UNSAFE_KEEP);
+
+	/* Recreate zero_bm in safe pages */
+	error = memory_bm_create(zero_bm, GFP_ATOMIC, PG_SAFE);
+	if (error)
+		goto Free;
+
+	duplicate_memory_bitmap(zero_bm, &tmp);
+	memory_bm_free(&tmp, PG_UNSAFE_KEEP);
+	/* At this point zero_bm is in safe pages and it can be used for restoring. */
+
 	if (nr_highmem > 0) {
 		error = prepare_highmem_image(bm, &nr_highmem);
 		if (error)
@@ -2530,7 +2624,7 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
 	 *
 	 * nr_copy_pages cannot be less than allocated_unsafe_pages too.
 	 */
-	nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
+	nr_pages = (nr_zero_pages + nr_copy_pages) - nr_highmem - allocated_unsafe_pages;
 	nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
 	while (nr_pages > 0) {
 		lp = get_image_page(GFP_ATOMIC, PG_SAFE);
@@ -2543,7 +2637,7 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
 		nr_pages--;
 	}
 	/* Preallocate memory for the image */
-	nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
+	nr_pages = (nr_zero_pages + nr_copy_pages) - nr_highmem - allocated_unsafe_pages;
 	while (nr_pages > 0) {
 		lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
 		if (!lp) {
@@ -2631,8 +2725,9 @@ int snapshot_write_next(struct snapshot_handle *handle)
 	static struct chain_allocator ca;
 	int error = 0;
 
+next:
 	/* Check if we have already loaded the entire image */
-	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages)
+	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages + nr_zero_pages)
 		return 0;
 
 	handle->sync_read = 1;
@@ -2657,19 +2752,26 @@ int snapshot_write_next(struct snapshot_handle *handle)
 		if (error)
 			return error;
 
+		error = memory_bm_create(&zero_bm, GFP_ATOMIC, PG_ANY);
+		if (error)
+			return error;
+
+		nr_zero_pages = 0;
+
 		hibernate_restore_protection_begin();
 	} else if (handle->cur <= nr_meta_pages + 1) {
-		error = unpack_orig_pfns(buffer, &copy_bm);
+		error = unpack_orig_pfns(buffer, &copy_bm, &zero_bm);
 		if (error)
 			return error;
 
 		if (handle->cur == nr_meta_pages + 1) {
-			error = prepare_image(&orig_bm, &copy_bm);
+			error = prepare_image(&orig_bm, &copy_bm, &zero_bm);
 			if (error)
 				return error;
 
 			chain_init(&ca, GFP_ATOMIC, PG_SAFE);
 			memory_bm_position_reset(&orig_bm);
+			memory_bm_position_reset(&zero_bm);
 			restore_pblist = NULL;
 			handle->buffer = get_buffer(&orig_bm, &ca);
 			handle->sync_read = 0;
@@ -2686,6 +2788,14 @@ int snapshot_write_next(struct snapshot_handle *handle)
 			handle->sync_read = 0;
 	}
 	handle->cur++;
+
+	/* Zero pages were not included in the image, memset it and move on. */
+	if ((handle->cur > (nr_meta_pages + 1)) &&
+			memory_bm_test_bit(&zero_bm, memory_bm_get_current(&orig_bm))) {
+		memset(handle->buffer, 0, PAGE_SIZE);
+		goto next;
+	}
+
 	return PAGE_SIZE;
 }
 
@@ -2702,7 +2812,7 @@ void snapshot_write_finalize(struct snapshot_handle *handle)
 	copy_last_highmem_page();
 	hibernate_restore_protect_page(handle->buffer);
 	/* Do that only if we have loaded the image entirely */
-	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) {
+	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages + nr_zero_pages) {
 		memory_bm_recycle(&orig_bm);
 		free_highmem_data();
 	}
-- 
2.41.0.162.gfafddb0af9-goog


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v6] PM: hibernate: don't store zero pages in the image file.
  2023-01-13 19:30 [PATCH] PM: hibernate: don't store zero pages in the image file Brian Geffon
                   ` (3 preceding siblings ...)
  2023-06-26 15:44 ` [PATCH] " Brian Geffon
@ 2023-06-26 15:47 ` Brian Geffon
  2023-07-14 17:55 ` [PATCH v7] " Brian Geffon
  5 siblings, 0 replies; 16+ messages in thread
From: Brian Geffon @ 2023-06-26 15:47 UTC (permalink / raw)
  To: Rafael J . Wysocki
  Cc: Pavel Machek, Len Brown, linux-pm, linux-kernel,
	Matthias Kaehlcke, Brian Geffon

On ChromeOS we've observed a considerable number of in-use pages filled with
zeros. Today with hibernate it's entirely possible that saveable pages are just
zero filled. Since we're already copying pages word-by-word in do_copy_page it
becomes almost free to determine if a page was completely filled with zeros.

This change introduces a new bitmap which will track these zero pages. If a page
is zero it will not be included in the saved image, instead to track these zero
pages in the image file we will introduce a new flag which we will set on the
packed PFN list. When reading back in the image file we will detect these zero
page PFNs and rebuild the zero page bitmap.

When the image is being loaded through calls to write_next_page if we encounter
a zero page we will silently memset it to 0 and then continue on to the next
page. Given the implementation in snapshot_read_next/snapshot_write_next this
change  will be transparent to non-compressed/compressed and swsusp modes of
operation.

To provide some concrete numbers from simple ad-hoc testing, on a device which
was lightly in use we saw that:

PM: hibernation: Image created (964408 pages copied, 548304 zero pages)

Of the approximately 6.2GB of saveable pages 2.2GB (36%) were just zero filled
and could be tracked entirely within the packed PFN list. The savings would
obviously be much lower for lzo compressed images, but even in the case of
compression not copying pages across to the compression threads will still
speed things up. It's also possible that we would see better overall compression
ratios as larger regions of "real data" would improve the compressibility.

Finally, such an approach could dramatically improve swsusp performance
as each one of those zero pages requires a write syscall to reload, by
handling it as part of the packed PFN list we're able to fully avoid
that.

Patch v5 -> v6:
- Correcting missed variable when changing types.

Patch v4 -> v5:
- Addressed numerous style comments from Rafael J. Wysocki.

Patch v3 -> v4:
- Suggestions from Matthias Kaehlcke:
 - Return number of copy pages from copy_data_pages
 - Use an explicit temporary bitmap while moving the zerm_bm
   to safe pages.

Patch v2 -> v3:
- Use nr_zero_pages rather than walking each pfn to count.
- Make sure zero_bm is allocated in safe pages on resume.
 When reading in the pfn list and building the zero page bm
 we don't know which pages are unsafe yet so we will need to
 copy this bm to safe pages after the metadata has been read.

Patch v1 -> v2:
- minor code mistake from rebasing corrected.

Signed-off-by: Brian Geffon <bgeffon@google.com>
---
 kernel/power/snapshot.c | 184 ++++++++++++++++++++++++++++++++--------
 1 file changed, 147 insertions(+), 37 deletions(-)

diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index cd8b7b35f1e8..294c75646ab0 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -404,6 +404,7 @@ struct bm_position {
 	struct mem_zone_bm_rtree *zone;
 	struct rtree_node *node;
 	unsigned long node_pfn;
+	unsigned long cur_pfn;
 	int node_bit;
 };
 
@@ -589,6 +590,7 @@ static void memory_bm_position_reset(struct memory_bitmap *bm)
 	bm->cur.node = list_entry(bm->cur.zone->leaves.next,
 				  struct rtree_node, list);
 	bm->cur.node_pfn = 0;
+	bm->cur.cur_pfn = BM_END_OF_MAP;
 	bm->cur.node_bit = 0;
 }
 
@@ -799,6 +801,7 @@ static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
 	bm->cur.zone = zone;
 	bm->cur.node = node;
 	bm->cur.node_pfn = (pfn - zone->start_pfn) & ~BM_BLOCK_MASK;
+	bm->cur.cur_pfn = pfn;
 
 	/* Set return values */
 	*addr = node->data;
@@ -850,6 +853,11 @@ static void memory_bm_clear_current(struct memory_bitmap *bm)
 	clear_bit(bit, bm->cur.node->data);
 }
 
+static unsigned long memory_bm_get_current(struct memory_bitmap *bm)
+{
+	return bm->cur.cur_pfn;
+}
+
 static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
 {
 	void *addr;
@@ -929,10 +937,12 @@ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
 		if (bit < bits) {
 			pfn = bm->cur.zone->start_pfn + bm->cur.node_pfn + bit;
 			bm->cur.node_bit = bit + 1;
+			bm->cur.cur_pfn = pfn;
 			return pfn;
 		}
 	} while (rtree_next_node(bm));
 
+	bm->cur.cur_pfn = BM_END_OF_MAP;
 	return BM_END_OF_MAP;
 }
 
@@ -1371,14 +1381,18 @@ static unsigned int count_data_pages(void)
 
 /*
  * This is needed, because copy_page and memcpy are not usable for copying
- * task structs.
+ * task structs. Returns true if the page was filled with only zeros, otherwise false.
  */
-static inline void do_copy_page(long *dst, long *src)
+static inline bool do_copy_page(long *dst, long *src)
 {
+	long z = 0;
 	int n;
 
-	for (n = PAGE_SIZE / sizeof(long); n; n--)
+	for (n = PAGE_SIZE / sizeof(long); n; n--) {
+		z |= *src;
 		*dst++ = *src++;
+	}
+	return !z;
 }
 
 /**
@@ -1387,17 +1401,20 @@ static inline void do_copy_page(long *dst, long *src)
  * Check if the page we are going to copy is marked as present in the kernel
  * page tables. This always is the case if CONFIG_DEBUG_PAGEALLOC or
  * CONFIG_ARCH_HAS_SET_DIRECT_MAP is not set. In that case kernel_page_present()
- * always returns 'true'.
+ * always returns 'true'. Returns true if the page was entirely composed of zeros
+ * otherwise it will return false.
  */
-static void safe_copy_page(void *dst, struct page *s_page)
+static bool safe_copy_page(void *dst, struct page *s_page)
 {
+	bool zeros_only;
 	if (kernel_page_present(s_page)) {
-		do_copy_page(dst, page_address(s_page));
+		zeros_only = do_copy_page(dst, page_address(s_page));
 	} else {
 		hibernate_map_page(s_page);
-		do_copy_page(dst, page_address(s_page));
+		zeros_only = do_copy_page(dst, page_address(s_page));
 		hibernate_unmap_page(s_page);
 	}
+	return zeros_only;
 }
 
 #ifdef CONFIG_HIGHMEM
@@ -1407,17 +1424,18 @@ static inline struct page *page_is_saveable(struct zone *zone, unsigned long pfn
 		saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
 }
 
-static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
+static bool copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
 {
 	struct page *s_page, *d_page;
 	void *src, *dst;
+	bool zeros_only;
 
 	s_page = pfn_to_page(src_pfn);
 	d_page = pfn_to_page(dst_pfn);
 	if (PageHighMem(s_page)) {
 		src = kmap_atomic(s_page);
 		dst = kmap_atomic(d_page);
-		do_copy_page(dst, src);
+		zeros_only = do_copy_page(dst, src);
 		kunmap_atomic(dst);
 		kunmap_atomic(src);
 	} else {
@@ -1426,30 +1444,39 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
 			 * The page pointed to by src may contain some kernel
 			 * data modified by kmap_atomic()
 			 */
-			safe_copy_page(buffer, s_page);
+			zeros_only = safe_copy_page(buffer, s_page);
 			dst = kmap_atomic(d_page);
 			copy_page(dst, buffer);
 			kunmap_atomic(dst);
 		} else {
-			safe_copy_page(page_address(d_page), s_page);
+			zeros_only = safe_copy_page(page_address(d_page), s_page);
 		}
 	}
+	return zeros_only;
 }
 #else
 #define page_is_saveable(zone, pfn)	saveable_page(zone, pfn)
 
-static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
+static inline int copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
 {
-	safe_copy_page(page_address(pfn_to_page(dst_pfn)),
+	return safe_copy_page(page_address(pfn_to_page(dst_pfn)),
 				pfn_to_page(src_pfn));
 }
 #endif /* CONFIG_HIGHMEM */
 
-static void copy_data_pages(struct memory_bitmap *copy_bm,
-			    struct memory_bitmap *orig_bm)
+/*
+ * Copy data pages will copy all pages into pages pulled from the copy_bm.
+ * If a page was entirely filled with zeros it will be marked in the zero_bm.
+ *
+ * Returns the number of pages copied.
+ */
+static unsigned long copy_data_pages(struct memory_bitmap *copy_bm,
+			    struct memory_bitmap *orig_bm,
+			    struct memory_bitmap *zero_bm)
 {
+	unsigned long copied_pages = 0;
 	struct zone *zone;
-	unsigned long pfn;
+	unsigned long pfn, copy_pfn;
 
 	for_each_populated_zone(zone) {
 		unsigned long max_zone_pfn;
@@ -1462,18 +1489,30 @@ static void copy_data_pages(struct memory_bitmap *copy_bm,
 	}
 	memory_bm_position_reset(orig_bm);
 	memory_bm_position_reset(copy_bm);
+	copy_pfn = memory_bm_next_pfn(copy_bm);
 	for(;;) {
 		pfn = memory_bm_next_pfn(orig_bm);
 		if (unlikely(pfn == BM_END_OF_MAP))
 			break;
-		copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
+		if (copy_data_page(copy_pfn, pfn)) {
+			memory_bm_set_bit(zero_bm, pfn);
+
+			/* Use this copy_pfn for a page that is not full of zeros */
+			continue;
+		}
+		copied_pages++;
+		copy_pfn = memory_bm_next_pfn(copy_bm);
 	}
+	return copied_pages;
 }
 
 /* Total number of image pages */
 static unsigned int nr_copy_pages;
 /* Number of pages needed for saving the original pfns of the image pages */
 static unsigned int nr_meta_pages;
+/* Number of zero pages */
+static unsigned int nr_zero_pages;
+
 /*
  * Numbers of normal and highmem page frames allocated for hibernation image
  * before suspending devices.
@@ -1494,6 +1533,9 @@ static struct memory_bitmap orig_bm;
  */
 static struct memory_bitmap copy_bm;
 
+/* Memory bitmap which tracks which saveable pages were zero filled. */
+static struct memory_bitmap zero_bm;
+
 /**
  * swsusp_free - Free pages allocated for hibernation image.
  *
@@ -1538,6 +1580,7 @@ void swsusp_free(void)
 out:
 	nr_copy_pages = 0;
 	nr_meta_pages = 0;
+	nr_zero_pages = 0;
 	restore_pblist = NULL;
 	buffer = NULL;
 	alloc_normal = 0;
@@ -1756,8 +1799,15 @@ int hibernate_preallocate_memory(void)
 		goto err_out;
 	}
 
+	error = memory_bm_create(&zero_bm, GFP_IMAGE, PG_ANY);
+	if (error) {
+		pr_err("Cannot allocate zero bitmap\n");
+		goto err_out;
+	}
+
 	alloc_normal = 0;
 	alloc_highmem = 0;
+	nr_zero_pages = 0;
 
 	/* Count the number of saveable data pages. */
 	save_highmem = count_highmem_pages();
@@ -2037,19 +2087,19 @@ asmlinkage __visible int swsusp_save(void)
 	 * Kill them.
 	 */
 	drain_local_pages(NULL);
-	copy_data_pages(&copy_bm, &orig_bm);
+	nr_copy_pages = copy_data_pages(&copy_bm, &orig_bm, &zero_bm);
 
 	/*
 	 * End of critical section. From now on, we can write to memory,
 	 * but we should not touch disk. This specially means we must _not_
 	 * touch swap space! Except we must write out our image of course.
 	 */
-
 	nr_pages += nr_highmem;
-	nr_copy_pages = nr_pages;
+	/* We don't actually copy the zero pages */
+	nr_zero_pages = nr_pages - nr_copy_pages;
 	nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
 
-	pr_info("Image created (%d pages copied)\n", nr_pages);
+	pr_info("Image created (%d pages copied, %d zero pages)\n", nr_copy_pages, nr_zero_pages);
 
 	return 0;
 }
@@ -2094,15 +2144,22 @@ static int init_header(struct swsusp_info *info)
 	return init_header_complete(info);
 }
 
+#define ENCODED_PFN_ZERO_FLAG ((unsigned long)1 << (BITS_PER_LONG - 1))
+#define ENCODED_PFN_MASK (~ENCODED_PFN_ZERO_FLAG)
+
 /**
  * pack_pfns - Prepare PFNs for saving.
  * @bm: Memory bitmap.
  * @buf: Memory buffer to store the PFNs in.
+ * @zero_bm: Memory bitmap containing PFNs of zero pages.
  *
  * PFNs corresponding to set bits in @bm are stored in the area of memory
- * pointed to by @buf (1 page at a time).
+ * pointed to by @buf (1 page at a time). Pages which were filled with only
+ * zeros will have the highest bit set in the packed format to distinguish
+ * them from PFNs which will be contained in the image file.
  */
-static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
+static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm,
+		struct memory_bitmap *zero_bm)
 {
 	int j;
 
@@ -2110,6 +2167,8 @@ static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
 		buf[j] = memory_bm_next_pfn(bm);
 		if (unlikely(buf[j] == BM_END_OF_MAP))
 			break;
+		if (memory_bm_test_bit(zero_bm, buf[j]))
+			buf[j] |= ENCODED_PFN_ZERO_FLAG;
 	}
 }
 
@@ -2151,7 +2210,7 @@ int snapshot_read_next(struct snapshot_handle *handle)
 		memory_bm_position_reset(&copy_bm);
 	} else if (handle->cur <= nr_meta_pages) {
 		clear_page(buffer);
-		pack_pfns(buffer, &orig_bm);
+		pack_pfns(buffer, &orig_bm, &zero_bm);
 	} else {
 		struct page *page;
 
@@ -2247,24 +2306,35 @@ static int load_header(struct swsusp_info *info)
  * unpack_orig_pfns - Set bits corresponding to given PFNs in a memory bitmap.
  * @bm: Memory bitmap.
  * @buf: Area of memory containing the PFNs.
+ * @zero_bm: Memory bitmap with the zero PFNs marked.
  *
  * For each element of the array pointed to by @buf (1 page at a time), set the
- * corresponding bit in @bm.
+ * corresponding bit in @bm. If the page was originally populated with only
+ * zeros then a corresponding bit will also be set in @zero_bm.
  */
-static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
+static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm,
+		struct memory_bitmap *zero_bm)
 {
+	unsigned long decoded_pfn;
+        bool zero;
 	int j;
 
 	for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
 		if (unlikely(buf[j] == BM_END_OF_MAP))
 			break;
 
-		if (pfn_valid(buf[j]) && memory_bm_pfn_present(bm, buf[j])) {
-			memory_bm_set_bit(bm, buf[j]);
+		zero = !!(buf[j] & ENCODED_PFN_ZERO_FLAG);
+		decoded_pfn = buf[j] & ENCODED_PFN_MASK;
+		if (pfn_valid(decoded_pfn) && memory_bm_pfn_present(bm, decoded_pfn)) {
+			memory_bm_set_bit(bm, decoded_pfn);
+			if (zero) {
+				memory_bm_set_bit(zero_bm, decoded_pfn);
+				nr_zero_pages++;
+			}
 		} else {
-			if (!pfn_valid(buf[j]))
+			if (!pfn_valid(decoded_pfn))
 				pr_err(FW_BUG "Memory map mismatch at 0x%llx after hibernation\n",
-				       (unsigned long long)PFN_PHYS(buf[j]));
+				       (unsigned long long)PFN_PHYS(decoded_pfn));
 			return -EFAULT;
 		}
 	}
@@ -2486,6 +2556,7 @@ static inline void free_highmem_data(void) {}
  * prepare_image - Make room for loading hibernation image.
  * @new_bm: Uninitialized memory bitmap structure.
  * @bm: Memory bitmap with unsafe pages marked.
+ * @zero_bm: Memory bitmap containing the zero pages.
  *
  * Use @bm to mark the pages that will be overwritten in the process of
  * restoring the system memory state from the suspend image ("unsafe" pages)
@@ -2496,10 +2567,15 @@ static inline void free_highmem_data(void) {}
  * pages will be used for just yet.  Instead, we mark them all as allocated and
  * create a lists of "safe" pages to be used later.  On systems with high
  * memory a list of "safe" highmem pages is created too.
+ *
+ * Because it was not known which pages were unsafe when @zero_bm was created,
+ * make a copy of it and recreate it within safe pages.
  */
-static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
+static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm,
+		struct memory_bitmap *zero_bm)
 {
 	unsigned int nr_pages, nr_highmem;
+	struct memory_bitmap tmp;
 	struct linked_page *lp;
 	int error;
 
@@ -2516,6 +2592,24 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
 
 	duplicate_memory_bitmap(new_bm, bm);
 	memory_bm_free(bm, PG_UNSAFE_KEEP);
+
+	/* Make a copy of zero_bm so it can be created in safe pages */
+	error = memory_bm_create(&tmp, GFP_ATOMIC, PG_ANY);
+	if (error)
+		goto Free;
+
+	duplicate_memory_bitmap(&tmp, zero_bm);
+	memory_bm_free(zero_bm, PG_UNSAFE_KEEP);
+
+	/* Recreate zero_bm in safe pages */
+	error = memory_bm_create(zero_bm, GFP_ATOMIC, PG_SAFE);
+	if (error)
+		goto Free;
+
+	duplicate_memory_bitmap(zero_bm, &tmp);
+	memory_bm_free(&tmp, PG_UNSAFE_KEEP);
+	/* At this point zero_bm is in safe pages and it can be used for restoring. */
+
 	if (nr_highmem > 0) {
 		error = prepare_highmem_image(bm, &nr_highmem);
 		if (error)
@@ -2530,7 +2624,7 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
 	 *
 	 * nr_copy_pages cannot be less than allocated_unsafe_pages too.
 	 */
-	nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
+	nr_pages = (nr_zero_pages + nr_copy_pages) - nr_highmem - allocated_unsafe_pages;
 	nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
 	while (nr_pages > 0) {
 		lp = get_image_page(GFP_ATOMIC, PG_SAFE);
@@ -2543,7 +2637,7 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
 		nr_pages--;
 	}
 	/* Preallocate memory for the image */
-	nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
+	nr_pages = (nr_zero_pages + nr_copy_pages) - nr_highmem - allocated_unsafe_pages;
 	while (nr_pages > 0) {
 		lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
 		if (!lp) {
@@ -2631,8 +2725,9 @@ int snapshot_write_next(struct snapshot_handle *handle)
 	static struct chain_allocator ca;
 	int error = 0;
 
+next:
 	/* Check if we have already loaded the entire image */
-	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages)
+	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages + nr_zero_pages)
 		return 0;
 
 	handle->sync_read = 1;
@@ -2657,19 +2752,26 @@ int snapshot_write_next(struct snapshot_handle *handle)
 		if (error)
 			return error;
 
+		error = memory_bm_create(&zero_bm, GFP_ATOMIC, PG_ANY);
+		if (error)
+			return error;
+
+		nr_zero_pages = 0;
+
 		hibernate_restore_protection_begin();
 	} else if (handle->cur <= nr_meta_pages + 1) {
-		error = unpack_orig_pfns(buffer, &copy_bm);
+		error = unpack_orig_pfns(buffer, &copy_bm, &zero_bm);
 		if (error)
 			return error;
 
 		if (handle->cur == nr_meta_pages + 1) {
-			error = prepare_image(&orig_bm, &copy_bm);
+			error = prepare_image(&orig_bm, &copy_bm, &zero_bm);
 			if (error)
 				return error;
 
 			chain_init(&ca, GFP_ATOMIC, PG_SAFE);
 			memory_bm_position_reset(&orig_bm);
+			memory_bm_position_reset(&zero_bm);
 			restore_pblist = NULL;
 			handle->buffer = get_buffer(&orig_bm, &ca);
 			handle->sync_read = 0;
@@ -2686,6 +2788,14 @@ int snapshot_write_next(struct snapshot_handle *handle)
 			handle->sync_read = 0;
 	}
 	handle->cur++;
+
+	/* Zero pages were not included in the image, memset it and move on. */
+	if ((handle->cur > (nr_meta_pages + 1)) &&
+			memory_bm_test_bit(&zero_bm, memory_bm_get_current(&orig_bm))) {
+		memset(handle->buffer, 0, PAGE_SIZE);
+		goto next;
+	}
+
 	return PAGE_SIZE;
 }
 
@@ -2702,7 +2812,7 @@ void snapshot_write_finalize(struct snapshot_handle *handle)
 	copy_last_highmem_page();
 	hibernate_restore_protect_page(handle->buffer);
 	/* Do that only if we have loaded the image entirely */
-	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) {
+	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages + nr_zero_pages) {
 		memory_bm_recycle(&orig_bm);
 		free_highmem_data();
 	}
-- 
2.41.0.162.gfafddb0af9-goog


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v7] PM: hibernate: don't store zero pages in the image file.
  2023-01-13 19:30 [PATCH] PM: hibernate: don't store zero pages in the image file Brian Geffon
                   ` (4 preceding siblings ...)
  2023-06-26 15:47 ` [PATCH v6] " Brian Geffon
@ 2023-07-14 17:55 ` Brian Geffon
  2023-07-20 18:18   ` Rafael J. Wysocki
  5 siblings, 1 reply; 16+ messages in thread
From: Brian Geffon @ 2023-07-14 17:55 UTC (permalink / raw)
  To: Rafael J . Wysocki
  Cc: Pavel Machek, Len Brown, linux-pm, linux-kernel,
	Matthias Kaehlcke, Brian Geffon

On ChromeOS we've observed a considerable number of in-use pages filled with
zeros. Today with hibernate it's entirely possible that saveable pages are just
zero filled. Since we're already copying pages word-by-word in do_copy_page it
becomes almost free to determine if a page was completely filled with zeros.

This change introduces a new bitmap which will track these zero pages. If a page
is zero it will not be included in the saved image, instead to track these zero
pages in the image file we will introduce a new flag which we will set on the
packed PFN list. When reading back in the image file we will detect these zero
page PFNs and rebuild the zero page bitmap.

When the image is being loaded through calls to write_next_page if we encounter
a zero page we will silently memset it to 0 and then continue on to the next
page. Given the implementation in snapshot_read_next/snapshot_write_next this
change  will be transparent to non-compressed/compressed and swsusp modes of
operation.

To provide some concrete numbers from simple ad-hoc testing, on a device which
was lightly in use we saw that:

PM: hibernation: Image created (964408 pages copied, 548304 zero pages)

Of the approximately 6.2GB of saveable pages 2.2GB (36%) were just zero filled
and could be tracked entirely within the packed PFN list. The savings would
obviously be much lower for lzo compressed images, but even in the case of
compression not copying pages across to the compression threads will still
speed things up. It's also possible that we would see better overall compression
ratios as larger regions of "real data" would improve the compressibility.

Finally, such an approach could dramatically improve swsusp performance
as each one of those zero pages requires a write syscall to reload, by
handling it as part of the packed PFN list we're able to fully avoid
that.

Patch v6 -> v7:
- Fix a bug in image_loaded() not accounting for zero pages.

Patch v5 -> v6:
- Correcting missed variable when changing types.

Patch v4 -> v5:
- Addressed numerous style comments from Rafael J. Wysocki.

Patch v3 -> v4:
- Suggestions from Matthias Kaehlcke:
 - Return number of copy pages from copy_data_pages
 - Use an explicit temporary bitmap while moving the zerm_bm
   to safe pages.

Patch v2 -> v3:
- Use nr_zero_pages rather than walking each pfn to count.
- Make sure zero_bm is allocated in safe pages on resume.
 When reading in the pfn list and building the zero page bm
 we don't know which pages are unsafe yet so we will need to
 copy this bm to safe pages after the metadata has been read.

Patch v1 -> v2:
- minor code mistake from rebasing corrected.

Signed-off-by: Brian Geffon <bgeffon@google.com>
---
 kernel/power/snapshot.c | 186 ++++++++++++++++++++++++++++++++--------
 1 file changed, 148 insertions(+), 38 deletions(-)

diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index cd8b7b35f1e8..1f2a052b56b8 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -404,6 +404,7 @@ struct bm_position {
 	struct mem_zone_bm_rtree *zone;
 	struct rtree_node *node;
 	unsigned long node_pfn;
+	unsigned long cur_pfn;
 	int node_bit;
 };
 
@@ -589,6 +590,7 @@ static void memory_bm_position_reset(struct memory_bitmap *bm)
 	bm->cur.node = list_entry(bm->cur.zone->leaves.next,
 				  struct rtree_node, list);
 	bm->cur.node_pfn = 0;
+	bm->cur.cur_pfn = BM_END_OF_MAP;
 	bm->cur.node_bit = 0;
 }
 
@@ -799,6 +801,7 @@ static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
 	bm->cur.zone = zone;
 	bm->cur.node = node;
 	bm->cur.node_pfn = (pfn - zone->start_pfn) & ~BM_BLOCK_MASK;
+	bm->cur.cur_pfn = pfn;
 
 	/* Set return values */
 	*addr = node->data;
@@ -850,6 +853,11 @@ static void memory_bm_clear_current(struct memory_bitmap *bm)
 	clear_bit(bit, bm->cur.node->data);
 }
 
+static unsigned long memory_bm_get_current(struct memory_bitmap *bm)
+{
+	return bm->cur.cur_pfn;
+}
+
 static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
 {
 	void *addr;
@@ -929,10 +937,12 @@ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
 		if (bit < bits) {
 			pfn = bm->cur.zone->start_pfn + bm->cur.node_pfn + bit;
 			bm->cur.node_bit = bit + 1;
+			bm->cur.cur_pfn = pfn;
 			return pfn;
 		}
 	} while (rtree_next_node(bm));
 
+	bm->cur.cur_pfn = BM_END_OF_MAP;
 	return BM_END_OF_MAP;
 }
 
@@ -1371,14 +1381,18 @@ static unsigned int count_data_pages(void)
 
 /*
  * This is needed, because copy_page and memcpy are not usable for copying
- * task structs.
+ * task structs. Returns true if the page was filled with only zeros, otherwise false.
  */
-static inline void do_copy_page(long *dst, long *src)
+static inline bool do_copy_page(long *dst, long *src)
 {
+	long z = 0;
 	int n;
 
-	for (n = PAGE_SIZE / sizeof(long); n; n--)
+	for (n = PAGE_SIZE / sizeof(long); n; n--) {
+		z |= *src;
 		*dst++ = *src++;
+	}
+	return !z;
 }
 
 /**
@@ -1387,17 +1401,20 @@ static inline void do_copy_page(long *dst, long *src)
  * Check if the page we are going to copy is marked as present in the kernel
  * page tables. This always is the case if CONFIG_DEBUG_PAGEALLOC or
  * CONFIG_ARCH_HAS_SET_DIRECT_MAP is not set. In that case kernel_page_present()
- * always returns 'true'.
+ * always returns 'true'. Returns true if the page was entirely composed of zeros
+ * otherwise it will return false.
  */
-static void safe_copy_page(void *dst, struct page *s_page)
+static bool safe_copy_page(void *dst, struct page *s_page)
 {
+	bool zeros_only;
 	if (kernel_page_present(s_page)) {
-		do_copy_page(dst, page_address(s_page));
+		zeros_only = do_copy_page(dst, page_address(s_page));
 	} else {
 		hibernate_map_page(s_page);
-		do_copy_page(dst, page_address(s_page));
+		zeros_only = do_copy_page(dst, page_address(s_page));
 		hibernate_unmap_page(s_page);
 	}
+	return zeros_only;
 }
 
 #ifdef CONFIG_HIGHMEM
@@ -1407,17 +1424,18 @@ static inline struct page *page_is_saveable(struct zone *zone, unsigned long pfn
 		saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
 }
 
-static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
+static bool copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
 {
 	struct page *s_page, *d_page;
 	void *src, *dst;
+	bool zeros_only;
 
 	s_page = pfn_to_page(src_pfn);
 	d_page = pfn_to_page(dst_pfn);
 	if (PageHighMem(s_page)) {
 		src = kmap_atomic(s_page);
 		dst = kmap_atomic(d_page);
-		do_copy_page(dst, src);
+		zeros_only = do_copy_page(dst, src);
 		kunmap_atomic(dst);
 		kunmap_atomic(src);
 	} else {
@@ -1426,30 +1444,39 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
 			 * The page pointed to by src may contain some kernel
 			 * data modified by kmap_atomic()
 			 */
-			safe_copy_page(buffer, s_page);
+			zeros_only = safe_copy_page(buffer, s_page);
 			dst = kmap_atomic(d_page);
 			copy_page(dst, buffer);
 			kunmap_atomic(dst);
 		} else {
-			safe_copy_page(page_address(d_page), s_page);
+			zeros_only = safe_copy_page(page_address(d_page), s_page);
 		}
 	}
+	return zeros_only;
 }
 #else
 #define page_is_saveable(zone, pfn)	saveable_page(zone, pfn)
 
-static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
+static inline int copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
 {
-	safe_copy_page(page_address(pfn_to_page(dst_pfn)),
+	return safe_copy_page(page_address(pfn_to_page(dst_pfn)),
 				pfn_to_page(src_pfn));
 }
 #endif /* CONFIG_HIGHMEM */
 
-static void copy_data_pages(struct memory_bitmap *copy_bm,
-			    struct memory_bitmap *orig_bm)
+/*
+ * Copy data pages will copy all pages into pages pulled from the copy_bm.
+ * If a page was entirely filled with zeros it will be marked in the zero_bm.
+ *
+ * Returns the number of pages copied.
+ */
+static unsigned long copy_data_pages(struct memory_bitmap *copy_bm,
+			    struct memory_bitmap *orig_bm,
+			    struct memory_bitmap *zero_bm)
 {
+	unsigned long copied_pages = 0;
 	struct zone *zone;
-	unsigned long pfn;
+	unsigned long pfn, copy_pfn;
 
 	for_each_populated_zone(zone) {
 		unsigned long max_zone_pfn;
@@ -1462,18 +1489,30 @@ static void copy_data_pages(struct memory_bitmap *copy_bm,
 	}
 	memory_bm_position_reset(orig_bm);
 	memory_bm_position_reset(copy_bm);
+	copy_pfn = memory_bm_next_pfn(copy_bm);
 	for(;;) {
 		pfn = memory_bm_next_pfn(orig_bm);
 		if (unlikely(pfn == BM_END_OF_MAP))
 			break;
-		copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
+		if (copy_data_page(copy_pfn, pfn)) {
+			memory_bm_set_bit(zero_bm, pfn);
+
+			/* Use this copy_pfn for a page that is not full of zeros */
+			continue;
+		}
+		copied_pages++;
+		copy_pfn = memory_bm_next_pfn(copy_bm);
 	}
+	return copied_pages;
 }
 
 /* Total number of image pages */
 static unsigned int nr_copy_pages;
 /* Number of pages needed for saving the original pfns of the image pages */
 static unsigned int nr_meta_pages;
+/* Number of zero pages */
+static unsigned int nr_zero_pages;
+
 /*
  * Numbers of normal and highmem page frames allocated for hibernation image
  * before suspending devices.
@@ -1494,6 +1533,9 @@ static struct memory_bitmap orig_bm;
  */
 static struct memory_bitmap copy_bm;
 
+/* Memory bitmap which tracks which saveable pages were zero filled. */
+static struct memory_bitmap zero_bm;
+
 /**
  * swsusp_free - Free pages allocated for hibernation image.
  *
@@ -1538,6 +1580,7 @@ void swsusp_free(void)
 out:
 	nr_copy_pages = 0;
 	nr_meta_pages = 0;
+	nr_zero_pages = 0;
 	restore_pblist = NULL;
 	buffer = NULL;
 	alloc_normal = 0;
@@ -1756,8 +1799,15 @@ int hibernate_preallocate_memory(void)
 		goto err_out;
 	}
 
+	error = memory_bm_create(&zero_bm, GFP_IMAGE, PG_ANY);
+	if (error) {
+		pr_err("Cannot allocate zero bitmap\n");
+		goto err_out;
+	}
+
 	alloc_normal = 0;
 	alloc_highmem = 0;
+	nr_zero_pages = 0;
 
 	/* Count the number of saveable data pages. */
 	save_highmem = count_highmem_pages();
@@ -2037,19 +2087,19 @@ asmlinkage __visible int swsusp_save(void)
 	 * Kill them.
 	 */
 	drain_local_pages(NULL);
-	copy_data_pages(&copy_bm, &orig_bm);
+	nr_copy_pages = copy_data_pages(&copy_bm, &orig_bm, &zero_bm);
 
 	/*
 	 * End of critical section. From now on, we can write to memory,
 	 * but we should not touch disk. This specially means we must _not_
 	 * touch swap space! Except we must write out our image of course.
 	 */
-
 	nr_pages += nr_highmem;
-	nr_copy_pages = nr_pages;
+	/* We don't actually copy the zero pages */
+	nr_zero_pages = nr_pages - nr_copy_pages;
 	nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
 
-	pr_info("Image created (%d pages copied)\n", nr_pages);
+	pr_info("Image created (%d pages copied, %d zero pages)\n", nr_copy_pages, nr_zero_pages);
 
 	return 0;
 }
@@ -2094,15 +2144,22 @@ static int init_header(struct swsusp_info *info)
 	return init_header_complete(info);
 }
 
+#define ENCODED_PFN_ZERO_FLAG ((unsigned long)1 << (BITS_PER_LONG - 1))
+#define ENCODED_PFN_MASK (~ENCODED_PFN_ZERO_FLAG)
+
 /**
  * pack_pfns - Prepare PFNs for saving.
  * @bm: Memory bitmap.
  * @buf: Memory buffer to store the PFNs in.
+ * @zero_bm: Memory bitmap containing PFNs of zero pages.
  *
  * PFNs corresponding to set bits in @bm are stored in the area of memory
- * pointed to by @buf (1 page at a time).
+ * pointed to by @buf (1 page at a time). Pages which were filled with only
+ * zeros will have the highest bit set in the packed format to distinguish
+ * them from PFNs which will be contained in the image file.
  */
-static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
+static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm,
+		struct memory_bitmap *zero_bm)
 {
 	int j;
 
@@ -2110,6 +2167,8 @@ static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
 		buf[j] = memory_bm_next_pfn(bm);
 		if (unlikely(buf[j] == BM_END_OF_MAP))
 			break;
+		if (memory_bm_test_bit(zero_bm, buf[j]))
+			buf[j] |= ENCODED_PFN_ZERO_FLAG;
 	}
 }
 
@@ -2151,7 +2210,7 @@ int snapshot_read_next(struct snapshot_handle *handle)
 		memory_bm_position_reset(&copy_bm);
 	} else if (handle->cur <= nr_meta_pages) {
 		clear_page(buffer);
-		pack_pfns(buffer, &orig_bm);
+		pack_pfns(buffer, &orig_bm, &zero_bm);
 	} else {
 		struct page *page;
 
@@ -2247,24 +2306,35 @@ static int load_header(struct swsusp_info *info)
  * unpack_orig_pfns - Set bits corresponding to given PFNs in a memory bitmap.
  * @bm: Memory bitmap.
  * @buf: Area of memory containing the PFNs.
+ * @zero_bm: Memory bitmap with the zero PFNs marked.
  *
  * For each element of the array pointed to by @buf (1 page at a time), set the
- * corresponding bit in @bm.
+ * corresponding bit in @bm. If the page was originally populated with only
+ * zeros then a corresponding bit will also be set in @zero_bm.
  */
-static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
+static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm,
+		struct memory_bitmap *zero_bm)
 {
+	unsigned long decoded_pfn;
+        bool zero;
 	int j;
 
 	for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
 		if (unlikely(buf[j] == BM_END_OF_MAP))
 			break;
 
-		if (pfn_valid(buf[j]) && memory_bm_pfn_present(bm, buf[j])) {
-			memory_bm_set_bit(bm, buf[j]);
+		zero = !!(buf[j] & ENCODED_PFN_ZERO_FLAG);
+		decoded_pfn = buf[j] & ENCODED_PFN_MASK;
+		if (pfn_valid(decoded_pfn) && memory_bm_pfn_present(bm, decoded_pfn)) {
+			memory_bm_set_bit(bm, decoded_pfn);
+			if (zero) {
+				memory_bm_set_bit(zero_bm, decoded_pfn);
+				nr_zero_pages++;
+			}
 		} else {
-			if (!pfn_valid(buf[j]))
+			if (!pfn_valid(decoded_pfn))
 				pr_err(FW_BUG "Memory map mismatch at 0x%llx after hibernation\n",
-				       (unsigned long long)PFN_PHYS(buf[j]));
+				       (unsigned long long)PFN_PHYS(decoded_pfn));
 			return -EFAULT;
 		}
 	}
@@ -2486,6 +2556,7 @@ static inline void free_highmem_data(void) {}
  * prepare_image - Make room for loading hibernation image.
  * @new_bm: Uninitialized memory bitmap structure.
  * @bm: Memory bitmap with unsafe pages marked.
+ * @zero_bm: Memory bitmap containing the zero pages.
  *
  * Use @bm to mark the pages that will be overwritten in the process of
  * restoring the system memory state from the suspend image ("unsafe" pages)
@@ -2496,10 +2567,15 @@ static inline void free_highmem_data(void) {}
  * pages will be used for just yet.  Instead, we mark them all as allocated and
  * create a lists of "safe" pages to be used later.  On systems with high
  * memory a list of "safe" highmem pages is created too.
+ *
+ * Because it was not known which pages were unsafe when @zero_bm was created,
+ * make a copy of it and recreate it within safe pages.
  */
-static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
+static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm,
+		struct memory_bitmap *zero_bm)
 {
 	unsigned int nr_pages, nr_highmem;
+	struct memory_bitmap tmp;
 	struct linked_page *lp;
 	int error;
 
@@ -2516,6 +2592,24 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
 
 	duplicate_memory_bitmap(new_bm, bm);
 	memory_bm_free(bm, PG_UNSAFE_KEEP);
+
+	/* Make a copy of zero_bm so it can be created in safe pages */
+	error = memory_bm_create(&tmp, GFP_ATOMIC, PG_ANY);
+	if (error)
+		goto Free;
+
+	duplicate_memory_bitmap(&tmp, zero_bm);
+	memory_bm_free(zero_bm, PG_UNSAFE_KEEP);
+
+	/* Recreate zero_bm in safe pages */
+	error = memory_bm_create(zero_bm, GFP_ATOMIC, PG_SAFE);
+	if (error)
+		goto Free;
+
+	duplicate_memory_bitmap(zero_bm, &tmp);
+	memory_bm_free(&tmp, PG_UNSAFE_KEEP);
+	/* At this point zero_bm is in safe pages and it can be used for restoring. */
+
 	if (nr_highmem > 0) {
 		error = prepare_highmem_image(bm, &nr_highmem);
 		if (error)
@@ -2530,7 +2624,7 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
 	 *
 	 * nr_copy_pages cannot be less than allocated_unsafe_pages too.
 	 */
-	nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
+	nr_pages = (nr_zero_pages + nr_copy_pages) - nr_highmem - allocated_unsafe_pages;
 	nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
 	while (nr_pages > 0) {
 		lp = get_image_page(GFP_ATOMIC, PG_SAFE);
@@ -2543,7 +2637,7 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
 		nr_pages--;
 	}
 	/* Preallocate memory for the image */
-	nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
+	nr_pages = (nr_zero_pages + nr_copy_pages) - nr_highmem - allocated_unsafe_pages;
 	while (nr_pages > 0) {
 		lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
 		if (!lp) {
@@ -2631,8 +2725,9 @@ int snapshot_write_next(struct snapshot_handle *handle)
 	static struct chain_allocator ca;
 	int error = 0;
 
+next:
 	/* Check if we have already loaded the entire image */
-	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages)
+	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages + nr_zero_pages)
 		return 0;
 
 	handle->sync_read = 1;
@@ -2657,19 +2752,26 @@ int snapshot_write_next(struct snapshot_handle *handle)
 		if (error)
 			return error;
 
+		error = memory_bm_create(&zero_bm, GFP_ATOMIC, PG_ANY);
+		if (error)
+			return error;
+
+		nr_zero_pages = 0;
+
 		hibernate_restore_protection_begin();
 	} else if (handle->cur <= nr_meta_pages + 1) {
-		error = unpack_orig_pfns(buffer, &copy_bm);
+		error = unpack_orig_pfns(buffer, &copy_bm, &zero_bm);
 		if (error)
 			return error;
 
 		if (handle->cur == nr_meta_pages + 1) {
-			error = prepare_image(&orig_bm, &copy_bm);
+			error = prepare_image(&orig_bm, &copy_bm, &zero_bm);
 			if (error)
 				return error;
 
 			chain_init(&ca, GFP_ATOMIC, PG_SAFE);
 			memory_bm_position_reset(&orig_bm);
+			memory_bm_position_reset(&zero_bm);
 			restore_pblist = NULL;
 			handle->buffer = get_buffer(&orig_bm, &ca);
 			handle->sync_read = 0;
@@ -2686,6 +2788,14 @@ int snapshot_write_next(struct snapshot_handle *handle)
 			handle->sync_read = 0;
 	}
 	handle->cur++;
+
+	/* Zero pages were not included in the image, memset it and move on. */
+	if ((handle->cur > (nr_meta_pages + 1)) &&
+			memory_bm_test_bit(&zero_bm, memory_bm_get_current(&orig_bm))) {
+		memset(handle->buffer, 0, PAGE_SIZE);
+		goto next;
+	}
+
 	return PAGE_SIZE;
 }
 
@@ -2702,7 +2812,7 @@ void snapshot_write_finalize(struct snapshot_handle *handle)
 	copy_last_highmem_page();
 	hibernate_restore_protect_page(handle->buffer);
 	/* Do that only if we have loaded the image entirely */
-	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) {
+	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages + nr_zero_pages) {
 		memory_bm_recycle(&orig_bm);
 		free_highmem_data();
 	}
@@ -2711,7 +2821,7 @@ void snapshot_write_finalize(struct snapshot_handle *handle)
 int snapshot_image_loaded(struct snapshot_handle *handle)
 {
 	return !(!nr_copy_pages || !last_highmem_page_copied() ||
-			handle->cur <= nr_meta_pages + nr_copy_pages);
+			handle->cur <= nr_meta_pages + nr_copy_pages + nr_zero_pages);
 }
 
 #ifdef CONFIG_HIGHMEM
-- 
2.41.0.255.g8b1d071c50-goog


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH v7] PM: hibernate: don't store zero pages in the image file.
  2023-07-14 17:55 ` [PATCH v7] " Brian Geffon
@ 2023-07-20 18:18   ` Rafael J. Wysocki
  0 siblings, 0 replies; 16+ messages in thread
From: Rafael J. Wysocki @ 2023-07-20 18:18 UTC (permalink / raw)
  To: Brian Geffon
  Cc: Rafael J . Wysocki, Pavel Machek, Len Brown, linux-pm,
	linux-kernel, Matthias Kaehlcke

On Fri, Jul 14, 2023 at 7:55 PM Brian Geffon <bgeffon@google.com> wrote:
>
> On ChromeOS we've observed a considerable number of in-use pages filled with
> zeros. Today with hibernate it's entirely possible that saveable pages are just
> zero filled. Since we're already copying pages word-by-word in do_copy_page it
> becomes almost free to determine if a page was completely filled with zeros.
>
> This change introduces a new bitmap which will track these zero pages. If a page
> is zero it will not be included in the saved image, instead to track these zero
> pages in the image file we will introduce a new flag which we will set on the
> packed PFN list. When reading back in the image file we will detect these zero
> page PFNs and rebuild the zero page bitmap.
>
> When the image is being loaded through calls to write_next_page if we encounter
> a zero page we will silently memset it to 0 and then continue on to the next
> page. Given the implementation in snapshot_read_next/snapshot_write_next this
> change  will be transparent to non-compressed/compressed and swsusp modes of
> operation.
>
> To provide some concrete numbers from simple ad-hoc testing, on a device which
> was lightly in use we saw that:
>
> PM: hibernation: Image created (964408 pages copied, 548304 zero pages)
>
> Of the approximately 6.2GB of saveable pages 2.2GB (36%) were just zero filled
> and could be tracked entirely within the packed PFN list. The savings would
> obviously be much lower for lzo compressed images, but even in the case of
> compression not copying pages across to the compression threads will still
> speed things up. It's also possible that we would see better overall compression
> ratios as larger regions of "real data" would improve the compressibility.
>
> Finally, such an approach could dramatically improve swsusp performance
> as each one of those zero pages requires a write syscall to reload, by
> handling it as part of the packed PFN list we're able to fully avoid
> that.
>
> Patch v6 -> v7:
> - Fix a bug in image_loaded() not accounting for zero pages.
>
> Patch v5 -> v6:
> - Correcting missed variable when changing types.
>
> Patch v4 -> v5:
> - Addressed numerous style comments from Rafael J. Wysocki.
>
> Patch v3 -> v4:
> - Suggestions from Matthias Kaehlcke:
>  - Return number of copy pages from copy_data_pages
>  - Use an explicit temporary bitmap while moving the zerm_bm
>    to safe pages.
>
> Patch v2 -> v3:
> - Use nr_zero_pages rather than walking each pfn to count.
> - Make sure zero_bm is allocated in safe pages on resume.
>  When reading in the pfn list and building the zero page bm
>  we don't know which pages are unsafe yet so we will need to
>  copy this bm to safe pages after the metadata has been read.
>
> Patch v1 -> v2:
> - minor code mistake from rebasing corrected.
>
> Signed-off-by: Brian Geffon <bgeffon@google.com>
> ---
>  kernel/power/snapshot.c | 186 ++++++++++++++++++++++++++++++++--------
>  1 file changed, 148 insertions(+), 38 deletions(-)
>
> diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
> index cd8b7b35f1e8..1f2a052b56b8 100644
> --- a/kernel/power/snapshot.c
> +++ b/kernel/power/snapshot.c
> @@ -404,6 +404,7 @@ struct bm_position {
>         struct mem_zone_bm_rtree *zone;
>         struct rtree_node *node;
>         unsigned long node_pfn;
> +       unsigned long cur_pfn;
>         int node_bit;
>  };
>
> @@ -589,6 +590,7 @@ static void memory_bm_position_reset(struct memory_bitmap *bm)
>         bm->cur.node = list_entry(bm->cur.zone->leaves.next,
>                                   struct rtree_node, list);
>         bm->cur.node_pfn = 0;
> +       bm->cur.cur_pfn = BM_END_OF_MAP;
>         bm->cur.node_bit = 0;
>  }
>
> @@ -799,6 +801,7 @@ static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
>         bm->cur.zone = zone;
>         bm->cur.node = node;
>         bm->cur.node_pfn = (pfn - zone->start_pfn) & ~BM_BLOCK_MASK;
> +       bm->cur.cur_pfn = pfn;
>
>         /* Set return values */
>         *addr = node->data;
> @@ -850,6 +853,11 @@ static void memory_bm_clear_current(struct memory_bitmap *bm)
>         clear_bit(bit, bm->cur.node->data);
>  }
>
> +static unsigned long memory_bm_get_current(struct memory_bitmap *bm)
> +{
> +       return bm->cur.cur_pfn;
> +}
> +
>  static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
>  {
>         void *addr;
> @@ -929,10 +937,12 @@ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
>                 if (bit < bits) {
>                         pfn = bm->cur.zone->start_pfn + bm->cur.node_pfn + bit;
>                         bm->cur.node_bit = bit + 1;
> +                       bm->cur.cur_pfn = pfn;
>                         return pfn;
>                 }
>         } while (rtree_next_node(bm));
>
> +       bm->cur.cur_pfn = BM_END_OF_MAP;
>         return BM_END_OF_MAP;
>  }
>
> @@ -1371,14 +1381,18 @@ static unsigned int count_data_pages(void)
>
>  /*
>   * This is needed, because copy_page and memcpy are not usable for copying
> - * task structs.
> + * task structs. Returns true if the page was filled with only zeros, otherwise false.
>   */
> -static inline void do_copy_page(long *dst, long *src)
> +static inline bool do_copy_page(long *dst, long *src)
>  {
> +       long z = 0;
>         int n;
>
> -       for (n = PAGE_SIZE / sizeof(long); n; n--)
> +       for (n = PAGE_SIZE / sizeof(long); n; n--) {
> +               z |= *src;
>                 *dst++ = *src++;
> +       }
> +       return !z;
>  }
>
>  /**
> @@ -1387,17 +1401,20 @@ static inline void do_copy_page(long *dst, long *src)
>   * Check if the page we are going to copy is marked as present in the kernel
>   * page tables. This always is the case if CONFIG_DEBUG_PAGEALLOC or
>   * CONFIG_ARCH_HAS_SET_DIRECT_MAP is not set. In that case kernel_page_present()
> - * always returns 'true'.
> + * always returns 'true'. Returns true if the page was entirely composed of zeros
> + * otherwise it will return false.
>   */
> -static void safe_copy_page(void *dst, struct page *s_page)
> +static bool safe_copy_page(void *dst, struct page *s_page)
>  {
> +       bool zeros_only;
>         if (kernel_page_present(s_page)) {
> -               do_copy_page(dst, page_address(s_page));
> +               zeros_only = do_copy_page(dst, page_address(s_page));
>         } else {
>                 hibernate_map_page(s_page);
> -               do_copy_page(dst, page_address(s_page));
> +               zeros_only = do_copy_page(dst, page_address(s_page));
>                 hibernate_unmap_page(s_page);
>         }
> +       return zeros_only;
>  }
>
>  #ifdef CONFIG_HIGHMEM
> @@ -1407,17 +1424,18 @@ static inline struct page *page_is_saveable(struct zone *zone, unsigned long pfn
>                 saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
>  }
>
> -static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
> +static bool copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
>  {
>         struct page *s_page, *d_page;
>         void *src, *dst;
> +       bool zeros_only;
>
>         s_page = pfn_to_page(src_pfn);
>         d_page = pfn_to_page(dst_pfn);
>         if (PageHighMem(s_page)) {
>                 src = kmap_atomic(s_page);
>                 dst = kmap_atomic(d_page);
> -               do_copy_page(dst, src);
> +               zeros_only = do_copy_page(dst, src);
>                 kunmap_atomic(dst);
>                 kunmap_atomic(src);
>         } else {
> @@ -1426,30 +1444,39 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
>                          * The page pointed to by src may contain some kernel
>                          * data modified by kmap_atomic()
>                          */
> -                       safe_copy_page(buffer, s_page);
> +                       zeros_only = safe_copy_page(buffer, s_page);
>                         dst = kmap_atomic(d_page);
>                         copy_page(dst, buffer);
>                         kunmap_atomic(dst);
>                 } else {
> -                       safe_copy_page(page_address(d_page), s_page);
> +                       zeros_only = safe_copy_page(page_address(d_page), s_page);
>                 }
>         }
> +       return zeros_only;
>  }
>  #else
>  #define page_is_saveable(zone, pfn)    saveable_page(zone, pfn)
>
> -static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
> +static inline int copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
>  {
> -       safe_copy_page(page_address(pfn_to_page(dst_pfn)),
> +       return safe_copy_page(page_address(pfn_to_page(dst_pfn)),
>                                 pfn_to_page(src_pfn));
>  }
>  #endif /* CONFIG_HIGHMEM */
>
> -static void copy_data_pages(struct memory_bitmap *copy_bm,
> -                           struct memory_bitmap *orig_bm)
> +/*
> + * Copy data pages will copy all pages into pages pulled from the copy_bm.
> + * If a page was entirely filled with zeros it will be marked in the zero_bm.
> + *
> + * Returns the number of pages copied.
> + */
> +static unsigned long copy_data_pages(struct memory_bitmap *copy_bm,
> +                           struct memory_bitmap *orig_bm,
> +                           struct memory_bitmap *zero_bm)
>  {
> +       unsigned long copied_pages = 0;
>         struct zone *zone;
> -       unsigned long pfn;
> +       unsigned long pfn, copy_pfn;
>
>         for_each_populated_zone(zone) {
>                 unsigned long max_zone_pfn;
> @@ -1462,18 +1489,30 @@ static void copy_data_pages(struct memory_bitmap *copy_bm,
>         }
>         memory_bm_position_reset(orig_bm);
>         memory_bm_position_reset(copy_bm);
> +       copy_pfn = memory_bm_next_pfn(copy_bm);
>         for(;;) {
>                 pfn = memory_bm_next_pfn(orig_bm);
>                 if (unlikely(pfn == BM_END_OF_MAP))
>                         break;
> -               copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
> +               if (copy_data_page(copy_pfn, pfn)) {
> +                       memory_bm_set_bit(zero_bm, pfn);
> +
> +                       /* Use this copy_pfn for a page that is not full of zeros */
> +                       continue;
> +               }
> +               copied_pages++;
> +               copy_pfn = memory_bm_next_pfn(copy_bm);
>         }
> +       return copied_pages;
>  }
>
>  /* Total number of image pages */
>  static unsigned int nr_copy_pages;
>  /* Number of pages needed for saving the original pfns of the image pages */
>  static unsigned int nr_meta_pages;
> +/* Number of zero pages */
> +static unsigned int nr_zero_pages;
> +
>  /*
>   * Numbers of normal and highmem page frames allocated for hibernation image
>   * before suspending devices.
> @@ -1494,6 +1533,9 @@ static struct memory_bitmap orig_bm;
>   */
>  static struct memory_bitmap copy_bm;
>
> +/* Memory bitmap which tracks which saveable pages were zero filled. */
> +static struct memory_bitmap zero_bm;
> +
>  /**
>   * swsusp_free - Free pages allocated for hibernation image.
>   *
> @@ -1538,6 +1580,7 @@ void swsusp_free(void)
>  out:
>         nr_copy_pages = 0;
>         nr_meta_pages = 0;
> +       nr_zero_pages = 0;
>         restore_pblist = NULL;
>         buffer = NULL;
>         alloc_normal = 0;
> @@ -1756,8 +1799,15 @@ int hibernate_preallocate_memory(void)
>                 goto err_out;
>         }
>
> +       error = memory_bm_create(&zero_bm, GFP_IMAGE, PG_ANY);
> +       if (error) {
> +               pr_err("Cannot allocate zero bitmap\n");
> +               goto err_out;
> +       }
> +
>         alloc_normal = 0;
>         alloc_highmem = 0;
> +       nr_zero_pages = 0;
>
>         /* Count the number of saveable data pages. */
>         save_highmem = count_highmem_pages();
> @@ -2037,19 +2087,19 @@ asmlinkage __visible int swsusp_save(void)
>          * Kill them.
>          */
>         drain_local_pages(NULL);
> -       copy_data_pages(&copy_bm, &orig_bm);
> +       nr_copy_pages = copy_data_pages(&copy_bm, &orig_bm, &zero_bm);
>
>         /*
>          * End of critical section. From now on, we can write to memory,
>          * but we should not touch disk. This specially means we must _not_
>          * touch swap space! Except we must write out our image of course.
>          */
> -
>         nr_pages += nr_highmem;
> -       nr_copy_pages = nr_pages;
> +       /* We don't actually copy the zero pages */
> +       nr_zero_pages = nr_pages - nr_copy_pages;
>         nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
>
> -       pr_info("Image created (%d pages copied)\n", nr_pages);
> +       pr_info("Image created (%d pages copied, %d zero pages)\n", nr_copy_pages, nr_zero_pages);
>
>         return 0;
>  }
> @@ -2094,15 +2144,22 @@ static int init_header(struct swsusp_info *info)
>         return init_header_complete(info);
>  }
>
> +#define ENCODED_PFN_ZERO_FLAG ((unsigned long)1 << (BITS_PER_LONG - 1))
> +#define ENCODED_PFN_MASK (~ENCODED_PFN_ZERO_FLAG)
> +
>  /**
>   * pack_pfns - Prepare PFNs for saving.
>   * @bm: Memory bitmap.
>   * @buf: Memory buffer to store the PFNs in.
> + * @zero_bm: Memory bitmap containing PFNs of zero pages.
>   *
>   * PFNs corresponding to set bits in @bm are stored in the area of memory
> - * pointed to by @buf (1 page at a time).
> + * pointed to by @buf (1 page at a time). Pages which were filled with only
> + * zeros will have the highest bit set in the packed format to distinguish
> + * them from PFNs which will be contained in the image file.
>   */
> -static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
> +static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm,
> +               struct memory_bitmap *zero_bm)
>  {
>         int j;
>
> @@ -2110,6 +2167,8 @@ static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
>                 buf[j] = memory_bm_next_pfn(bm);
>                 if (unlikely(buf[j] == BM_END_OF_MAP))
>                         break;
> +               if (memory_bm_test_bit(zero_bm, buf[j]))
> +                       buf[j] |= ENCODED_PFN_ZERO_FLAG;
>         }
>  }
>
> @@ -2151,7 +2210,7 @@ int snapshot_read_next(struct snapshot_handle *handle)
>                 memory_bm_position_reset(&copy_bm);
>         } else if (handle->cur <= nr_meta_pages) {
>                 clear_page(buffer);
> -               pack_pfns(buffer, &orig_bm);
> +               pack_pfns(buffer, &orig_bm, &zero_bm);
>         } else {
>                 struct page *page;
>
> @@ -2247,24 +2306,35 @@ static int load_header(struct swsusp_info *info)
>   * unpack_orig_pfns - Set bits corresponding to given PFNs in a memory bitmap.
>   * @bm: Memory bitmap.
>   * @buf: Area of memory containing the PFNs.
> + * @zero_bm: Memory bitmap with the zero PFNs marked.
>   *
>   * For each element of the array pointed to by @buf (1 page at a time), set the
> - * corresponding bit in @bm.
> + * corresponding bit in @bm. If the page was originally populated with only
> + * zeros then a corresponding bit will also be set in @zero_bm.
>   */
> -static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
> +static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm,
> +               struct memory_bitmap *zero_bm)
>  {
> +       unsigned long decoded_pfn;
> +        bool zero;
>         int j;
>
>         for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
>                 if (unlikely(buf[j] == BM_END_OF_MAP))
>                         break;
>
> -               if (pfn_valid(buf[j]) && memory_bm_pfn_present(bm, buf[j])) {
> -                       memory_bm_set_bit(bm, buf[j]);
> +               zero = !!(buf[j] & ENCODED_PFN_ZERO_FLAG);
> +               decoded_pfn = buf[j] & ENCODED_PFN_MASK;
> +               if (pfn_valid(decoded_pfn) && memory_bm_pfn_present(bm, decoded_pfn)) {
> +                       memory_bm_set_bit(bm, decoded_pfn);
> +                       if (zero) {
> +                               memory_bm_set_bit(zero_bm, decoded_pfn);
> +                               nr_zero_pages++;
> +                       }
>                 } else {
> -                       if (!pfn_valid(buf[j]))
> +                       if (!pfn_valid(decoded_pfn))
>                                 pr_err(FW_BUG "Memory map mismatch at 0x%llx after hibernation\n",
> -                                      (unsigned long long)PFN_PHYS(buf[j]));
> +                                      (unsigned long long)PFN_PHYS(decoded_pfn));
>                         return -EFAULT;
>                 }
>         }
> @@ -2486,6 +2556,7 @@ static inline void free_highmem_data(void) {}
>   * prepare_image - Make room for loading hibernation image.
>   * @new_bm: Uninitialized memory bitmap structure.
>   * @bm: Memory bitmap with unsafe pages marked.
> + * @zero_bm: Memory bitmap containing the zero pages.
>   *
>   * Use @bm to mark the pages that will be overwritten in the process of
>   * restoring the system memory state from the suspend image ("unsafe" pages)
> @@ -2496,10 +2567,15 @@ static inline void free_highmem_data(void) {}
>   * pages will be used for just yet.  Instead, we mark them all as allocated and
>   * create a lists of "safe" pages to be used later.  On systems with high
>   * memory a list of "safe" highmem pages is created too.
> + *
> + * Because it was not known which pages were unsafe when @zero_bm was created,
> + * make a copy of it and recreate it within safe pages.
>   */
> -static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
> +static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm,
> +               struct memory_bitmap *zero_bm)
>  {
>         unsigned int nr_pages, nr_highmem;
> +       struct memory_bitmap tmp;
>         struct linked_page *lp;
>         int error;
>
> @@ -2516,6 +2592,24 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
>
>         duplicate_memory_bitmap(new_bm, bm);
>         memory_bm_free(bm, PG_UNSAFE_KEEP);
> +
> +       /* Make a copy of zero_bm so it can be created in safe pages */
> +       error = memory_bm_create(&tmp, GFP_ATOMIC, PG_ANY);
> +       if (error)
> +               goto Free;
> +
> +       duplicate_memory_bitmap(&tmp, zero_bm);
> +       memory_bm_free(zero_bm, PG_UNSAFE_KEEP);
> +
> +       /* Recreate zero_bm in safe pages */
> +       error = memory_bm_create(zero_bm, GFP_ATOMIC, PG_SAFE);
> +       if (error)
> +               goto Free;
> +
> +       duplicate_memory_bitmap(zero_bm, &tmp);
> +       memory_bm_free(&tmp, PG_UNSAFE_KEEP);
> +       /* At this point zero_bm is in safe pages and it can be used for restoring. */
> +
>         if (nr_highmem > 0) {
>                 error = prepare_highmem_image(bm, &nr_highmem);
>                 if (error)
> @@ -2530,7 +2624,7 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
>          *
>          * nr_copy_pages cannot be less than allocated_unsafe_pages too.
>          */
> -       nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
> +       nr_pages = (nr_zero_pages + nr_copy_pages) - nr_highmem - allocated_unsafe_pages;
>         nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
>         while (nr_pages > 0) {
>                 lp = get_image_page(GFP_ATOMIC, PG_SAFE);
> @@ -2543,7 +2637,7 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
>                 nr_pages--;
>         }
>         /* Preallocate memory for the image */
> -       nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
> +       nr_pages = (nr_zero_pages + nr_copy_pages) - nr_highmem - allocated_unsafe_pages;
>         while (nr_pages > 0) {
>                 lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
>                 if (!lp) {
> @@ -2631,8 +2725,9 @@ int snapshot_write_next(struct snapshot_handle *handle)
>         static struct chain_allocator ca;
>         int error = 0;
>
> +next:
>         /* Check if we have already loaded the entire image */
> -       if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages)
> +       if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages + nr_zero_pages)
>                 return 0;
>
>         handle->sync_read = 1;
> @@ -2657,19 +2752,26 @@ int snapshot_write_next(struct snapshot_handle *handle)
>                 if (error)
>                         return error;
>
> +               error = memory_bm_create(&zero_bm, GFP_ATOMIC, PG_ANY);
> +               if (error)
> +                       return error;
> +
> +               nr_zero_pages = 0;
> +
>                 hibernate_restore_protection_begin();
>         } else if (handle->cur <= nr_meta_pages + 1) {
> -               error = unpack_orig_pfns(buffer, &copy_bm);
> +               error = unpack_orig_pfns(buffer, &copy_bm, &zero_bm);
>                 if (error)
>                         return error;
>
>                 if (handle->cur == nr_meta_pages + 1) {
> -                       error = prepare_image(&orig_bm, &copy_bm);
> +                       error = prepare_image(&orig_bm, &copy_bm, &zero_bm);
>                         if (error)
>                                 return error;
>
>                         chain_init(&ca, GFP_ATOMIC, PG_SAFE);
>                         memory_bm_position_reset(&orig_bm);
> +                       memory_bm_position_reset(&zero_bm);
>                         restore_pblist = NULL;
>                         handle->buffer = get_buffer(&orig_bm, &ca);
>                         handle->sync_read = 0;
> @@ -2686,6 +2788,14 @@ int snapshot_write_next(struct snapshot_handle *handle)
>                         handle->sync_read = 0;
>         }
>         handle->cur++;
> +
> +       /* Zero pages were not included in the image, memset it and move on. */
> +       if ((handle->cur > (nr_meta_pages + 1)) &&
> +                       memory_bm_test_bit(&zero_bm, memory_bm_get_current(&orig_bm))) {
> +               memset(handle->buffer, 0, PAGE_SIZE);
> +               goto next;
> +       }
> +
>         return PAGE_SIZE;
>  }
>
> @@ -2702,7 +2812,7 @@ void snapshot_write_finalize(struct snapshot_handle *handle)
>         copy_last_highmem_page();
>         hibernate_restore_protect_page(handle->buffer);
>         /* Do that only if we have loaded the image entirely */
> -       if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) {
> +       if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages + nr_zero_pages) {
>                 memory_bm_recycle(&orig_bm);
>                 free_highmem_data();
>         }
> @@ -2711,7 +2821,7 @@ void snapshot_write_finalize(struct snapshot_handle *handle)
>  int snapshot_image_loaded(struct snapshot_handle *handle)
>  {
>         return !(!nr_copy_pages || !last_highmem_page_copied() ||
> -                       handle->cur <= nr_meta_pages + nr_copy_pages);
> +                       handle->cur <= nr_meta_pages + nr_copy_pages + nr_zero_pages);
>  }
>
>  #ifdef CONFIG_HIGHMEM
> --

Applied as 6.6 material with some minor adjustments, thanks!

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2023-07-20 18:19 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-01-13 19:30 [PATCH] PM: hibernate: don't store zero pages in the image file Brian Geffon
2023-01-14  0:15 ` [PATCH v2] " Brian Geffon
2023-02-09 19:44   ` Rafael J. Wysocki
2023-02-13 14:45     ` Brian Geffon
2023-02-18 11:48       ` Pavel Machek
2023-02-19 18:56         ` Brian Geffon
2023-03-02 17:25     ` Brian Geffon
2023-03-02 17:13 ` [PATCH v3] " Brian Geffon
2023-03-23 16:19   ` Matthias Kaehlcke
2023-03-27 14:20     ` Brian Geffon
2023-04-06 18:20 ` [PATCH v4] " Brian Geffon
2023-06-22 16:22   ` Rafael J. Wysocki
2023-06-26 15:44 ` [PATCH] " Brian Geffon
2023-06-26 15:47 ` [PATCH v6] " Brian Geffon
2023-07-14 17:55 ` [PATCH v7] " Brian Geffon
2023-07-20 18:18   ` Rafael J. Wysocki

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).