From: Namjae Jeon <linkinjeon@kernel.org>
To: viro@zeniv.linux.org.uk, brauner@kernel.org, hch@infradead.org,
hch@lst.de, tytso@mit.edu, willy@infradead.org, jack@suse.cz,
djwong@kernel.org, josef@toxicpanda.com, sandeen@sandeen.net,
rgoldwyn@suse.com, xiang@kernel.org, dsterba@suse.com,
pali@kernel.org, ebiggers@kernel.org, neil@brown.name,
amir73il@gmail.com
Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
iamjoonsoo.kim@lge.com, cheol.lee@lge.com, jay.sim@lge.com,
gunho.lee@lge.com, Namjae Jeon <linkinjeon@kernel.org>,
Hyunchul Lee <hyc.lee@gmail.com>
Subject: [PATCH v2 08/11] ntfsplus: add runlist handling and cluster allocator
Date: Thu, 27 Nov 2025 13:59:41 +0900 [thread overview]
Message-ID: <20251127045944.26009-9-linkinjeon@kernel.org> (raw)
In-Reply-To: <20251127045944.26009-1-linkinjeon@kernel.org>
This adds the implementation of runlist handling and cluster allocator
for ntfsplus.
Signed-off-by: Hyunchul Lee <hyc.lee@gmail.com>
Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
---
fs/ntfsplus/bitmap.c | 290 ++++++
fs/ntfsplus/lcnalloc.c | 1012 ++++++++++++++++++++
fs/ntfsplus/runlist.c | 1983 ++++++++++++++++++++++++++++++++++++++++
3 files changed, 3285 insertions(+)
create mode 100644 fs/ntfsplus/bitmap.c
create mode 100644 fs/ntfsplus/lcnalloc.c
create mode 100644 fs/ntfsplus/runlist.c
diff --git a/fs/ntfsplus/bitmap.c b/fs/ntfsplus/bitmap.c
new file mode 100644
index 000000000000..a806f01db839
--- /dev/null
+++ b/fs/ntfsplus/bitmap.c
@@ -0,0 +1,290 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * NTFS kernel bitmap handling. Part of the Linux-NTFS project.
+ *
+ * Copyright (c) 2004-2005 Anton Altaparmakov
+ * Copyright (c) 2025 LG Electronics Co., Ltd.
+ */
+
+#include <linux/bitops.h>
+
+#include "bitmap.h"
+#include "aops.h"
+#include "ntfs.h"
+
+int ntfsp_trim_fs(struct ntfs_volume *vol, struct fstrim_range *range)
+{
+ size_t buf_clusters;
+ pgoff_t index, start_index, end_index;
+ struct file_ra_state *ra;
+ struct folio *folio;
+ unsigned long *bitmap;
+ char *kaddr;
+ u64 end, trimmed = 0, start_buf, end_buf, end_cluster;
+ u64 start_cluster = range->start >> vol->cluster_size_bits;
+ u32 dq = bdev_discard_granularity(vol->sb->s_bdev);
+ int ret = 0;
+
+ if (!dq)
+ dq = vol->cluster_size;
+
+ if (start_cluster >= vol->nr_clusters)
+ return -EINVAL;
+
+ if (range->len == (u64)-1)
+ end_cluster = vol->nr_clusters;
+ else {
+ end_cluster = (range->start + range->len + vol->cluster_size - 1) >>
+ vol->cluster_size_bits;
+ if (end_cluster > vol->nr_clusters)
+ end_cluster = vol->nr_clusters;
+ }
+
+ ra = kzalloc(sizeof(*ra), GFP_NOFS);
+ if (!ra)
+ return -ENOMEM;
+
+ buf_clusters = PAGE_SIZE * 8;
+ start_index = start_cluster >> 15;
+ end_index = (end_cluster + buf_clusters - 1) >> 15;
+
+ for (index = start_index; index < end_index; index++) {
+ folio = filemap_lock_folio(vol->lcnbmp_ino->i_mapping, index);
+ if (IS_ERR(folio)) {
+ page_cache_sync_readahead(vol->lcnbmp_ino->i_mapping, ra, NULL,
+ index, end_index - index);
+ folio = ntfs_read_mapping_folio(vol->lcnbmp_ino->i_mapping, index);
+ if (!IS_ERR(folio))
+ folio_lock(folio);
+ }
+ if (IS_ERR(folio)) {
+ ret = PTR_ERR(folio);
+ goto out_free;
+ }
+
+ kaddr = kmap_local_folio(folio, 0);
+ bitmap = (unsigned long *)kaddr;
+
+ start_buf = max_t(u64, index * buf_clusters, start_cluster);
+ end_buf = min_t(u64, (index + 1) * buf_clusters, end_cluster);
+
+ end = start_buf;
+ while (end < end_buf) {
+ u64 aligned_start, aligned_count;
+ u64 start = find_next_zero_bit(bitmap, end_buf - start_buf,
+ end - start_buf) + start_buf;
+ if (start >= end_buf)
+ break;
+
+ end = find_next_bit(bitmap, end_buf - start_buf,
+ start - start_buf) + start_buf;
+
+ aligned_start = ALIGN(start << vol->cluster_size_bits, dq);
+ aligned_count = ALIGN_DOWN((end - start) << vol->cluster_size_bits, dq);
+ if (aligned_count >= range->minlen) {
+ ret = blkdev_issue_discard(vol->sb->s_bdev, aligned_start >> 9,
+ aligned_count >> 9, GFP_NOFS);
+ if (ret)
+ goto out_unmap;
+ trimmed += aligned_count;
+ }
+ }
+
+out_unmap:
+ kunmap_local(kaddr);
+ folio_unlock(folio);
+ folio_put(folio);
+
+ if (ret)
+ goto out_free;
+ }
+
+ range->len = trimmed;
+
+out_free:
+ kfree(ra);
+ return ret;
+}
+
+/**
+ * __ntfs_bitmap_set_bits_in_run - set a run of bits in a bitmap to a value
+ * @vi: vfs inode describing the bitmap
+ * @start_bit: first bit to set
+ * @count: number of bits to set
+ * @value: value to set the bits to (i.e. 0 or 1)
+ * @is_rollback: if 'true' this is a rollback operation
+ *
+ * Set @count bits starting at bit @start_bit in the bitmap described by the
+ * vfs inode @vi to @value, where @value is either 0 or 1.
+ *
+ * @is_rollback should always be 'false', it is for internal use to rollback
+ * errors. You probably want to use ntfs_bitmap_set_bits_in_run() instead.
+ */
+int __ntfs_bitmap_set_bits_in_run(struct inode *vi, const s64 start_bit,
+ const s64 count, const u8 value, const bool is_rollback)
+{
+ s64 cnt = count;
+ pgoff_t index, end_index;
+ struct address_space *mapping;
+ struct folio *folio;
+ u8 *kaddr;
+ int pos, len;
+ u8 bit;
+ struct ntfs_inode *ni = NTFS_I(vi);
+ struct ntfs_volume *vol = ni->vol;
+
+ ntfs_debug("Entering for i_ino 0x%lx, start_bit 0x%llx, count 0x%llx, value %u.%s",
+ vi->i_ino, (unsigned long long)start_bit,
+ (unsigned long long)cnt, (unsigned int)value,
+ is_rollback ? " (rollback)" : "");
+
+ if (start_bit < 0 || cnt < 0 || value > 1)
+ return -EINVAL;
+
+ /*
+ * Calculate the indices for the pages containing the first and last
+ * bits, i.e. @start_bit and @start_bit + @cnt - 1, respectively.
+ */
+ index = start_bit >> (3 + PAGE_SHIFT);
+ end_index = (start_bit + cnt - 1) >> (3 + PAGE_SHIFT);
+
+ /* Get the page containing the first bit (@start_bit). */
+ mapping = vi->i_mapping;
+ folio = ntfs_read_mapping_folio(mapping, index);
+ if (IS_ERR(folio)) {
+ if (!is_rollback)
+ ntfs_error(vi->i_sb,
+ "Failed to map first page (error %li), aborting.",
+ PTR_ERR(folio));
+ return PTR_ERR(folio);
+ }
+
+ folio_lock(folio);
+ kaddr = kmap_local_folio(folio, 0);
+
+ /* Set @pos to the position of the byte containing @start_bit. */
+ pos = (start_bit >> 3) & ~PAGE_MASK;
+
+ /* Calculate the position of @start_bit in the first byte. */
+ bit = start_bit & 7;
+
+ /* If the first byte is partial, modify the appropriate bits in it. */
+ if (bit) {
+ u8 *byte = kaddr + pos;
+
+ if (ni->mft_no == FILE_Bitmap)
+ ntfs_set_lcn_empty_bits(vol, index, value, min_t(s64, 8 - bit, cnt));
+ while ((bit & 7) && cnt) {
+ cnt--;
+ if (value)
+ *byte |= 1 << bit++;
+ else
+ *byte &= ~(1 << bit++);
+ }
+ /* If we are done, unmap the page and return success. */
+ if (!cnt)
+ goto done;
+
+ /* Update @pos to the new position. */
+ pos++;
+ }
+ /*
+ * Depending on @value, modify all remaining whole bytes in the page up
+ * to @cnt.
+ */
+ len = min_t(s64, cnt >> 3, PAGE_SIZE - pos);
+ memset(kaddr + pos, value ? 0xff : 0, len);
+ cnt -= len << 3;
+ if (ni->mft_no == FILE_Bitmap)
+ ntfs_set_lcn_empty_bits(vol, index, value, len << 3);
+
+ /* Update @len to point to the first not-done byte in the page. */
+ if (cnt < 8)
+ len += pos;
+
+ /* If we are not in the last page, deal with all subsequent pages. */
+ while (index < end_index) {
+ if (cnt <= 0)
+ goto rollback;
+
+ /* Update @index and get the next folio. */
+ flush_dcache_folio(folio);
+ folio_mark_dirty(folio);
+ folio_unlock(folio);
+ ntfs_unmap_folio(folio, kaddr);
+ folio = ntfs_read_mapping_folio(mapping, ++index);
+ if (IS_ERR(folio)) {
+ ntfs_error(vi->i_sb,
+ "Failed to map subsequent page (error %li), aborting.",
+ PTR_ERR(folio));
+ goto rollback;
+ }
+
+ folio_lock(folio);
+ kaddr = kmap_local_folio(folio, 0);
+ /*
+ * Depending on @value, modify all remaining whole bytes in the
+ * page up to @cnt.
+ */
+ len = min_t(s64, cnt >> 3, PAGE_SIZE);
+ memset(kaddr, value ? 0xff : 0, len);
+ cnt -= len << 3;
+ if (ni->mft_no == FILE_Bitmap)
+ ntfs_set_lcn_empty_bits(vol, index, value, len << 3);
+ }
+ /*
+ * The currently mapped page is the last one. If the last byte is
+ * partial, modify the appropriate bits in it. Note, @len is the
+ * position of the last byte inside the page.
+ */
+ if (cnt) {
+ u8 *byte;
+
+ WARN_ON(cnt > 7);
+
+ bit = cnt;
+ byte = kaddr + len;
+ if (ni->mft_no == FILE_Bitmap)
+ ntfs_set_lcn_empty_bits(vol, index, value, bit);
+ while (bit--) {
+ if (value)
+ *byte |= 1 << bit;
+ else
+ *byte &= ~(1 << bit);
+ }
+ }
+done:
+ /* We are done. Unmap the folio and return success. */
+ flush_dcache_folio(folio);
+ folio_mark_dirty(folio);
+ folio_unlock(folio);
+ ntfs_unmap_folio(folio, kaddr);
+ ntfs_debug("Done.");
+ return 0;
+rollback:
+ /*
+ * Current state:
+ * - no pages are mapped
+ * - @count - @cnt is the number of bits that have been modified
+ */
+ if (is_rollback)
+ return PTR_ERR(folio);
+ if (count != cnt)
+ pos = __ntfs_bitmap_set_bits_in_run(vi, start_bit, count - cnt,
+ value ? 0 : 1, true);
+ else
+ pos = 0;
+ if (!pos) {
+ /* Rollback was successful. */
+ ntfs_error(vi->i_sb,
+ "Failed to map subsequent page (error %li), aborting.",
+ PTR_ERR(folio));
+ } else {
+ /* Rollback failed. */
+ ntfs_error(vi->i_sb,
+ "Failed to map subsequent page (error %li) and rollback failed (error %i). Aborting and leaving inconsistent metadata. Unmount and run chkdsk.",
+ PTR_ERR(folio), pos);
+ NVolSetErrors(NTFS_SB(vi->i_sb));
+ }
+ return PTR_ERR(folio);
+}
diff --git a/fs/ntfsplus/lcnalloc.c b/fs/ntfsplus/lcnalloc.c
new file mode 100644
index 000000000000..92db7c7780e9
--- /dev/null
+++ b/fs/ntfsplus/lcnalloc.c
@@ -0,0 +1,1012 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Cluster (de)allocation code. Part of the Linux-NTFS project.
+ *
+ * Copyright (c) 2004-2005 Anton Altaparmakov
+ * Copyright (c) 2025 LG Electronics Co., Ltd.
+ *
+ * Part of this file is based on code from the NTFS-3G project.
+ * and is copyrighted by the respective authors below:
+ * Copyright (c) 2002-2004 Anton Altaparmakov
+ * Copyright (c) 2004 Yura Pakhuchiy
+ * Copyright (c) 2004-2008 Szabolcs Szakacsits
+ * Copyright (c) 2008-2009 Jean-Pierre Andre
+ */
+
+#include "lcnalloc.h"
+#include "bitmap.h"
+#include "misc.h"
+#include "aops.h"
+#include "ntfs.h"
+
+/**
+ * ntfs_cluster_free_from_rl_nolock - free clusters from runlist
+ * @vol: mounted ntfs volume on which to free the clusters
+ * @rl: runlist describing the clusters to free
+ *
+ * Free all the clusters described by the runlist @rl on the volume @vol. In
+ * the case of an error being returned, at least some of the clusters were not
+ * freed.
+ *
+ * Return 0 on success and -errno on error.
+ *
+ * Locking: - The volume lcn bitmap must be locked for writing on entry and is
+ * left locked on return.
+ */
+int ntfs_cluster_free_from_rl_nolock(struct ntfs_volume *vol,
+ const struct runlist_element *rl)
+{
+ struct inode *lcnbmp_vi = vol->lcnbmp_ino;
+ int ret = 0;
+ s64 nr_freed = 0;
+
+ ntfs_debug("Entering.");
+ if (!rl)
+ return 0;
+
+ if (!NVolFreeClusterKnown(vol))
+ wait_event(vol->free_waitq, NVolFreeClusterKnown(vol));
+
+ for (; rl->length; rl++) {
+ int err;
+
+ if (rl->lcn < 0)
+ continue;
+ err = ntfs_bitmap_clear_run(lcnbmp_vi, rl->lcn, rl->length);
+ if (unlikely(err && (!ret || ret == -ENOMEM) && ret != err))
+ ret = err;
+ else
+ nr_freed += rl->length;
+ }
+ ntfs_inc_free_clusters(vol, nr_freed);
+ ntfs_debug("Done.");
+ return ret;
+}
+
+static s64 max_empty_bit_range(unsigned char *buf, int size)
+{
+ int i, j, run = 0;
+ int max_range = 0;
+ s64 start_pos = -1;
+
+ ntfs_debug("Entering\n");
+
+ i = 0;
+ while (i < size) {
+ switch (*buf) {
+ case 0:
+ do {
+ buf++;
+ run += 8;
+ i++;
+ } while ((i < size) && !*buf);
+ break;
+ case 255:
+ if (run > max_range) {
+ max_range = run;
+ start_pos = (s64)i * 8 - run;
+ }
+ run = 0;
+ do {
+ buf++;
+ i++;
+ } while ((i < size) && (*buf == 255));
+ break;
+ default:
+ for (j = 0; j < 8; j++) {
+ int bit = *buf & (1 << j);
+
+ if (bit) {
+ if (run > max_range) {
+ max_range = run;
+ start_pos = (s64)i * 8 + (j - run);
+ }
+ run = 0;
+ } else
+ run++;
+ }
+ i++;
+ buf++;
+ }
+ }
+
+ if (run > max_range)
+ start_pos = (s64)i * 8 - run;
+
+ return start_pos;
+}
+
+/**
+ * ntfs_cluster_alloc - allocate clusters on an ntfs volume
+ *
+ * Allocate @count clusters preferably starting at cluster @start_lcn or at the
+ * current allocator position if @start_lcn is -1, on the mounted ntfs volume
+ * @vol. @zone is either DATA_ZONE for allocation of normal clusters or
+ * MFT_ZONE for allocation of clusters for the master file table, i.e. the
+ * $MFT/$DATA attribute.
+ *
+ * @start_vcn specifies the vcn of the first allocated cluster. This makes
+ * merging the resulting runlist with the old runlist easier.
+ *
+ * If @is_extension is 'true', the caller is allocating clusters to extend an
+ * attribute and if it is 'false', the caller is allocating clusters to fill a
+ * hole in an attribute. Practically the difference is that if @is_extension
+ * is 'true' the returned runlist will be terminated with LCN_ENOENT and if
+ * @is_extension is 'false' the runlist will be terminated with
+ * LCN_RL_NOT_MAPPED.
+ *
+ * You need to check the return value with IS_ERR(). If this is false, the
+ * function was successful and the return value is a runlist describing the
+ * allocated cluster(s). If IS_ERR() is true, the function failed and
+ * PTR_ERR() gives you the error code.
+ *
+ * Notes on the allocation algorithm
+ * =================================
+ *
+ * There are two data zones. First is the area between the end of the mft zone
+ * and the end of the volume, and second is the area between the start of the
+ * volume and the start of the mft zone. On unmodified/standard NTFS 1.x
+ * volumes, the second data zone does not exist due to the mft zone being
+ * expanded to cover the start of the volume in order to reserve space for the
+ * mft bitmap attribute.
+ *
+ * This is not the prettiest function but the complexity stems from the need of
+ * implementing the mft vs data zoned approach and from the fact that we have
+ * access to the lcn bitmap in portions of up to 8192 bytes at a time, so we
+ * need to cope with crossing over boundaries of two buffers. Further, the
+ * fact that the allocator allows for caller supplied hints as to the location
+ * of where allocation should begin and the fact that the allocator keeps track
+ * of where in the data zones the next natural allocation should occur,
+ * contribute to the complexity of the function. But it should all be
+ * worthwhile, because this allocator should: 1) be a full implementation of
+ * the MFT zone approach used by Windows NT, 2) cause reduction in
+ * fragmentation, and 3) be speedy in allocations (the code is not optimized
+ * for speed, but the algorithm is, so further speed improvements are probably
+ * possible).
+ *
+ * Locking: - The volume lcn bitmap must be unlocked on entry and is unlocked
+ * on return.
+ * - This function takes the volume lcn bitmap lock for writing and
+ * modifies the bitmap contents.
+ */
+struct runlist_element *ntfs_cluster_alloc(struct ntfs_volume *vol, const s64 start_vcn,
+ const s64 count, const s64 start_lcn,
+ const int zone,
+ const bool is_extension,
+ const bool is_contig,
+ const bool is_dealloc)
+{
+ s64 zone_start, zone_end, bmp_pos, bmp_initial_pos, last_read_pos, lcn;
+ s64 prev_lcn = 0, prev_run_len = 0, mft_zone_size;
+ s64 clusters, free_clusters;
+ loff_t i_size;
+ struct inode *lcnbmp_vi;
+ struct runlist_element *rl = NULL;
+ struct address_space *mapping;
+ struct folio *folio = NULL;
+ u8 *buf = NULL, *byte;
+ int err = 0, rlpos, rlsize, buf_size, pg_off;
+ u8 pass, done_zones, search_zone, need_writeback = 0, bit;
+ unsigned int memalloc_flags;
+ u8 has_guess;
+ pgoff_t index;
+
+ ntfs_debug("Entering for start_vcn 0x%llx, count 0x%llx, start_lcn 0x%llx, zone %s_ZONE.",
+ start_vcn, count, start_lcn,
+ zone == MFT_ZONE ? "MFT" : "DATA");
+
+ lcnbmp_vi = vol->lcnbmp_ino;
+ if (start_vcn < 0 || start_lcn < LCN_HOLE ||
+ zone < FIRST_ZONE || zone > LAST_ZONE)
+ return ERR_PTR(-EINVAL);
+
+ /* Return NULL if @count is zero. */
+ if (count < 0 || !count)
+ return ERR_PTR(-EINVAL);
+
+ memalloc_flags = memalloc_nofs_save();
+
+ if (!NVolFreeClusterKnown(vol))
+ wait_event(vol->free_waitq, NVolFreeClusterKnown(vol));
+ free_clusters = atomic64_read(&vol->free_clusters);
+
+ /* Take the lcnbmp lock for writing. */
+ down_write(&vol->lcnbmp_lock);
+ if (is_dealloc == false)
+ free_clusters -= atomic64_read(&vol->dirty_clusters);
+
+ if (free_clusters < count) {
+ up_write(&vol->lcnbmp_lock);
+ return ERR_PTR(-ENOSPC);
+ }
+
+ /*
+ * If no specific @start_lcn was requested, use the current data zone
+ * position, otherwise use the requested @start_lcn but make sure it
+ * lies outside the mft zone. Also set done_zones to 0 (no zones done)
+ * and pass depending on whether we are starting inside a zone (1) or
+ * at the beginning of a zone (2). If requesting from the MFT_ZONE,
+ * we either start at the current position within the mft zone or at
+ * the specified position. If the latter is out of bounds then we start
+ * at the beginning of the MFT_ZONE.
+ */
+ done_zones = 0;
+ pass = 1;
+ /*
+ * zone_start and zone_end are the current search range. search_zone
+ * is 1 for mft zone, 2 for data zone 1 (end of mft zone till end of
+ * volume) and 4 for data zone 2 (start of volume till start of mft
+ * zone).
+ */
+ has_guess = 1;
+ zone_start = start_lcn;
+
+ if (zone_start < 0) {
+ if (zone == DATA_ZONE)
+ zone_start = vol->data1_zone_pos;
+ else
+ zone_start = vol->mft_zone_pos;
+ if (!zone_start) {
+ /*
+ * Zone starts at beginning of volume which means a
+ * single pass is sufficient.
+ */
+ pass = 2;
+ }
+ has_guess = 0;
+ }
+
+ if (!zone_start || zone_start == vol->mft_zone_start ||
+ zone_start == vol->mft_zone_end)
+ pass = 2;
+
+ if (zone_start < vol->mft_zone_start) {
+ zone_end = vol->mft_zone_start;
+ search_zone = 4;
+ /* Skip searching the mft zone. */
+ done_zones |= 1;
+ } else if (zone_start < vol->mft_zone_end) {
+ zone_end = vol->mft_zone_end;
+ search_zone = 1;
+ } else {
+ zone_end = vol->nr_clusters;
+ search_zone = 2;
+ /* Skip searching the mft zone. */
+ done_zones |= 1;
+ }
+
+ /*
+ * bmp_pos is the current bit position inside the bitmap. We use
+ * bmp_initial_pos to determine whether or not to do a zone switch.
+ */
+ bmp_pos = bmp_initial_pos = zone_start;
+
+ /* Loop until all clusters are allocated, i.e. clusters == 0. */
+ clusters = count;
+ rlpos = rlsize = 0;
+ mapping = lcnbmp_vi->i_mapping;
+ i_size = i_size_read(lcnbmp_vi);
+ while (1) {
+ ntfs_debug("Start of outer while loop: done_zones 0x%x, search_zone %i, pass %i, zone_start 0x%llx, zone_end 0x%llx, bmp_initial_pos 0x%llx, bmp_pos 0x%llx, rlpos %i, rlsize %i.",
+ done_zones, search_zone, pass,
+ zone_start, zone_end, bmp_initial_pos,
+ bmp_pos, rlpos, rlsize);
+ /* Loop until we run out of free clusters. */
+ last_read_pos = bmp_pos >> 3;
+ ntfs_debug("last_read_pos 0x%llx.", last_read_pos);
+ if (last_read_pos >= i_size) {
+ ntfs_debug("End of attribute reached. Skipping to zone_pass_done.");
+ goto zone_pass_done;
+ }
+ if (likely(folio)) {
+ if (need_writeback) {
+ ntfs_debug("Marking page dirty.");
+ flush_dcache_folio(folio);
+ folio_mark_dirty(folio);
+ need_writeback = 0;
+ }
+ folio_unlock(folio);
+ ntfs_unmap_folio(folio, buf);
+ folio = NULL;
+ }
+
+ index = last_read_pos >> PAGE_SHIFT;
+ pg_off = last_read_pos & ~PAGE_MASK;
+ buf_size = PAGE_SIZE - pg_off;
+ if (unlikely(last_read_pos + buf_size > i_size))
+ buf_size = i_size - last_read_pos;
+ buf_size <<= 3;
+ lcn = bmp_pos & 7;
+ bmp_pos &= ~(s64)7;
+
+ if (vol->lcn_empty_bits_per_page[index] == 0)
+ goto next_bmp_pos;
+
+ folio = ntfs_read_mapping_folio(mapping, index);
+ if (IS_ERR(folio)) {
+ err = PTR_ERR(folio);
+ ntfs_error(vol->sb, "Failed to map page.");
+ goto out;
+ }
+
+ folio_lock(folio);
+ buf = kmap_local_folio(folio, 0) + pg_off;
+ ntfs_debug("Before inner while loop: buf_size %i, lcn 0x%llx, bmp_pos 0x%llx, need_writeback %i.",
+ buf_size, lcn, bmp_pos, need_writeback);
+ while (lcn < buf_size && lcn + bmp_pos < zone_end) {
+ byte = buf + (lcn >> 3);
+ ntfs_debug("In inner while loop: buf_size %i, lcn 0x%llx, bmp_pos 0x%llx, need_writeback %i, byte ofs 0x%x, *byte 0x%x.",
+ buf_size, lcn, bmp_pos, need_writeback,
+ (unsigned int)(lcn >> 3),
+ (unsigned int)*byte);
+ bit = 1 << (lcn & 7);
+ ntfs_debug("bit 0x%x.", bit);
+
+ if (has_guess) {
+ if (*byte & bit) {
+ if (is_contig == true && prev_run_len > 0)
+ goto done;
+
+ has_guess = 0;
+ break;
+ }
+ } else {
+ lcn = max_empty_bit_range(buf, buf_size >> 3);
+ if (lcn < 0)
+ break;
+ has_guess = 1;
+ continue;
+ }
+ /*
+ * Allocate more memory if needed, including space for
+ * the terminator element.
+ * ntfs_malloc_nofs() operates on whole pages only.
+ */
+ if ((rlpos + 2) * sizeof(*rl) > rlsize) {
+ struct runlist_element *rl2;
+
+ ntfs_debug("Reallocating memory.");
+ if (!rl)
+ ntfs_debug("First free bit is at s64 0x%llx.",
+ lcn + bmp_pos);
+ rl2 = ntfs_malloc_nofs(rlsize + (int)PAGE_SIZE);
+ if (unlikely(!rl2)) {
+ err = -ENOMEM;
+ ntfs_error(vol->sb, "Failed to allocate memory.");
+ goto out;
+ }
+ memcpy(rl2, rl, rlsize);
+ ntfs_free(rl);
+ rl = rl2;
+ rlsize += PAGE_SIZE;
+ ntfs_debug("Reallocated memory, rlsize 0x%x.",
+ rlsize);
+ }
+ /* Allocate the bitmap bit. */
+ *byte |= bit;
+ /* We need to write this bitmap page to disk. */
+ need_writeback = 1;
+ ntfs_debug("*byte 0x%x, need_writeback is set.",
+ (unsigned int)*byte);
+ ntfs_dec_free_clusters(vol, 1);
+ ntfs_set_lcn_empty_bits(vol, index, 1, 1);
+
+ /*
+ * Coalesce with previous run if adjacent LCNs.
+ * Otherwise, append a new run.
+ */
+ ntfs_debug("Adding run (lcn 0x%llx, len 0x%llx), prev_lcn 0x%llx, lcn 0x%llx, bmp_pos 0x%llx, prev_run_len 0x%llx, rlpos %i.",
+ lcn + bmp_pos, 1ULL, prev_lcn,
+ lcn, bmp_pos, prev_run_len, rlpos);
+ if (prev_lcn == lcn + bmp_pos - prev_run_len && rlpos) {
+ ntfs_debug("Coalescing to run (lcn 0x%llx, len 0x%llx).",
+ rl[rlpos - 1].lcn,
+ rl[rlpos - 1].length);
+ rl[rlpos - 1].length = ++prev_run_len;
+ ntfs_debug("Run now (lcn 0x%llx, len 0x%llx), prev_run_len 0x%llx.",
+ rl[rlpos - 1].lcn,
+ rl[rlpos - 1].length,
+ prev_run_len);
+ } else {
+ if (likely(rlpos)) {
+ ntfs_debug("Adding new run, (previous run lcn 0x%llx, len 0x%llx).",
+ rl[rlpos - 1].lcn, rl[rlpos - 1].length);
+ rl[rlpos].vcn = rl[rlpos - 1].vcn +
+ prev_run_len;
+ } else {
+ ntfs_debug("Adding new run, is first run.");
+ rl[rlpos].vcn = start_vcn;
+ }
+ rl[rlpos].lcn = prev_lcn = lcn + bmp_pos;
+ rl[rlpos].length = prev_run_len = 1;
+ rlpos++;
+ }
+ /* Done? */
+ if (!--clusters) {
+ s64 tc;
+done:
+ /*
+ * Update the current zone position. Positions
+ * of already scanned zones have been updated
+ * during the respective zone switches.
+ */
+ tc = lcn + bmp_pos + 1;
+ ntfs_debug("Done. Updating current zone position, tc 0x%llx, search_zone %i.",
+ tc, search_zone);
+ switch (search_zone) {
+ case 1:
+ ntfs_debug("Before checks, vol->mft_zone_pos 0x%llx.",
+ vol->mft_zone_pos);
+ if (tc >= vol->mft_zone_end) {
+ vol->mft_zone_pos =
+ vol->mft_lcn;
+ if (!vol->mft_zone_end)
+ vol->mft_zone_pos = 0;
+ } else if ((bmp_initial_pos >=
+ vol->mft_zone_pos ||
+ tc > vol->mft_zone_pos)
+ && tc >= vol->mft_lcn)
+ vol->mft_zone_pos = tc;
+ ntfs_debug("After checks, vol->mft_zone_pos 0x%llx.",
+ vol->mft_zone_pos);
+ break;
+ case 2:
+ ntfs_debug("Before checks, vol->data1_zone_pos 0x%llx.",
+ vol->data1_zone_pos);
+ if (tc >= vol->nr_clusters)
+ vol->data1_zone_pos =
+ vol->mft_zone_end;
+ else if ((bmp_initial_pos >=
+ vol->data1_zone_pos ||
+ tc > vol->data1_zone_pos)
+ && tc >= vol->mft_zone_end)
+ vol->data1_zone_pos = tc;
+ ntfs_debug("After checks, vol->data1_zone_pos 0x%llx.",
+ vol->data1_zone_pos);
+ break;
+ case 4:
+ ntfs_debug("Before checks, vol->data2_zone_pos 0x%llx.",
+ vol->data2_zone_pos);
+ if (tc >= vol->mft_zone_start)
+ vol->data2_zone_pos = 0;
+ else if (bmp_initial_pos >=
+ vol->data2_zone_pos ||
+ tc > vol->data2_zone_pos)
+ vol->data2_zone_pos = tc;
+ ntfs_debug("After checks, vol->data2_zone_pos 0x%llx.",
+ vol->data2_zone_pos);
+ break;
+ default:
+ WARN_ON(1);
+ }
+ ntfs_debug("Finished. Going to out.");
+ goto out;
+ }
+ lcn++;
+ }
+next_bmp_pos:
+ bmp_pos += buf_size;
+ ntfs_debug("After inner while loop: buf_size 0x%x, lcn 0x%llx, bmp_pos 0x%llx, need_writeback %i.",
+ buf_size, lcn, bmp_pos, need_writeback);
+ if (bmp_pos < zone_end) {
+ ntfs_debug("Continuing outer while loop, bmp_pos 0x%llx, zone_end 0x%llx.",
+ bmp_pos, zone_end);
+ continue;
+ }
+zone_pass_done: /* Finished with the current zone pass. */
+ ntfs_debug("At zone_pass_done, pass %i.", pass);
+ if (pass == 1) {
+ /*
+ * Now do pass 2, scanning the first part of the zone
+ * we omitted in pass 1.
+ */
+ pass = 2;
+ zone_end = zone_start;
+ switch (search_zone) {
+ case 1: /* mft_zone */
+ zone_start = vol->mft_zone_start;
+ break;
+ case 2: /* data1_zone */
+ zone_start = vol->mft_zone_end;
+ break;
+ case 4: /* data2_zone */
+ zone_start = 0;
+ break;
+ default:
+ WARN_ON(1);
+ }
+ /* Sanity check. */
+ if (zone_end < zone_start)
+ zone_end = zone_start;
+ bmp_pos = zone_start;
+ ntfs_debug("Continuing outer while loop, pass 2, zone_start 0x%llx, zone_end 0x%llx, bmp_pos 0x%llx.",
+ zone_start, zone_end, bmp_pos);
+ continue;
+ } /* pass == 2 */
+done_zones_check:
+ ntfs_debug("At done_zones_check, search_zone %i, done_zones before 0x%x, done_zones after 0x%x.",
+ search_zone, done_zones,
+ done_zones | search_zone);
+ done_zones |= search_zone;
+ if (done_zones < 7) {
+ ntfs_debug("Switching zone.");
+ /* Now switch to the next zone we haven't done yet. */
+ pass = 1;
+ switch (search_zone) {
+ case 1:
+ ntfs_debug("Switching from mft zone to data1 zone.");
+ /* Update mft zone position. */
+ if (rlpos) {
+ s64 tc;
+
+ ntfs_debug("Before checks, vol->mft_zone_pos 0x%llx.",
+ vol->mft_zone_pos);
+ tc = rl[rlpos - 1].lcn +
+ rl[rlpos - 1].length;
+ if (tc >= vol->mft_zone_end) {
+ vol->mft_zone_pos =
+ vol->mft_lcn;
+ if (!vol->mft_zone_end)
+ vol->mft_zone_pos = 0;
+ } else if ((bmp_initial_pos >=
+ vol->mft_zone_pos ||
+ tc > vol->mft_zone_pos)
+ && tc >= vol->mft_lcn)
+ vol->mft_zone_pos = tc;
+ ntfs_debug("After checks, vol->mft_zone_pos 0x%llx.",
+ vol->mft_zone_pos);
+ }
+ /* Switch from mft zone to data1 zone. */
+switch_to_data1_zone: search_zone = 2;
+ zone_start = bmp_initial_pos =
+ vol->data1_zone_pos;
+ zone_end = vol->nr_clusters;
+ if (zone_start == vol->mft_zone_end)
+ pass = 2;
+ if (zone_start >= zone_end) {
+ vol->data1_zone_pos = zone_start =
+ vol->mft_zone_end;
+ pass = 2;
+ }
+ break;
+ case 2:
+ ntfs_debug("Switching from data1 zone to data2 zone.");
+ /* Update data1 zone position. */
+ if (rlpos) {
+ s64 tc;
+
+ ntfs_debug("Before checks, vol->data1_zone_pos 0x%llx.",
+ vol->data1_zone_pos);
+ tc = rl[rlpos - 1].lcn +
+ rl[rlpos - 1].length;
+ if (tc >= vol->nr_clusters)
+ vol->data1_zone_pos =
+ vol->mft_zone_end;
+ else if ((bmp_initial_pos >=
+ vol->data1_zone_pos ||
+ tc > vol->data1_zone_pos)
+ && tc >= vol->mft_zone_end)
+ vol->data1_zone_pos = tc;
+ ntfs_debug("After checks, vol->data1_zone_pos 0x%llx.",
+ vol->data1_zone_pos);
+ }
+ /* Switch from data1 zone to data2 zone. */
+ search_zone = 4;
+ zone_start = bmp_initial_pos =
+ vol->data2_zone_pos;
+ zone_end = vol->mft_zone_start;
+ if (!zone_start)
+ pass = 2;
+ if (zone_start >= zone_end) {
+ vol->data2_zone_pos = zone_start =
+ bmp_initial_pos = 0;
+ pass = 2;
+ }
+ break;
+ case 4:
+ ntfs_debug("Switching from data2 zone to data1 zone.");
+ /* Update data2 zone position. */
+ if (rlpos) {
+ s64 tc;
+
+ ntfs_debug("Before checks, vol->data2_zone_pos 0x%llx.",
+ vol->data2_zone_pos);
+ tc = rl[rlpos - 1].lcn +
+ rl[rlpos - 1].length;
+ if (tc >= vol->mft_zone_start)
+ vol->data2_zone_pos = 0;
+ else if (bmp_initial_pos >=
+ vol->data2_zone_pos ||
+ tc > vol->data2_zone_pos)
+ vol->data2_zone_pos = tc;
+ ntfs_debug("After checks, vol->data2_zone_pos 0x%llx.",
+ vol->data2_zone_pos);
+ }
+ /* Switch from data2 zone to data1 zone. */
+ goto switch_to_data1_zone;
+ default:
+ WARN_ON(1);
+ }
+ ntfs_debug("After zone switch, search_zone %i, pass %i, bmp_initial_pos 0x%llx, zone_start 0x%llx, zone_end 0x%llx.",
+ search_zone, pass,
+ bmp_initial_pos,
+ zone_start,
+ zone_end);
+ bmp_pos = zone_start;
+ if (zone_start == zone_end) {
+ ntfs_debug("Empty zone, going to done_zones_check.");
+ /* Empty zone. Don't bother searching it. */
+ goto done_zones_check;
+ }
+ ntfs_debug("Continuing outer while loop.");
+ continue;
+ } /* done_zones == 7 */
+ ntfs_debug("All zones are finished.");
+ /*
+ * All zones are finished! If DATA_ZONE, shrink mft zone. If
+ * MFT_ZONE, we have really run out of space.
+ */
+ mft_zone_size = vol->mft_zone_end - vol->mft_zone_start;
+ ntfs_debug("vol->mft_zone_start 0x%llx, vol->mft_zone_end 0x%llx, mft_zone_size 0x%llx.",
+ vol->mft_zone_start, vol->mft_zone_end,
+ mft_zone_size);
+ if (zone == MFT_ZONE || mft_zone_size <= 0) {
+ ntfs_debug("No free clusters left, going to out.");
+ /* Really no more space left on device. */
+ err = -ENOSPC;
+ goto out;
+ } /* zone == DATA_ZONE && mft_zone_size > 0 */
+ ntfs_debug("Shrinking mft zone.");
+ zone_end = vol->mft_zone_end;
+ mft_zone_size >>= 1;
+ if (mft_zone_size > 0)
+ vol->mft_zone_end = vol->mft_zone_start + mft_zone_size;
+ else /* mft zone and data2 zone no longer exist. */
+ vol->data2_zone_pos = vol->mft_zone_start =
+ vol->mft_zone_end = 0;
+ if (vol->mft_zone_pos >= vol->mft_zone_end) {
+ vol->mft_zone_pos = vol->mft_lcn;
+ if (!vol->mft_zone_end)
+ vol->mft_zone_pos = 0;
+ }
+ bmp_pos = zone_start = bmp_initial_pos =
+ vol->data1_zone_pos = vol->mft_zone_end;
+ search_zone = 2;
+ pass = 2;
+ done_zones &= ~2;
+ ntfs_debug("After shrinking mft zone, mft_zone_size 0x%llx, vol->mft_zone_start 0x%llx, vol->mft_zone_end 0x%llx, vol->mft_zone_pos 0x%llx, search_zone 2, pass 2, dones_zones 0x%x, zone_start 0x%llx, zone_end 0x%llx, vol->data1_zone_pos 0x%llx, continuing outer while loop.",
+ mft_zone_size, vol->mft_zone_start,
+ vol->mft_zone_end, vol->mft_zone_pos,
+ done_zones, zone_start, zone_end,
+ vol->data1_zone_pos);
+ }
+ ntfs_debug("After outer while loop.");
+out:
+ ntfs_debug("At out.");
+ /* Add runlist terminator element. */
+ if (likely(rl)) {
+ rl[rlpos].vcn = rl[rlpos - 1].vcn + rl[rlpos - 1].length;
+ rl[rlpos].lcn = is_extension ? LCN_ENOENT : LCN_RL_NOT_MAPPED;
+ rl[rlpos].length = 0;
+ }
+ if (likely(folio && !IS_ERR(folio))) {
+ if (need_writeback) {
+ ntfs_debug("Marking page dirty.");
+ flush_dcache_folio(folio);
+ folio_mark_dirty(folio);
+ need_writeback = 0;
+ }
+ folio_unlock(folio);
+ ntfs_unmap_folio(folio, buf);
+ }
+ if (likely(!err)) {
+ if (is_dealloc == true)
+ ntfs_release_dirty_clusters(vol, rl->length);
+ up_write(&vol->lcnbmp_lock);
+ memalloc_nofs_restore(memalloc_flags);
+ ntfs_debug("Done.");
+ return rl == NULL ? ERR_PTR(-EIO) : rl;
+ }
+ if (err != -ENOSPC)
+ ntfs_error(vol->sb,
+ "Failed to allocate clusters, aborting (error %i).",
+ err);
+ if (rl) {
+ int err2;
+
+ if (err == -ENOSPC)
+ ntfs_debug("Not enough space to complete allocation, err -ENOSPC, first free lcn 0x%llx, could allocate up to 0x%llx clusters.",
+ rl[0].lcn, count - clusters);
+ /* Deallocate all allocated clusters. */
+ ntfs_debug("Attempting rollback...");
+ err2 = ntfs_cluster_free_from_rl_nolock(vol, rl);
+ if (err2) {
+ ntfs_error(vol->sb,
+ "Failed to rollback (error %i). Leaving inconsistent metadata! Unmount and run chkdsk.",
+ err2);
+ NVolSetErrors(vol);
+ }
+ /* Free the runlist. */
+ ntfs_free(rl);
+ } else if (err == -ENOSPC)
+ ntfs_debug("No space left at all, err = -ENOSPC, first free lcn = 0x%llx.",
+ vol->data1_zone_pos);
+ atomic64_set(&vol->dirty_clusters, 0);
+ up_write(&vol->lcnbmp_lock);
+ memalloc_nofs_restore(memalloc_flags);
+ return ERR_PTR(err);
+}
+
+/**
+ * __ntfs_cluster_free - free clusters on an ntfs volume
+ * @ni: ntfs inode whose runlist describes the clusters to free
+ * @start_vcn: vcn in the runlist of @ni at which to start freeing clusters
+ * @count: number of clusters to free or -1 for all clusters
+ * @ctx: active attribute search context if present or NULL if not
+ * @is_rollback: true if this is a rollback operation
+ *
+ * Free @count clusters starting at the cluster @start_vcn in the runlist
+ * described by the vfs inode @ni.
+ *
+ * If @count is -1, all clusters from @start_vcn to the end of the runlist are
+ * deallocated. Thus, to completely free all clusters in a runlist, use
+ * @start_vcn = 0 and @count = -1.
+ *
+ * If @ctx is specified, it is an active search context of @ni and its base mft
+ * record. This is needed when __ntfs_cluster_free() encounters unmapped
+ * runlist fragments and allows their mapping. If you do not have the mft
+ * record mapped, you can specify @ctx as NULL and __ntfs_cluster_free() will
+ * perform the necessary mapping and unmapping.
+ *
+ * Note, __ntfs_cluster_free() saves the state of @ctx on entry and restores it
+ * before returning. Thus, @ctx will be left pointing to the same attribute on
+ * return as on entry. However, the actual pointers in @ctx may point to
+ * different memory locations on return, so you must remember to reset any
+ * cached pointers from the @ctx, i.e. after the call to __ntfs_cluster_free(),
+ * you will probably want to do:
+ * m = ctx->mrec;
+ * a = ctx->attr;
+ * Assuming you cache ctx->attr in a variable @a of type attr_record * and that
+ * you cache ctx->mrec in a variable @m of type struct mft_record *.
+ *
+ * @is_rollback should always be 'false', it is for internal use to rollback
+ * errors. You probably want to use ntfs_cluster_free() instead.
+ *
+ * Note, __ntfs_cluster_free() does not modify the runlist, so you have to
+ * remove from the runlist or mark sparse the freed runs later.
+ *
+ * Return the number of deallocated clusters (not counting sparse ones) on
+ * success and -errno on error.
+ *
+ * WARNING: If @ctx is supplied, regardless of whether success or failure is
+ * returned, you need to check IS_ERR(@ctx->mrec) and if 'true' the @ctx
+ * is no longer valid, i.e. you need to either call
+ * ntfs_attr_reinit_search_ctx() or ntfs_attr_put_search_ctx() on it.
+ * In that case PTR_ERR(@ctx->mrec) will give you the error code for
+ * why the mapping of the old inode failed.
+ *
+ * Locking: - The runlist described by @ni must be locked for writing on entry
+ * and is locked on return. Note the runlist may be modified when
+ * needed runlist fragments need to be mapped.
+ * - The volume lcn bitmap must be unlocked on entry and is unlocked
+ * on return.
+ * - This function takes the volume lcn bitmap lock for writing and
+ * modifies the bitmap contents.
+ * - If @ctx is NULL, the base mft record of @ni must not be mapped on
+ * entry and it will be left unmapped on return.
+ * - If @ctx is not NULL, the base mft record must be mapped on entry
+ * and it will be left mapped on return.
+ */
+s64 __ntfs_cluster_free(struct ntfs_inode *ni, const s64 start_vcn, s64 count,
+ struct ntfs_attr_search_ctx *ctx, const bool is_rollback)
+{
+ s64 delta, to_free, total_freed, real_freed;
+ struct ntfs_volume *vol;
+ struct inode *lcnbmp_vi;
+ struct runlist_element *rl;
+ int err;
+ unsigned int memalloc_flags;
+
+ ntfs_debug("Entering for i_ino 0x%lx, start_vcn 0x%llx, count 0x%llx.%s",
+ ni->mft_no, start_vcn, count,
+ is_rollback ? " (rollback)" : "");
+ vol = ni->vol;
+ lcnbmp_vi = vol->lcnbmp_ino;
+ if (start_vcn < 0 || count < -1)
+ return -EINVAL;
+
+ if (!NVolFreeClusterKnown(vol))
+ wait_event(vol->free_waitq, NVolFreeClusterKnown(vol));
+
+ /*
+ * Lock the lcn bitmap for writing but only if not rolling back. We
+ * must hold the lock all the way including through rollback otherwise
+ * rollback is not possible because once we have cleared a bit and
+ * dropped the lock, anyone could have set the bit again, thus
+ * allocating the cluster for another use.
+ */
+ if (likely(!is_rollback)) {
+ memalloc_flags = memalloc_nofs_save();
+ down_write(&vol->lcnbmp_lock);
+ }
+
+ total_freed = real_freed = 0;
+
+ rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, ctx);
+ if (IS_ERR(rl)) {
+ err = PTR_ERR(rl);
+ if (err == -ENOENT) {
+ if (likely(!is_rollback)) {
+ up_write(&vol->lcnbmp_lock);
+ memalloc_nofs_restore(memalloc_flags);
+ }
+ return 0;
+ }
+
+ if (!is_rollback)
+ ntfs_error(vol->sb,
+ "Failed to find first runlist element (error %d), aborting.",
+ err);
+ goto err_out;
+ }
+ if (unlikely(rl->lcn < LCN_HOLE)) {
+ if (!is_rollback)
+ ntfs_error(vol->sb, "First runlist element has invalid lcn, aborting.");
+ err = -EIO;
+ goto err_out;
+ }
+ /* Find the starting cluster inside the run that needs freeing. */
+ delta = start_vcn - rl->vcn;
+
+ /* The number of clusters in this run that need freeing. */
+ to_free = rl->length - delta;
+ if (count >= 0 && to_free > count)
+ to_free = count;
+
+ if (likely(rl->lcn >= 0)) {
+ /* Do the actual freeing of the clusters in this run. */
+ err = ntfs_bitmap_set_bits_in_run(lcnbmp_vi, rl->lcn + delta,
+ to_free, likely(!is_rollback) ? 0 : 1);
+ if (unlikely(err)) {
+ if (!is_rollback)
+ ntfs_error(vol->sb,
+ "Failed to clear first run (error %i), aborting.",
+ err);
+ goto err_out;
+ }
+ /* We have freed @to_free real clusters. */
+ real_freed = to_free;
+ }
+ /* Go to the next run and adjust the number of clusters left to free. */
+ ++rl;
+ if (count >= 0)
+ count -= to_free;
+
+ /* Keep track of the total "freed" clusters, including sparse ones. */
+ total_freed = to_free;
+ /*
+ * Loop over the remaining runs, using @count as a capping value, and
+ * free them.
+ */
+ for (; rl->length && count != 0; ++rl) {
+ if (unlikely(rl->lcn < LCN_HOLE)) {
+ s64 vcn;
+
+ /* Attempt to map runlist. */
+ vcn = rl->vcn;
+ rl = ntfs_attr_find_vcn_nolock(ni, vcn, ctx);
+ if (IS_ERR(rl)) {
+ err = PTR_ERR(rl);
+ if (!is_rollback)
+ ntfs_error(vol->sb,
+ "Failed to map runlist fragment or failed to find subsequent runlist element.");
+ goto err_out;
+ }
+ if (unlikely(rl->lcn < LCN_HOLE)) {
+ if (!is_rollback)
+ ntfs_error(vol->sb,
+ "Runlist element has invalid lcn (0x%llx).",
+ rl->lcn);
+ err = -EIO;
+ goto err_out;
+ }
+ }
+ /* The number of clusters in this run that need freeing. */
+ to_free = rl->length;
+ if (count >= 0 && to_free > count)
+ to_free = count;
+
+ if (likely(rl->lcn >= 0)) {
+ /* Do the actual freeing of the clusters in the run. */
+ err = ntfs_bitmap_set_bits_in_run(lcnbmp_vi, rl->lcn,
+ to_free, likely(!is_rollback) ? 0 : 1);
+ if (unlikely(err)) {
+ if (!is_rollback)
+ ntfs_error(vol->sb, "Failed to clear subsequent run.");
+ goto err_out;
+ }
+ /* We have freed @to_free real clusters. */
+ real_freed += to_free;
+ }
+ /* Adjust the number of clusters left to free. */
+ if (count >= 0)
+ count -= to_free;
+
+ /* Update the total done clusters. */
+ total_freed += to_free;
+ }
+ ntfs_inc_free_clusters(vol, real_freed);
+ if (likely(!is_rollback)) {
+ up_write(&vol->lcnbmp_lock);
+ memalloc_nofs_restore(memalloc_flags);
+ }
+
+ WARN_ON(count > 0);
+
+ if (NVolDiscard(vol) && !is_rollback) {
+ s64 total_discarded = 0, rl_off;
+ u32 gran = bdev_discard_granularity(vol->sb->s_bdev);
+
+ rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, ctx);
+ if (IS_ERR(rl))
+ return real_freed;
+ rl_off = start_vcn - rl->vcn;
+ while (rl->length && total_discarded < total_freed) {
+ s64 to_discard = rl->length - rl_off;
+
+ if (to_discard + total_discarded > total_freed)
+ to_discard = total_freed - total_discarded;
+ if (rl->lcn >= 0) {
+ sector_t start_sector, end_sector;
+ int ret;
+
+ start_sector = ALIGN((rl->lcn + rl_off) << vol->cluster_size_bits,
+ gran) >> SECTOR_SHIFT;
+ end_sector = ALIGN_DOWN((rl->lcn + rl_off + to_discard) <<
+ vol->cluster_size_bits, gran) >>
+ SECTOR_SHIFT;
+ if (start_sector < end_sector) {
+ ret = blkdev_issue_discard(vol->sb->s_bdev, start_sector,
+ end_sector - start_sector,
+ GFP_NOFS);
+ if (ret)
+ break;
+ }
+ }
+
+ total_discarded += to_discard;
+ ++rl;
+ rl_off = 0;
+ }
+ }
+
+ /* We are done. Return the number of actually freed clusters. */
+ ntfs_debug("Done.");
+ return real_freed;
+err_out:
+ if (is_rollback)
+ return err;
+ /* If no real clusters were freed, no need to rollback. */
+ if (!real_freed) {
+ up_write(&vol->lcnbmp_lock);
+ memalloc_nofs_restore(memalloc_flags);
+ return err;
+ }
+ /*
+ * Attempt to rollback and if that succeeds just return the error code.
+ * If rollback fails, set the volume errors flag, emit an error
+ * message, and return the error code.
+ */
+ delta = __ntfs_cluster_free(ni, start_vcn, total_freed, ctx, true);
+ if (delta < 0) {
+ ntfs_error(vol->sb,
+ "Failed to rollback (error %i). Leaving inconsistent metadata! Unmount and run chkdsk.",
+ (int)delta);
+ NVolSetErrors(vol);
+ }
+ ntfs_dec_free_clusters(vol, delta);
+ up_write(&vol->lcnbmp_lock);
+ memalloc_nofs_restore(memalloc_flags);
+ ntfs_error(vol->sb, "Aborting (error %i).", err);
+ return err;
+}
diff --git a/fs/ntfsplus/runlist.c b/fs/ntfsplus/runlist.c
new file mode 100644
index 000000000000..3dcd797efcc8
--- /dev/null
+++ b/fs/ntfsplus/runlist.c
@@ -0,0 +1,1983 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/**
+ * NTFS runlist handling code.
+ * Part of the Linux-NTFS project.
+ *
+ * Copyright (c) 2001-2007 Anton Altaparmakov
+ * Copyright (c) 2002-2005 Richard Russon
+ * Copyright (c) 2025 LG Electronics Co., Ltd.
+ *
+ * Part of this file is based on code from the NTFS-3G project.
+ * and is copyrighted by the respective authors below:
+ * Copyright (c) 2002-2005 Anton Altaparmakov
+ * Copyright (c) 2002-2005 Richard Russon
+ * Copyright (c) 2002-2008 Szabolcs Szakacsits
+ * Copyright (c) 2004 Yura Pakhuchiy
+ * Copyright (c) 2007-2022 Jean-Pierre Andre
+ */
+
+#include "misc.h"
+#include "ntfs.h"
+#include "attrib.h"
+
+/**
+ * ntfs_rl_mm - runlist memmove
+ *
+ * It is up to the caller to serialize access to the runlist @base.
+ */
+static inline void ntfs_rl_mm(struct runlist_element *base, int dst, int src, int size)
+{
+ if (likely((dst != src) && (size > 0)))
+ memmove(base + dst, base + src, size * sizeof(*base));
+}
+
+/**
+ * ntfs_rl_mc - runlist memory copy
+ *
+ * It is up to the caller to serialize access to the runlists @dstbase and
+ * @srcbase.
+ */
+static inline void ntfs_rl_mc(struct runlist_element *dstbase, int dst,
+ struct runlist_element *srcbase, int src, int size)
+{
+ if (likely(size > 0))
+ memcpy(dstbase + dst, srcbase + src, size * sizeof(*dstbase));
+}
+
+/**
+ * ntfs_rl_realloc - Reallocate memory for runlists
+ * @rl: original runlist
+ * @old_size: number of runlist elements in the original runlist @rl
+ * @new_size: number of runlist elements we need space for
+ *
+ * As the runlists grow, more memory will be required. To prevent the
+ * kernel having to allocate and reallocate large numbers of small bits of
+ * memory, this function returns an entire page of memory.
+ *
+ * It is up to the caller to serialize access to the runlist @rl.
+ *
+ * N.B. If the new allocation doesn't require a different number of pages in
+ * memory, the function will return the original pointer.
+ */
+struct runlist_element *ntfs_rl_realloc(struct runlist_element *rl,
+ int old_size, int new_size)
+{
+ struct runlist_element *new_rl;
+
+ old_size = PAGE_ALIGN(old_size * sizeof(*rl));
+ new_size = PAGE_ALIGN(new_size * sizeof(*rl));
+ if (old_size == new_size)
+ return rl;
+
+ new_rl = ntfs_malloc_nofs(new_size);
+ if (unlikely(!new_rl))
+ return ERR_PTR(-ENOMEM);
+
+ if (likely(rl != NULL)) {
+ if (unlikely(old_size > new_size))
+ old_size = new_size;
+ memcpy(new_rl, rl, old_size);
+ ntfs_free(rl);
+ }
+ return new_rl;
+}
+
+/**
+ * ntfs_rl_realloc_nofail - Reallocate memory for runlists
+ * @rl: original runlist
+ * @old_size: number of runlist elements in the original runlist @rl
+ * @new_size: number of runlist elements we need space for
+ *
+ * As the runlists grow, more memory will be required. To prevent the
+ * kernel having to allocate and reallocate large numbers of small bits of
+ * memory, this function returns an entire page of memory.
+ *
+ * This function guarantees that the allocation will succeed. It will sleep
+ * for as long as it takes to complete the allocation.
+ *
+ * It is up to the caller to serialize access to the runlist @rl.
+ *
+ * N.B. If the new allocation doesn't require a different number of pages in
+ * memory, the function will return the original pointer.
+ */
+static inline struct runlist_element *ntfs_rl_realloc_nofail(struct runlist_element *rl,
+ int old_size, int new_size)
+{
+ struct runlist_element *new_rl;
+
+ old_size = PAGE_ALIGN(old_size * sizeof(*rl));
+ new_size = PAGE_ALIGN(new_size * sizeof(*rl));
+ if (old_size == new_size)
+ return rl;
+
+ new_rl = ntfs_malloc_nofs_nofail(new_size);
+
+ if (likely(rl != NULL)) {
+ if (unlikely(old_size > new_size))
+ old_size = new_size;
+ memcpy(new_rl, rl, old_size);
+ ntfs_free(rl);
+ }
+ return new_rl;
+}
+
+/**
+ * ntfs_are_rl_mergeable - test if two runlists can be joined together
+ * @dst: original runlist
+ * @src: new runlist to test for mergeability with @dst
+ *
+ * Test if two runlists can be joined together. For this, their VCNs and LCNs
+ * must be adjacent.
+ *
+ * It is up to the caller to serialize access to the runlists @dst and @src.
+ *
+ * Return: true Success, the runlists can be merged.
+ * false Failure, the runlists cannot be merged.
+ */
+static inline bool ntfs_are_rl_mergeable(struct runlist_element *dst,
+ struct runlist_element *src)
+{
+ /* We can merge unmapped regions even if they are misaligned. */
+ if ((dst->lcn == LCN_RL_NOT_MAPPED) && (src->lcn == LCN_RL_NOT_MAPPED))
+ return true;
+ /* If the runs are misaligned, we cannot merge them. */
+ if ((dst->vcn + dst->length) != src->vcn)
+ return false;
+ /* If both runs are non-sparse and contiguous, we can merge them. */
+ if ((dst->lcn >= 0) && (src->lcn >= 0) &&
+ ((dst->lcn + dst->length) == src->lcn))
+ return true;
+ /* If we are merging two holes, we can merge them. */
+ if ((dst->lcn == LCN_HOLE) && (src->lcn == LCN_HOLE))
+ return true;
+ /* If we are merging two dealloc, we can merge them. */
+ if ((dst->lcn == LCN_DELALLOC) && (src->lcn == LCN_DELALLOC))
+ return true;
+ /* Cannot merge. */
+ return false;
+}
+
+/**
+ * __ntfs_rl_merge - merge two runlists without testing if they can be merged
+ * @dst: original, destination runlist
+ * @src: new runlist to merge with @dst
+ *
+ * Merge the two runlists, writing into the destination runlist @dst. The
+ * caller must make sure the runlists can be merged or this will corrupt the
+ * destination runlist.
+ *
+ * It is up to the caller to serialize access to the runlists @dst and @src.
+ */
+static inline void __ntfs_rl_merge(struct runlist_element *dst, struct runlist_element *src)
+{
+ dst->length += src->length;
+}
+
+/**
+ * ntfs_rl_append - append a runlist after a given element
+ *
+ * Append the runlist @src after element @loc in @dst. Merge the right end of
+ * the new runlist, if necessary. Adjust the size of the hole before the
+ * appended runlist.
+ *
+ * It is up to the caller to serialize access to the runlists @dst and @src.
+ *
+ * On success, return a pointer to the new, combined, runlist. Note, both
+ * runlists @dst and @src are deallocated before returning so you cannot use
+ * the pointers for anything any more. (Strictly speaking the returned runlist
+ * may be the same as @dst but this is irrelevant.)
+ */
+static inline struct runlist_element *ntfs_rl_append(struct runlist_element *dst,
+ int dsize, struct runlist_element *src, int ssize, int loc,
+ size_t *new_size)
+{
+ bool right = false; /* Right end of @src needs merging. */
+ int marker; /* End of the inserted runs. */
+
+ /* First, check if the right hand end needs merging. */
+ if ((loc + 1) < dsize)
+ right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1);
+
+ /* Space required: @dst size + @src size, less one if we merged. */
+ dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - right);
+ if (IS_ERR(dst))
+ return dst;
+
+ *new_size = dsize + ssize - right;
+ /*
+ * We are guaranteed to succeed from here so can start modifying the
+ * original runlists.
+ */
+
+ /* First, merge the right hand end, if necessary. */
+ if (right)
+ __ntfs_rl_merge(src + ssize - 1, dst + loc + 1);
+
+ /* First run after the @src runs that have been inserted. */
+ marker = loc + ssize + 1;
+
+ /* Move the tail of @dst out of the way, then copy in @src. */
+ ntfs_rl_mm(dst, marker, loc + 1 + right, dsize - (loc + 1 + right));
+ ntfs_rl_mc(dst, loc + 1, src, 0, ssize);
+
+ /* Adjust the size of the preceding hole. */
+ dst[loc].length = dst[loc + 1].vcn - dst[loc].vcn;
+
+ /* We may have changed the length of the file, so fix the end marker */
+ if (dst[marker].lcn == LCN_ENOENT)
+ dst[marker].vcn = dst[marker - 1].vcn + dst[marker - 1].length;
+
+ return dst;
+}
+
+/**
+ * ntfs_rl_insert - insert a runlist into another
+ *
+ * Insert the runlist @src before element @loc in the runlist @dst. Merge the
+ * left end of the new runlist, if necessary. Adjust the size of the hole
+ * after the inserted runlist.
+ *
+ * It is up to the caller to serialize access to the runlists @dst and @src.
+ *
+ * On success, return a pointer to the new, combined, runlist. Note, both
+ * runlists @dst and @src are deallocated before returning so you cannot use
+ * the pointers for anything any more. (Strictly speaking the returned runlist
+ * may be the same as @dst but this is irrelevant.)
+ */
+static inline struct runlist_element *ntfs_rl_insert(struct runlist_element *dst,
+ int dsize, struct runlist_element *src, int ssize, int loc,
+ size_t *new_size)
+{
+ bool left = false; /* Left end of @src needs merging. */
+ bool disc = false; /* Discontinuity between @dst and @src. */
+ int marker; /* End of the inserted runs. */
+
+ /*
+ * disc => Discontinuity between the end of @dst and the start of @src.
+ * This means we might need to insert a "not mapped" run.
+ */
+ if (loc == 0)
+ disc = (src[0].vcn > 0);
+ else {
+ s64 merged_length;
+
+ left = ntfs_are_rl_mergeable(dst + loc - 1, src);
+
+ merged_length = dst[loc - 1].length;
+ if (left)
+ merged_length += src->length;
+
+ disc = (src[0].vcn > dst[loc - 1].vcn + merged_length);
+ }
+ /*
+ * Space required: @dst size + @src size, less one if we merged, plus
+ * one if there was a discontinuity.
+ */
+ dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - left + disc);
+ if (IS_ERR(dst))
+ return dst;
+
+ *new_size = dsize + ssize - left + disc;
+ /*
+ * We are guaranteed to succeed from here so can start modifying the
+ * original runlist.
+ */
+ if (left)
+ __ntfs_rl_merge(dst + loc - 1, src);
+ /*
+ * First run after the @src runs that have been inserted.
+ * Nominally, @marker equals @loc + @ssize, i.e. location + number of
+ * runs in @src. However, if @left, then the first run in @src has
+ * been merged with one in @dst. And if @disc, then @dst and @src do
+ * not meet and we need an extra run to fill the gap.
+ */
+ marker = loc + ssize - left + disc;
+
+ /* Move the tail of @dst out of the way, then copy in @src. */
+ ntfs_rl_mm(dst, marker, loc, dsize - loc);
+ ntfs_rl_mc(dst, loc + disc, src, left, ssize - left);
+
+ /* Adjust the VCN of the first run after the insertion... */
+ dst[marker].vcn = dst[marker - 1].vcn + dst[marker - 1].length;
+ /* ... and the length. */
+ if (dst[marker].lcn == LCN_HOLE || dst[marker].lcn == LCN_RL_NOT_MAPPED ||
+ dst[marker].lcn == LCN_DELALLOC)
+ dst[marker].length = dst[marker + 1].vcn - dst[marker].vcn;
+
+ /* Writing beyond the end of the file and there is a discontinuity. */
+ if (disc) {
+ if (loc > 0) {
+ dst[loc].vcn = dst[loc - 1].vcn + dst[loc - 1].length;
+ dst[loc].length = dst[loc + 1].vcn - dst[loc].vcn;
+ } else {
+ dst[loc].vcn = 0;
+ dst[loc].length = dst[loc + 1].vcn;
+ }
+ dst[loc].lcn = LCN_RL_NOT_MAPPED;
+ }
+ return dst;
+}
+
+/**
+ * ntfs_rl_replace - overwrite a runlist element with another runlist
+ *
+ * Replace the runlist element @dst at @loc with @src. Merge the left and
+ * right ends of the inserted runlist, if necessary.
+ *
+ * It is up to the caller to serialize access to the runlists @dst and @src.
+ *
+ * On success, return a pointer to the new, combined, runlist. Note, both
+ * runlists @dst and @src are deallocated before returning so you cannot use
+ * the pointers for anything any more. (Strictly speaking the returned runlist
+ * may be the same as @dst but this is irrelevant.)
+ */
+static inline struct runlist_element *ntfs_rl_replace(struct runlist_element *dst,
+ int dsize, struct runlist_element *src, int ssize, int loc,
+ size_t *new_size)
+{
+ int delta;
+ bool left = false; /* Left end of @src needs merging. */
+ bool right = false; /* Right end of @src needs merging. */
+ int tail; /* Start of tail of @dst. */
+ int marker; /* End of the inserted runs. */
+
+ /* First, see if the left and right ends need merging. */
+ if ((loc + 1) < dsize)
+ right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1);
+ if (loc > 0)
+ left = ntfs_are_rl_mergeable(dst + loc - 1, src);
+ /*
+ * Allocate some space. We will need less if the left, right, or both
+ * ends get merged. The -1 accounts for the run being replaced.
+ */
+ delta = ssize - 1 - left - right;
+ if (delta > 0) {
+ dst = ntfs_rl_realloc(dst, dsize, dsize + delta);
+ if (IS_ERR(dst))
+ return dst;
+ }
+
+ *new_size = dsize + delta;
+ /*
+ * We are guaranteed to succeed from here so can start modifying the
+ * original runlists.
+ */
+
+ /* First, merge the left and right ends, if necessary. */
+ if (right)
+ __ntfs_rl_merge(src + ssize - 1, dst + loc + 1);
+ if (left)
+ __ntfs_rl_merge(dst + loc - 1, src);
+ /*
+ * Offset of the tail of @dst. This needs to be moved out of the way
+ * to make space for the runs to be copied from @src, i.e. the first
+ * run of the tail of @dst.
+ * Nominally, @tail equals @loc + 1, i.e. location, skipping the
+ * replaced run. However, if @right, then one of @dst's runs is
+ * already merged into @src.
+ */
+ tail = loc + right + 1;
+ /*
+ * First run after the @src runs that have been inserted, i.e. where
+ * the tail of @dst needs to be moved to.
+ * Nominally, @marker equals @loc + @ssize, i.e. location + number of
+ * runs in @src. However, if @left, then the first run in @src has
+ * been merged with one in @dst.
+ */
+ marker = loc + ssize - left;
+
+ /* Move the tail of @dst out of the way, then copy in @src. */
+ ntfs_rl_mm(dst, marker, tail, dsize - tail);
+ ntfs_rl_mc(dst, loc, src, left, ssize - left);
+
+ /* We may have changed the length of the file, so fix the end marker. */
+ if (dsize - tail > 0 && dst[marker].lcn == LCN_ENOENT)
+ dst[marker].vcn = dst[marker - 1].vcn + dst[marker - 1].length;
+ return dst;
+}
+
+/**
+ * ntfs_rl_split - insert a runlist into the centre of a hole
+ *
+ * Split the runlist @dst at @loc into two and insert @new in between the two
+ * fragments. No merging of runlists is necessary. Adjust the size of the
+ * holes either side.
+ *
+ * It is up to the caller to serialize access to the runlists @dst and @src.
+ *
+ * On success, return a pointer to the new, combined, runlist. Note, both
+ * runlists @dst and @src are deallocated before returning so you cannot use
+ * the pointers for anything any more. (Strictly speaking the returned runlist
+ * may be the same as @dst but this is irrelevant.)
+ */
+static inline struct runlist_element *ntfs_rl_split(struct runlist_element *dst, int dsize,
+ struct runlist_element *src, int ssize, int loc,
+ size_t *new_size)
+{
+ /* Space required: @dst size + @src size + one new hole. */
+ dst = ntfs_rl_realloc(dst, dsize, dsize + ssize + 1);
+ if (IS_ERR(dst))
+ return dst;
+
+ *new_size = dsize + ssize + 1;
+ /*
+ * We are guaranteed to succeed from here so can start modifying the
+ * original runlists.
+ */
+
+ /* Move the tail of @dst out of the way, then copy in @src. */
+ ntfs_rl_mm(dst, loc + 1 + ssize, loc, dsize - loc);
+ ntfs_rl_mc(dst, loc + 1, src, 0, ssize);
+
+ /* Adjust the size of the holes either size of @src. */
+ dst[loc].length = dst[loc+1].vcn - dst[loc].vcn;
+ dst[loc+ssize+1].vcn = dst[loc+ssize].vcn + dst[loc+ssize].length;
+ dst[loc+ssize+1].length = dst[loc+ssize+2].vcn - dst[loc+ssize+1].vcn;
+
+ return dst;
+}
+
+/**
+ * ntfs_runlists_merge - merge two runlists into one
+ *
+ * First we sanity check the two runlists @srl and @drl to make sure that they
+ * are sensible and can be merged. The runlist @srl must be either after the
+ * runlist @drl or completely within a hole (or unmapped region) in @drl.
+ *
+ * It is up to the caller to serialize access to the runlists @drl and @srl.
+ *
+ * Merging of runlists is necessary in two cases:
+ * 1. When attribute lists are used and a further extent is being mapped.
+ * 2. When new clusters are allocated to fill a hole or extend a file.
+ *
+ * There are four possible ways @srl can be merged. It can:
+ * - be inserted at the beginning of a hole,
+ * - split the hole in two and be inserted between the two fragments,
+ * - be appended at the end of a hole, or it can
+ * - replace the whole hole.
+ * It can also be appended to the end of the runlist, which is just a variant
+ * of the insert case.
+ *
+ * On success, return a pointer to the new, combined, runlist. Note, both
+ * runlists @drl and @srl are deallocated before returning so you cannot use
+ * the pointers for anything any more. (Strictly speaking the returned runlist
+ * may be the same as @dst but this is irrelevant.)
+ */
+struct runlist_element *ntfs_runlists_merge(struct runlist *d_runlist,
+ struct runlist_element *srl, size_t s_rl_count,
+ size_t *new_rl_count)
+{
+ int di, si; /* Current index into @[ds]rl. */
+ int sstart; /* First index with lcn > LCN_RL_NOT_MAPPED. */
+ int dins; /* Index into @drl at which to insert @srl. */
+ int dend, send; /* Last index into @[ds]rl. */
+ int dfinal, sfinal; /* The last index into @[ds]rl with lcn >= LCN_HOLE. */
+ int marker = 0;
+ s64 marker_vcn = 0;
+ struct runlist_element *drl = d_runlist->rl, *rl;
+
+#ifdef DEBUG
+ ntfs_debug("dst:");
+ ntfs_debug_dump_runlist(drl);
+ ntfs_debug("src:");
+ ntfs_debug_dump_runlist(srl);
+#endif
+
+ /* Check for silly calling... */
+ if (unlikely(!srl))
+ return drl;
+ if (IS_ERR(srl) || IS_ERR(drl))
+ return ERR_PTR(-EINVAL);
+
+ if (s_rl_count == 0) {
+ for (; srl[s_rl_count].length; s_rl_count++)
+ ;
+ s_rl_count++;
+ }
+
+ /* Check for the case where the first mapping is being done now. */
+ if (unlikely(!drl)) {
+ drl = srl;
+ /* Complete the source runlist if necessary. */
+ if (unlikely(drl[0].vcn)) {
+ /* Scan to the end of the source runlist. */
+ drl = ntfs_rl_realloc(drl, s_rl_count, s_rl_count + 1);
+ if (IS_ERR(drl))
+ return drl;
+ /* Insert start element at the front of the runlist. */
+ ntfs_rl_mm(drl, 1, 0, s_rl_count);
+ drl[0].vcn = 0;
+ drl[0].lcn = LCN_RL_NOT_MAPPED;
+ drl[0].length = drl[1].vcn;
+ s_rl_count++;
+ }
+
+ *new_rl_count = s_rl_count;
+ goto finished;
+ }
+
+ if (d_runlist->count < 1 || s_rl_count < 2)
+ return ERR_PTR(-EINVAL);
+
+ si = di = 0;
+
+ /* Skip any unmapped start element(s) in the source runlist. */
+ while (srl[si].length && srl[si].lcn < LCN_HOLE)
+ si++;
+
+ /* Can't have an entirely unmapped source runlist. */
+ WARN_ON(!srl[si].length);
+
+ /* Record the starting points. */
+ sstart = si;
+
+ /*
+ * Skip forward in @drl until we reach the position where @srl needs to
+ * be inserted. If we reach the end of @drl, @srl just needs to be
+ * appended to @drl.
+ */
+ rl = __ntfs_attr_find_vcn_nolock(d_runlist, srl[sstart].vcn);
+ if (IS_ERR(rl))
+ di = (int)d_runlist->count - 1;
+ else
+ di = (int)(rl - d_runlist->rl);
+ dins = di;
+
+ /* Sanity check for illegal overlaps. */
+ if ((drl[di].vcn == srl[si].vcn) && (drl[di].lcn >= 0) &&
+ (srl[si].lcn >= 0)) {
+ ntfs_error(NULL, "Run lists overlap. Cannot merge!");
+ return ERR_PTR(-ERANGE);
+ }
+
+ /* Scan to the end of both runlists in order to know their sizes. */
+ send = (int)s_rl_count - 1;
+ dend = (int)d_runlist->count - 1;
+
+ if (srl[send].lcn == LCN_ENOENT)
+ marker_vcn = srl[marker = send].vcn;
+
+ /* Scan to the last element with lcn >= LCN_HOLE. */
+ for (sfinal = send; sfinal >= 0 && srl[sfinal].lcn < LCN_HOLE; sfinal--)
+ ;
+ for (dfinal = dend; dfinal >= 0 && drl[dfinal].lcn < LCN_HOLE; dfinal--)
+ ;
+
+ {
+ bool start;
+ bool finish;
+ int ds = dend + 1; /* Number of elements in drl & srl */
+ int ss = sfinal - sstart + 1;
+
+ start = ((drl[dins].lcn < LCN_RL_NOT_MAPPED) || /* End of file */
+ (drl[dins].vcn == srl[sstart].vcn)); /* Start of hole */
+ finish = ((drl[dins].lcn >= LCN_RL_NOT_MAPPED) && /* End of file */
+ ((drl[dins].vcn + drl[dins].length) <= /* End of hole */
+ (srl[send - 1].vcn + srl[send - 1].length)));
+
+ /* Or we will lose an end marker. */
+ if (finish && !drl[dins].length)
+ ss++;
+ if (marker && (drl[dins].vcn + drl[dins].length > srl[send - 1].vcn))
+ finish = false;
+
+ if (start) {
+ if (finish)
+ drl = ntfs_rl_replace(drl, ds, srl + sstart, ss, dins, new_rl_count);
+ else
+ drl = ntfs_rl_insert(drl, ds, srl + sstart, ss, dins, new_rl_count);
+ } else {
+ if (finish)
+ drl = ntfs_rl_append(drl, ds, srl + sstart, ss, dins, new_rl_count);
+ else
+ drl = ntfs_rl_split(drl, ds, srl + sstart, ss, dins, new_rl_count);
+ }
+ if (IS_ERR(drl)) {
+ ntfs_error(NULL, "Merge failed.");
+ return drl;
+ }
+ ntfs_free(srl);
+ if (marker) {
+ ntfs_debug("Triggering marker code.");
+ for (ds = dend; drl[ds].length; ds++)
+ ;
+ /* We only need to care if @srl ended after @drl. */
+ if (drl[ds].vcn <= marker_vcn) {
+ int slots = 0;
+
+ if (drl[ds].vcn == marker_vcn) {
+ ntfs_debug("Old marker = 0x%llx, replacing with LCN_ENOENT.",
+ drl[ds].lcn);
+ drl[ds].lcn = LCN_ENOENT;
+ goto finished;
+ }
+ /*
+ * We need to create an unmapped runlist element in
+ * @drl or extend an existing one before adding the
+ * ENOENT terminator.
+ */
+ if (drl[ds].lcn == LCN_ENOENT) {
+ ds--;
+ slots = 1;
+ }
+ if (drl[ds].lcn != LCN_RL_NOT_MAPPED) {
+ /* Add an unmapped runlist element. */
+ if (!slots) {
+ drl = ntfs_rl_realloc_nofail(drl, ds,
+ ds + 2);
+ slots = 2;
+ *new_rl_count += 2;
+ }
+ ds++;
+ /* Need to set vcn if it isn't set already. */
+ if (slots != 1)
+ drl[ds].vcn = drl[ds - 1].vcn +
+ drl[ds - 1].length;
+ drl[ds].lcn = LCN_RL_NOT_MAPPED;
+ /* We now used up a slot. */
+ slots--;
+ }
+ drl[ds].length = marker_vcn - drl[ds].vcn;
+ /* Finally add the ENOENT terminator. */
+ ds++;
+ if (!slots) {
+ drl = ntfs_rl_realloc_nofail(drl, ds, ds + 1);
+ *new_rl_count += 1;
+ }
+ drl[ds].vcn = marker_vcn;
+ drl[ds].lcn = LCN_ENOENT;
+ drl[ds].length = (s64)0;
+ }
+ }
+ }
+
+finished:
+ /* The merge was completed successfully. */
+ ntfs_debug("Merged runlist:");
+ ntfs_debug_dump_runlist(drl);
+ return drl;
+}
+
+/**
+ * ntfs_mapping_pairs_decompress - convert mapping pairs array to runlist
+ *
+ * It is up to the caller to serialize access to the runlist @old_rl.
+ *
+ * Decompress the attribute @attr's mapping pairs array into a runlist. On
+ * success, return the decompressed runlist.
+ *
+ * If @old_rl is not NULL, decompressed runlist is inserted into the
+ * appropriate place in @old_rl and the resultant, combined runlist is
+ * returned. The original @old_rl is deallocated.
+ *
+ * On error, return -errno. @old_rl is left unmodified in that case.
+ */
+struct runlist_element *ntfs_mapping_pairs_decompress(const struct ntfs_volume *vol,
+ const struct attr_record *attr, struct runlist *old_runlist,
+ size_t *new_rl_count)
+{
+ s64 vcn; /* Current vcn. */
+ s64 lcn; /* Current lcn. */
+ s64 deltaxcn; /* Change in [vl]cn. */
+ struct runlist_element *rl, *new_rl; /* The output runlist. */
+ u8 *buf; /* Current position in mapping pairs array. */
+ u8 *attr_end; /* End of attribute. */
+ int rlsize; /* Size of runlist buffer. */
+ u16 rlpos; /* Current runlist position in units of struct runlist_elements. */
+ u8 b; /* Current byte offset in buf. */
+
+#ifdef DEBUG
+ /* Make sure attr exists and is non-resident. */
+ if (!attr || !attr->non_resident ||
+ le64_to_cpu(attr->data.non_resident.lowest_vcn) < 0) {
+ ntfs_error(vol->sb, "Invalid arguments.");
+ return ERR_PTR(-EINVAL);
+ }
+#endif
+ /* Start at vcn = lowest_vcn and lcn 0. */
+ vcn = le64_to_cpu(attr->data.non_resident.lowest_vcn);
+ lcn = 0;
+ /* Get start of the mapping pairs array. */
+ buf = (u8 *)attr +
+ le16_to_cpu(attr->data.non_resident.mapping_pairs_offset);
+ attr_end = (u8 *)attr + le32_to_cpu(attr->length);
+ if (unlikely(buf < (u8 *)attr || buf > attr_end)) {
+ ntfs_error(vol->sb, "Corrupt attribute.");
+ return ERR_PTR(-EIO);
+ }
+
+ /* Current position in runlist array. */
+ rlpos = 0;
+ /* Allocate first page and set current runlist size to one page. */
+ rl = ntfs_malloc_nofs(rlsize = PAGE_SIZE);
+ if (unlikely(!rl))
+ return ERR_PTR(-ENOMEM);
+ /* Insert unmapped starting element if necessary. */
+ if (vcn) {
+ rl->vcn = 0;
+ rl->lcn = LCN_RL_NOT_MAPPED;
+ rl->length = vcn;
+ rlpos++;
+ }
+ while (buf < attr_end && *buf) {
+ /*
+ * Allocate more memory if needed, including space for the
+ * not-mapped and terminator elements. ntfs_malloc_nofs()
+ * operates on whole pages only.
+ */
+ if (((rlpos + 3) * sizeof(*rl)) > rlsize) {
+ struct runlist_element *rl2;
+
+ rl2 = ntfs_malloc_nofs(rlsize + (int)PAGE_SIZE);
+ if (unlikely(!rl2)) {
+ ntfs_free(rl);
+ return ERR_PTR(-ENOMEM);
+ }
+ memcpy(rl2, rl, rlsize);
+ ntfs_free(rl);
+ rl = rl2;
+ rlsize += PAGE_SIZE;
+ }
+ /* Enter the current vcn into the current runlist element. */
+ rl[rlpos].vcn = vcn;
+ /*
+ * Get the change in vcn, i.e. the run length in clusters.
+ * Doing it this way ensures that we signextend negative values.
+ * A negative run length doesn't make any sense, but hey, I
+ * didn't make up the NTFS specs and Windows NT4 treats the run
+ * length as a signed value so that's how it is...
+ */
+ b = *buf & 0xf;
+ if (b) {
+ if (unlikely(buf + b > attr_end))
+ goto io_error;
+ for (deltaxcn = (s8)buf[b--]; b; b--)
+ deltaxcn = (deltaxcn << 8) + buf[b];
+ } else { /* The length entry is compulsory. */
+ ntfs_error(vol->sb, "Missing length entry in mapping pairs array.");
+ deltaxcn = (s64)-1;
+ }
+ /*
+ * Assume a negative length to indicate data corruption and
+ * hence clean-up and return NULL.
+ */
+ if (unlikely(deltaxcn < 0)) {
+ ntfs_error(vol->sb, "Invalid length in mapping pairs array.");
+ goto err_out;
+ }
+ /*
+ * Enter the current run length into the current runlist
+ * element.
+ */
+ rl[rlpos].length = deltaxcn;
+ /* Increment the current vcn by the current run length. */
+ vcn += deltaxcn;
+ /*
+ * There might be no lcn change at all, as is the case for
+ * sparse clusters on NTFS 3.0+, in which case we set the lcn
+ * to LCN_HOLE.
+ */
+ if (!(*buf & 0xf0))
+ rl[rlpos].lcn = LCN_HOLE;
+ else {
+ /* Get the lcn change which really can be negative. */
+ u8 b2 = *buf & 0xf;
+
+ b = b2 + ((*buf >> 4) & 0xf);
+ if (buf + b > attr_end)
+ goto io_error;
+ for (deltaxcn = (s8)buf[b--]; b > b2; b--)
+ deltaxcn = (deltaxcn << 8) + buf[b];
+ /* Change the current lcn to its new value. */
+ lcn += deltaxcn;
+#ifdef DEBUG
+ /*
+ * On NTFS 1.2-, apparently can have lcn == -1 to
+ * indicate a hole. But we haven't verified ourselves
+ * whether it is really the lcn or the deltaxcn that is
+ * -1. So if either is found give us a message so we
+ * can investigate it further!
+ */
+ if (vol->major_ver < 3) {
+ if (unlikely(deltaxcn == -1))
+ ntfs_error(vol->sb, "lcn delta == -1");
+ if (unlikely(lcn == -1))
+ ntfs_error(vol->sb, "lcn == -1");
+ }
+#endif
+ /* Check lcn is not below -1. */
+ if (unlikely(lcn < -1)) {
+ ntfs_error(vol->sb, "Invalid s64 < -1 in mapping pairs array.");
+ goto err_out;
+ }
+
+ /* chkdsk accepts zero-sized runs only for holes */
+ if ((lcn != -1) && !rl[rlpos].length) {
+ ntfs_error(vol->sb, "Invalid zero-sized data run.\n");
+ goto err_out;
+ }
+
+ /* Enter the current lcn into the runlist element. */
+ rl[rlpos].lcn = lcn;
+ }
+ /* Get to the next runlist element, skipping zero-sized holes */
+ if (rl[rlpos].length)
+ rlpos++;
+ /* Increment the buffer position to the next mapping pair. */
+ buf += (*buf & 0xf) + ((*buf >> 4) & 0xf) + 1;
+ }
+ if (unlikely(buf >= attr_end))
+ goto io_error;
+ /*
+ * If there is a highest_vcn specified, it must be equal to the final
+ * vcn in the runlist - 1, or something has gone badly wrong.
+ */
+ deltaxcn = le64_to_cpu(attr->data.non_resident.highest_vcn);
+ if (unlikely(deltaxcn && vcn - 1 != deltaxcn)) {
+mpa_err:
+ ntfs_error(vol->sb, "Corrupt mapping pairs array in non-resident attribute.");
+ goto err_out;
+ }
+ /* Setup not mapped runlist element if this is the base extent. */
+ if (!attr->data.non_resident.lowest_vcn) {
+ s64 max_cluster;
+
+ max_cluster = ((le64_to_cpu(attr->data.non_resident.allocated_size) +
+ vol->cluster_size - 1) >>
+ vol->cluster_size_bits) - 1;
+ /*
+ * A highest_vcn of zero means this is a single extent
+ * attribute so simply terminate the runlist with LCN_ENOENT).
+ */
+ if (deltaxcn) {
+ /*
+ * If there is a difference between the highest_vcn and
+ * the highest cluster, the runlist is either corrupt
+ * or, more likely, there are more extents following
+ * this one.
+ */
+ if (deltaxcn < max_cluster) {
+ ntfs_debug("More extents to follow; deltaxcn = 0x%llx, max_cluster = 0x%llx",
+ deltaxcn, max_cluster);
+ rl[rlpos].vcn = vcn;
+ vcn += rl[rlpos].length = max_cluster -
+ deltaxcn;
+ rl[rlpos].lcn = LCN_RL_NOT_MAPPED;
+ rlpos++;
+ } else if (unlikely(deltaxcn > max_cluster)) {
+ ntfs_error(vol->sb,
+ "Corrupt attribute. deltaxcn = 0x%llx, max_cluster = 0x%llx",
+ deltaxcn, max_cluster);
+ goto mpa_err;
+ }
+ }
+ rl[rlpos].lcn = LCN_ENOENT;
+ } else /* Not the base extent. There may be more extents to follow. */
+ rl[rlpos].lcn = LCN_RL_NOT_MAPPED;
+
+ /* Setup terminating runlist element. */
+ rl[rlpos].vcn = vcn;
+ rl[rlpos].length = (s64)0;
+ /* If no existing runlist was specified, we are done. */
+ if (!old_runlist || !old_runlist->rl) {
+ *new_rl_count = rlpos + 1;
+ ntfs_debug("Mapping pairs array successfully decompressed:");
+ ntfs_debug_dump_runlist(rl);
+ return rl;
+ }
+ /* Now combine the new and old runlists checking for overlaps. */
+ new_rl = ntfs_runlists_merge(old_runlist, rl, rlpos + 1, new_rl_count);
+ if (!IS_ERR(new_rl))
+ return new_rl;
+ ntfs_free(rl);
+ ntfs_error(vol->sb, "Failed to merge runlists.");
+ return new_rl;
+io_error:
+ ntfs_error(vol->sb, "Corrupt attribute.");
+err_out:
+ ntfs_free(rl);
+ return ERR_PTR(-EIO);
+}
+
+/**
+ * ntfs_rl_vcn_to_lcn - convert a vcn into a lcn given a runlist
+ * @rl: runlist to use for conversion
+ * @vcn: vcn to convert
+ *
+ * Convert the virtual cluster number @vcn of an attribute into a logical
+ * cluster number (lcn) of a device using the runlist @rl to map vcns to their
+ * corresponding lcns.
+ *
+ * It is up to the caller to serialize access to the runlist @rl.
+ *
+ * Since lcns must be >= 0, we use negative return codes with special meaning:
+ *
+ * Return code Meaning / Description
+ * ==================================================
+ * LCN_HOLE Hole / not allocated on disk.
+ * LCN_RL_NOT_MAPPED This is part of the runlist which has not been
+ * inserted into the runlist yet.
+ * LCN_ENOENT There is no such vcn in the attribute.
+ *
+ * Locking: - The caller must have locked the runlist (for reading or writing).
+ * - This function does not touch the lock, nor does it modify the
+ * runlist.
+ */
+s64 ntfs_rl_vcn_to_lcn(const struct runlist_element *rl, const s64 vcn)
+{
+ int i;
+
+ /*
+ * If rl is NULL, assume that we have found an unmapped runlist. The
+ * caller can then attempt to map it and fail appropriately if
+ * necessary.
+ */
+ if (unlikely(!rl))
+ return LCN_RL_NOT_MAPPED;
+
+ /* Catch out of lower bounds vcn. */
+ if (unlikely(vcn < rl[0].vcn))
+ return LCN_ENOENT;
+
+ for (i = 0; likely(rl[i].length); i++) {
+ if (vcn < rl[i+1].vcn) {
+ if (likely(rl[i].lcn >= 0))
+ return rl[i].lcn + (vcn - rl[i].vcn);
+ return rl[i].lcn;
+ }
+ }
+ /*
+ * The terminator element is setup to the correct value, i.e. one of
+ * LCN_HOLE, LCN_RL_NOT_MAPPED, or LCN_ENOENT.
+ */
+ if (likely(rl[i].lcn < 0))
+ return rl[i].lcn;
+ /* Just in case... We could replace this with BUG() some day. */
+ return LCN_ENOENT;
+}
+
+/**
+ * ntfs_rl_find_vcn_nolock - find a vcn in a runlist
+ * @rl: runlist to search
+ * @vcn: vcn to find
+ *
+ * Find the virtual cluster number @vcn in the runlist @rl and return the
+ * address of the runlist element containing the @vcn on success.
+ *
+ * Return NULL if @rl is NULL or @vcn is in an unmapped part/out of bounds of
+ * the runlist.
+ *
+ * Locking: The runlist must be locked on entry.
+ */
+struct runlist_element *ntfs_rl_find_vcn_nolock(struct runlist_element *rl, const s64 vcn)
+{
+ if (unlikely(!rl || vcn < rl[0].vcn))
+ return NULL;
+ while (likely(rl->length)) {
+ if (unlikely(vcn < rl[1].vcn)) {
+ if (likely(rl->lcn >= LCN_HOLE))
+ return rl;
+ return NULL;
+ }
+ rl++;
+ }
+ if (likely(rl->lcn == LCN_ENOENT))
+ return rl;
+ return NULL;
+}
+
+/**
+ * ntfs_get_nr_significant_bytes - get number of bytes needed to store a number
+ * @n: number for which to get the number of bytes for
+ *
+ * Return the number of bytes required to store @n unambiguously as
+ * a signed number.
+ *
+ * This is used in the context of the mapping pairs array to determine how
+ * many bytes will be needed in the array to store a given logical cluster
+ * number (lcn) or a specific run length.
+ *
+ * Return the number of bytes written. This function cannot fail.
+ */
+static inline int ntfs_get_nr_significant_bytes(const s64 n)
+{
+ s64 l = n;
+ int i;
+ s8 j;
+
+ i = 0;
+ do {
+ l >>= 8;
+ i++;
+ } while (l != 0 && l != -1);
+ j = (n >> 8 * (i - 1)) & 0xff;
+ /* If the sign bit is wrong, we need an extra byte. */
+ if ((n < 0 && j >= 0) || (n > 0 && j < 0))
+ i++;
+ return i;
+}
+
+/**
+ * ntfs_get_size_for_mapping_pairs - get bytes needed for mapping pairs array
+ *
+ * Walk the locked runlist @rl and calculate the size in bytes of the mapping
+ * pairs array corresponding to the runlist @rl, starting at vcn @first_vcn and
+ * finishing with vcn @last_vcn.
+ *
+ * A @last_vcn of -1 means end of runlist and in that case the size of the
+ * mapping pairs array corresponding to the runlist starting at vcn @first_vcn
+ * and finishing at the end of the runlist is determined.
+ *
+ * This for example allows us to allocate a buffer of the right size when
+ * building the mapping pairs array.
+ *
+ * If @rl is NULL, just return 1 (for the single terminator byte).
+ *
+ * Return the calculated size in bytes on success. On error, return -errno.
+ */
+int ntfs_get_size_for_mapping_pairs(const struct ntfs_volume *vol,
+ const struct runlist_element *rl, const s64 first_vcn,
+ const s64 last_vcn, int max_mp_size)
+{
+ s64 prev_lcn;
+ int rls;
+ bool the_end = false;
+
+ if (first_vcn < 0 || last_vcn < -1)
+ return -EINVAL;
+
+ if (last_vcn >= 0 && first_vcn > last_vcn)
+ return -EINVAL;
+
+ if (!rl) {
+ WARN_ON(first_vcn);
+ WARN_ON(last_vcn > 0);
+ return 1;
+ }
+ if (max_mp_size <= 0)
+ max_mp_size = INT_MAX;
+ /* Skip to runlist element containing @first_vcn. */
+ while (rl->length && first_vcn >= rl[1].vcn)
+ rl++;
+ if (unlikely((!rl->length && first_vcn > rl->vcn) ||
+ first_vcn < rl->vcn))
+ return -EINVAL;
+ prev_lcn = 0;
+ /* Always need the termining zero byte. */
+ rls = 1;
+ /* Do the first partial run if present. */
+ if (first_vcn > rl->vcn) {
+ s64 delta, length = rl->length;
+
+ /* We know rl->length != 0 already. */
+ if (unlikely(length < 0 || rl->lcn < LCN_HOLE))
+ goto err_out;
+ /*
+ * If @stop_vcn is given and finishes inside this run, cap the
+ * run length.
+ */
+ if (unlikely(last_vcn >= 0 && rl[1].vcn > last_vcn)) {
+ s64 s1 = last_vcn + 1;
+
+ if (unlikely(rl[1].vcn > s1))
+ length = s1 - rl->vcn;
+ the_end = true;
+ }
+ delta = first_vcn - rl->vcn;
+ /* Header byte + length. */
+ rls += 1 + ntfs_get_nr_significant_bytes(length - delta);
+ /*
+ * If the logical cluster number (lcn) denotes a hole and we
+ * are on NTFS 3.0+, we don't store it at all, i.e. we need
+ * zero space. On earlier NTFS versions we just store the lcn.
+ * Note: this assumes that on NTFS 1.2-, holes are stored with
+ * an lcn of -1 and not a delta_lcn of -1 (unless both are -1).
+ */
+ if (likely(rl->lcn >= 0 || vol->major_ver < 3)) {
+ prev_lcn = rl->lcn;
+ if (likely(rl->lcn >= 0))
+ prev_lcn += delta;
+ /* Change in lcn. */
+ rls += ntfs_get_nr_significant_bytes(prev_lcn);
+ }
+ /* Go to next runlist element. */
+ rl++;
+ }
+ /* Do the full runs. */
+ for (; rl->length && !the_end; rl++) {
+ s64 length = rl->length;
+
+ if (unlikely(length < 0 || rl->lcn < LCN_HOLE))
+ goto err_out;
+ /*
+ * If @stop_vcn is given and finishes inside this run, cap the
+ * run length.
+ */
+ if (unlikely(last_vcn >= 0 && rl[1].vcn > last_vcn)) {
+ s64 s1 = last_vcn + 1;
+
+ if (unlikely(rl[1].vcn > s1))
+ length = s1 - rl->vcn;
+ the_end = true;
+ }
+ /* Header byte + length. */
+ rls += 1 + ntfs_get_nr_significant_bytes(length);
+ /*
+ * If the logical cluster number (lcn) denotes a hole and we
+ * are on NTFS 3.0+, we don't store it at all, i.e. we need
+ * zero space. On earlier NTFS versions we just store the lcn.
+ * Note: this assumes that on NTFS 1.2-, holes are stored with
+ * an lcn of -1 and not a delta_lcn of -1 (unless both are -1).
+ */
+ if (likely(rl->lcn >= 0 || vol->major_ver < 3)) {
+ /* Change in lcn. */
+ rls += ntfs_get_nr_significant_bytes(rl->lcn -
+ prev_lcn);
+ prev_lcn = rl->lcn;
+ }
+
+ if (rls > max_mp_size)
+ break;
+ }
+ return rls;
+err_out:
+ if (rl->lcn == LCN_RL_NOT_MAPPED)
+ rls = -EINVAL;
+ else
+ rls = -EIO;
+ return rls;
+}
+
+/**
+ * ntfs_write_significant_bytes - write the significant bytes of a number
+ * @dst: destination buffer to write to
+ * @dst_max: pointer to last byte of destination buffer for bounds checking
+ * @n: number whose significant bytes to write
+ *
+ * Store in @dst, the minimum bytes of the number @n which are required to
+ * identify @n unambiguously as a signed number, taking care not to exceed
+ * @dest_max, the maximum position within @dst to which we are allowed to
+ * write.
+ *
+ * This is used when building the mapping pairs array of a runlist to compress
+ * a given logical cluster number (lcn) or a specific run length to the minimum
+ * size possible.
+ *
+ * Return the number of bytes written on success. On error, i.e. the
+ * destination buffer @dst is too small, return -ENOSPC.
+ */
+static inline int ntfs_write_significant_bytes(s8 *dst, const s8 *dst_max,
+ const s64 n)
+{
+ s64 l = n;
+ int i;
+ s8 j;
+
+ i = 0;
+ do {
+ if (unlikely(dst > dst_max))
+ goto err_out;
+ *dst++ = l & 0xffll;
+ l >>= 8;
+ i++;
+ } while (l != 0 && l != -1);
+ j = (n >> 8 * (i - 1)) & 0xff;
+ /* If the sign bit is wrong, we need an extra byte. */
+ if (n < 0 && j >= 0) {
+ if (unlikely(dst > dst_max))
+ goto err_out;
+ i++;
+ *dst = (s8)-1;
+ } else if (n > 0 && j < 0) {
+ if (unlikely(dst > dst_max))
+ goto err_out;
+ i++;
+ *dst = (s8)0;
+ }
+ return i;
+err_out:
+ return -ENOSPC;
+}
+
+/**
+ * ntfs_mapping_pairs_build - build the mapping pairs array from a runlist
+ *
+ * Create the mapping pairs array from the locked runlist @rl, starting at vcn
+ * @first_vcn and finishing with vcn @last_vcn and save the array in @dst.
+ * @dst_len is the size of @dst in bytes and it should be at least equal to the
+ * value obtained by calling ntfs_get_size_for_mapping_pairs().
+ *
+ * A @last_vcn of -1 means end of runlist and in that case the mapping pairs
+ * array corresponding to the runlist starting at vcn @first_vcn and finishing
+ * at the end of the runlist is created.
+ *
+ * If @rl is NULL, just write a single terminator byte to @dst.
+ *
+ * On success or -ENOSPC error, if @stop_vcn is not NULL, *@stop_vcn is set to
+ * the first vcn outside the destination buffer. Note that on error, @dst has
+ * been filled with all the mapping pairs that will fit, thus it can be treated
+ * as partial success, in that a new attribute extent needs to be created or
+ * the next extent has to be used and the mapping pairs build has to be
+ * continued with @first_vcn set to *@stop_vcn.
+ */
+int ntfs_mapping_pairs_build(const struct ntfs_volume *vol, s8 *dst,
+ const int dst_len, const struct runlist_element *rl,
+ const s64 first_vcn, const s64 last_vcn, s64 *const stop_vcn,
+ struct runlist_element **stop_rl, unsigned int *de_cluster_count)
+{
+ s64 prev_lcn;
+ s8 *dst_max, *dst_next;
+ int err = -ENOSPC;
+ bool the_end = false;
+ s8 len_len, lcn_len;
+ unsigned int de_cnt = 0;
+
+ if (first_vcn < 0 || last_vcn < -1 || dst_len < 1)
+ return -EINVAL;
+ if (last_vcn >= 0 && first_vcn > last_vcn)
+ return -EINVAL;
+
+ if (!rl) {
+ WARN_ON(first_vcn || last_vcn > 0);
+ if (stop_vcn)
+ *stop_vcn = 0;
+ /* Terminator byte. */
+ *dst = 0;
+ return 0;
+ }
+ /* Skip to runlist element containing @first_vcn. */
+ while (rl->length && first_vcn >= rl[1].vcn)
+ rl++;
+ if (unlikely((!rl->length && first_vcn > rl->vcn) ||
+ first_vcn < rl->vcn))
+ return -EINVAL;
+ /*
+ * @dst_max is used for bounds checking in
+ * ntfs_write_significant_bytes().
+ */
+ dst_max = dst + dst_len - 1;
+ prev_lcn = 0;
+ /* Do the first partial run if present. */
+ if (first_vcn > rl->vcn) {
+ s64 delta, length = rl->length;
+
+ /* We know rl->length != 0 already. */
+ if (unlikely(length < 0 || rl->lcn < LCN_HOLE))
+ goto err_out;
+ /*
+ * If @stop_vcn is given and finishes inside this run, cap the
+ * run length.
+ */
+ if (unlikely(last_vcn >= 0 && rl[1].vcn > last_vcn)) {
+ s64 s1 = last_vcn + 1;
+
+ if (unlikely(rl[1].vcn > s1))
+ length = s1 - rl->vcn;
+ the_end = true;
+ }
+ delta = first_vcn - rl->vcn;
+ /* Write length. */
+ len_len = ntfs_write_significant_bytes(dst + 1, dst_max,
+ length - delta);
+ if (unlikely(len_len < 0))
+ goto size_err;
+ /*
+ * If the logical cluster number (lcn) denotes a hole and we
+ * are on NTFS 3.0+, we don't store it at all, i.e. we need
+ * zero space. On earlier NTFS versions we just write the lcn
+ * change.
+ */
+ if (likely(rl->lcn >= 0 || vol->major_ver < 3)) {
+ prev_lcn = rl->lcn;
+ if (likely(rl->lcn >= 0))
+ prev_lcn += delta;
+ /* Write change in lcn. */
+ lcn_len = ntfs_write_significant_bytes(dst + 1 +
+ len_len, dst_max, prev_lcn);
+ if (unlikely(lcn_len < 0))
+ goto size_err;
+ } else
+ lcn_len = 0;
+ dst_next = dst + len_len + lcn_len + 1;
+ if (unlikely(dst_next > dst_max))
+ goto size_err;
+ /* Update header byte. */
+ *dst = lcn_len << 4 | len_len;
+ /* Position at next mapping pairs array element. */
+ dst = dst_next;
+ /* Go to next runlist element. */
+ rl++;
+ }
+ /* Do the full runs. */
+ for (; rl->length && !the_end; rl++) {
+ s64 length = rl->length;
+
+ if (unlikely(length < 0 || rl->lcn < LCN_HOLE))
+ goto err_out;
+ /*
+ * If @stop_vcn is given and finishes inside this run, cap the
+ * run length.
+ */
+ if (unlikely(last_vcn >= 0 && rl[1].vcn > last_vcn)) {
+ s64 s1 = last_vcn + 1;
+
+ if (unlikely(rl[1].vcn > s1))
+ length = s1 - rl->vcn;
+ the_end = true;
+ }
+ /* Write length. */
+ len_len = ntfs_write_significant_bytes(dst + 1, dst_max,
+ length);
+ if (unlikely(len_len < 0))
+ goto size_err;
+ /*
+ * If the logical cluster number (lcn) denotes a hole and we
+ * are on NTFS 3.0+, we don't store it at all, i.e. we need
+ * zero space. On earlier NTFS versions we just write the lcn
+ * change.
+ */
+ if (likely(rl->lcn >= 0 || vol->major_ver < 3)) {
+ /* Write change in lcn. */
+ lcn_len = ntfs_write_significant_bytes(dst + 1 +
+ len_len, dst_max, rl->lcn - prev_lcn);
+ if (unlikely(lcn_len < 0))
+ goto size_err;
+ prev_lcn = rl->lcn;
+ } else {
+ if (rl->lcn == LCN_DELALLOC)
+ de_cnt += rl->length;
+ lcn_len = 0;
+ }
+ dst_next = dst + len_len + lcn_len + 1;
+ if (unlikely(dst_next > dst_max))
+ goto size_err;
+ /* Update header byte. */
+ *dst = lcn_len << 4 | len_len;
+ /* Position at next mapping pairs array element. */
+ dst = dst_next;
+ }
+ /* Success. */
+ if (de_cluster_count)
+ *de_cluster_count = de_cnt;
+ err = 0;
+size_err:
+ /* Set stop vcn. */
+ if (stop_vcn)
+ *stop_vcn = rl->vcn;
+ if (stop_rl)
+ *stop_rl = (struct runlist_element *)rl;
+ /* Add terminator byte. */
+ *dst = 0;
+ return err;
+err_out:
+ if (rl->lcn == LCN_RL_NOT_MAPPED)
+ err = -EINVAL;
+ else
+ err = -EIO;
+ return err;
+}
+
+/**
+ * ntfs_rl_truncate_nolock - truncate a runlist starting at a specified vcn
+ * @vol: ntfs volume (needed for error output)
+ * @runlist: runlist to truncate
+ * @new_length: the new length of the runlist in VCNs
+ *
+ * Truncate the runlist described by @runlist as well as the memory buffer
+ * holding the runlist elements to a length of @new_length VCNs.
+ *
+ * If @new_length lies within the runlist, the runlist elements with VCNs of
+ * @new_length and above are discarded. As a special case if @new_length is
+ * zero, the runlist is discarded and set to NULL.
+ *
+ * If @new_length lies beyond the runlist, a sparse runlist element is added to
+ * the end of the runlist @runlist or if the last runlist element is a sparse
+ * one already, this is extended.
+ *
+ * Note, no checking is done for unmapped runlist elements. It is assumed that
+ * the caller has mapped any elements that need to be mapped already.
+ *
+ * Return 0 on success and -errno on error.
+ */
+int ntfs_rl_truncate_nolock(const struct ntfs_volume *vol, struct runlist *const runlist,
+ const s64 new_length)
+{
+ struct runlist_element *rl;
+ int old_size;
+
+ ntfs_debug("Entering for new_length 0x%llx.", (long long)new_length);
+
+ if (!runlist || new_length < 0)
+ return -EINVAL;
+
+ rl = runlist->rl;
+ if (new_length < rl->vcn)
+ return -EINVAL;
+
+ /* Find @new_length in the runlist. */
+ while (likely(rl->length && new_length >= rl[1].vcn))
+ rl++;
+ /*
+ * If not at the end of the runlist we need to shrink it.
+ * If at the end of the runlist we need to expand it.
+ */
+ if (rl->length) {
+ struct runlist_element *trl;
+ bool is_end;
+
+ ntfs_debug("Shrinking runlist.");
+ /* Determine the runlist size. */
+ trl = rl + 1;
+ while (likely(trl->length))
+ trl++;
+ old_size = trl - runlist->rl + 1;
+ /* Truncate the run. */
+ rl->length = new_length - rl->vcn;
+ /*
+ * If a run was partially truncated, make the following runlist
+ * element a terminator.
+ */
+ is_end = false;
+ if (rl->length) {
+ rl++;
+ if (!rl->length)
+ is_end = true;
+ rl->vcn = new_length;
+ rl->length = 0;
+ }
+ rl->lcn = LCN_ENOENT;
+ runlist->count = rl - runlist->rl + 1;
+ /* Reallocate memory if necessary. */
+ if (!is_end) {
+ int new_size = rl - runlist->rl + 1;
+
+ rl = ntfs_rl_realloc(runlist->rl, old_size, new_size);
+ if (IS_ERR(rl))
+ ntfs_warning(vol->sb,
+ "Failed to shrink runlist buffer. This just wastes a bit of memory temporarily so we ignore it and return success.");
+ else
+ runlist->rl = rl;
+ }
+ } else if (likely(/* !rl->length && */ new_length > rl->vcn)) {
+ ntfs_debug("Expanding runlist.");
+ /*
+ * If there is a previous runlist element and it is a sparse
+ * one, extend it. Otherwise need to add a new, sparse runlist
+ * element.
+ */
+ if ((rl > runlist->rl) && ((rl - 1)->lcn == LCN_HOLE))
+ (rl - 1)->length = new_length - (rl - 1)->vcn;
+ else {
+ /* Determine the runlist size. */
+ old_size = rl - runlist->rl + 1;
+ /* Reallocate memory if necessary. */
+ rl = ntfs_rl_realloc(runlist->rl, old_size,
+ old_size + 1);
+ if (IS_ERR(rl)) {
+ ntfs_error(vol->sb, "Failed to expand runlist buffer, aborting.");
+ return PTR_ERR(rl);
+ }
+ runlist->rl = rl;
+ /*
+ * Set @rl to the same runlist element in the new
+ * runlist as before in the old runlist.
+ */
+ rl += old_size - 1;
+ /* Add a new, sparse runlist element. */
+ rl->lcn = LCN_HOLE;
+ rl->length = new_length - rl->vcn;
+ /* Add a new terminator runlist element. */
+ rl++;
+ rl->length = 0;
+ runlist->count = old_size + 1;
+ }
+ rl->vcn = new_length;
+ rl->lcn = LCN_ENOENT;
+ } else /* if (unlikely(!rl->length && new_length == rl->vcn)) */ {
+ /* Runlist already has same size as requested. */
+ rl->lcn = LCN_ENOENT;
+ }
+ ntfs_debug("Done.");
+ return 0;
+}
+
+/**
+ * ntfs_rl_sparse - check whether runlist have sparse regions or not.
+ * @rl: runlist to check
+ *
+ * Return 1 if have, 0 if not, -errno on error.
+ */
+int ntfs_rl_sparse(struct runlist_element *rl)
+{
+ struct runlist_element *rlc;
+
+ if (!rl)
+ return -EINVAL;
+
+ for (rlc = rl; rlc->length; rlc++)
+ if (rlc->lcn < 0) {
+ if (rlc->lcn != LCN_HOLE && rlc->lcn != LCN_DELALLOC) {
+ pr_err("%s: bad runlist", __func__);
+ return -EINVAL;
+ }
+ return 1;
+ }
+ return 0;
+}
+
+/**
+ * ntfs_rl_get_compressed_size - calculate length of non sparse regions
+ * @vol: ntfs volume (need for cluster size)
+ * @rl: runlist to calculate for
+ *
+ * Return compressed size or -errno on error.
+ */
+s64 ntfs_rl_get_compressed_size(struct ntfs_volume *vol, struct runlist_element *rl)
+{
+ struct runlist_element *rlc;
+ s64 ret = 0;
+
+ if (!rl)
+ return -EINVAL;
+
+ for (rlc = rl; rlc->length; rlc++) {
+ if (rlc->lcn < 0) {
+ if (rlc->lcn != LCN_HOLE && rlc->lcn != LCN_DELALLOC) {
+ ntfs_error(vol->sb, "%s: bad runlist, rlc->lcn : %lld",
+ __func__, rlc->lcn);
+ return -EINVAL;
+ }
+ } else
+ ret += rlc->length;
+ }
+ return ret << vol->cluster_size_bits;
+}
+
+static inline bool ntfs_rle_lcn_contiguous(struct runlist_element *left_rle,
+ struct runlist_element *right_rle)
+{
+ if (left_rle->lcn > LCN_HOLE &&
+ left_rle->lcn + left_rle->length == right_rle->lcn)
+ return true;
+ else if (left_rle->lcn == LCN_HOLE && right_rle->lcn == LCN_HOLE)
+ return true;
+ else
+ return false;
+}
+
+static inline bool ntfs_rle_contain(struct runlist_element *rle, s64 vcn)
+{
+ if (rle->length > 0 &&
+ vcn >= rle->vcn && vcn < rle->vcn + rle->length)
+ return true;
+ else
+ return false;
+}
+
+struct runlist_element *ntfs_rl_insert_range(struct runlist_element *dst_rl, int dst_cnt,
+ struct runlist_element *src_rl, int src_cnt,
+ size_t *new_rl_cnt)
+{
+ struct runlist_element *i_rl, *new_rl, *src_rl_origin = src_rl;
+ struct runlist_element dst_rl_split;
+ s64 start_vcn = src_rl[0].vcn;
+ int new_1st_cnt, new_2nd_cnt, new_3rd_cnt, new_cnt;
+
+ if (!dst_rl || !src_rl || !new_rl_cnt)
+ return ERR_PTR(-EINVAL);
+ if (dst_cnt <= 0 || src_cnt <= 0)
+ return ERR_PTR(-EINVAL);
+ if (!(dst_rl[dst_cnt - 1].lcn == LCN_ENOENT &&
+ dst_rl[dst_cnt - 1].length == 0) ||
+ src_rl[src_cnt - 1].lcn < LCN_HOLE)
+ return ERR_PTR(-EINVAL);
+
+ start_vcn = src_rl[0].vcn;
+
+ i_rl = ntfs_rl_find_vcn_nolock(dst_rl, start_vcn);
+ if (!i_rl ||
+ (i_rl->lcn == LCN_ENOENT && i_rl->vcn != start_vcn) ||
+ (i_rl->lcn != LCN_ENOENT && !ntfs_rle_contain(i_rl, start_vcn)))
+ return ERR_PTR(-EINVAL);
+
+ new_1st_cnt = (int)(i_rl - dst_rl);
+ if (new_1st_cnt > dst_cnt)
+ return ERR_PTR(-EINVAL);
+ new_3rd_cnt = dst_cnt - new_1st_cnt;
+ if (new_3rd_cnt < 1)
+ return ERR_PTR(-EINVAL);
+
+ if (i_rl[0].vcn != start_vcn) {
+ if (i_rl[0].lcn == LCN_HOLE && src_rl[0].lcn == LCN_HOLE)
+ goto merge_src_rle;
+
+ /* split @i_rl[0] and create @dst_rl_split */
+ dst_rl_split.vcn = i_rl[0].vcn;
+ dst_rl_split.length = start_vcn - i_rl[0].vcn;
+ dst_rl_split.lcn = i_rl[0].lcn;
+
+ i_rl[0].vcn = start_vcn;
+ i_rl[0].length -= dst_rl_split.length;
+ i_rl[0].lcn += dst_rl_split.length;
+ } else {
+ struct runlist_element *dst_rle, *src_rle;
+merge_src_rle:
+
+ /* not split @i_rl[0] */
+ dst_rl_split.lcn = LCN_ENOENT;
+
+ /* merge @src_rl's first run and @i_rl[0]'s left run if possible */
+ dst_rle = &dst_rl[new_1st_cnt - 1];
+ src_rle = &src_rl[0];
+ if (new_1st_cnt > 0 && ntfs_rle_lcn_contiguous(dst_rle, src_rle)) {
+ WARN_ON(dst_rle->vcn + dst_rle->length != src_rle->vcn);
+ dst_rle->length += src_rle->length;
+ src_rl++;
+ src_cnt--;
+ } else {
+ /* merge @src_rl's last run and @i_rl[0]'s right if possible */
+ dst_rle = &dst_rl[new_1st_cnt];
+ src_rle = &src_rl[src_cnt - 1];
+
+ if (ntfs_rle_lcn_contiguous(dst_rle, src_rle)) {
+ dst_rle->length += src_rle->length;
+ src_cnt--;
+ }
+ }
+ }
+
+ new_2nd_cnt = src_cnt;
+ new_cnt = new_1st_cnt + new_2nd_cnt + new_3rd_cnt;
+ new_cnt += dst_rl_split.lcn >= LCN_HOLE ? 1 : 0;
+ new_rl = ntfs_malloc_nofs(new_cnt * sizeof(*new_rl));
+ if (!new_rl)
+ return ERR_PTR(-ENOMEM);
+
+ /* Copy the @dst_rl's first half to @new_rl */
+ ntfs_rl_mc(new_rl, 0, dst_rl, 0, new_1st_cnt);
+ if (dst_rl_split.lcn >= LCN_HOLE) {
+ ntfs_rl_mc(new_rl, new_1st_cnt, &dst_rl_split, 0, 1);
+ new_1st_cnt++;
+ }
+ /* Copy the @src_rl to @new_rl */
+ ntfs_rl_mc(new_rl, new_1st_cnt, src_rl, 0, new_2nd_cnt);
+ /* Copy the @dst_rl's second half to @new_rl */
+ if (new_3rd_cnt >= 1) {
+ struct runlist_element *rl, *rl_3rd;
+ int dst_1st_cnt = dst_rl_split.lcn >= LCN_HOLE ?
+ new_1st_cnt - 1 : new_1st_cnt;
+
+ ntfs_rl_mc(new_rl, new_1st_cnt + new_2nd_cnt,
+ dst_rl, dst_1st_cnt, new_3rd_cnt);
+ /* Update vcn of the @dst_rl's second half runs to reflect
+ * appended @src_rl.
+ */
+ if (new_1st_cnt + new_2nd_cnt == 0) {
+ rl_3rd = &new_rl[new_1st_cnt + new_2nd_cnt + 1];
+ rl = &new_rl[new_1st_cnt + new_2nd_cnt];
+ } else {
+ rl_3rd = &new_rl[new_1st_cnt + new_2nd_cnt];
+ rl = &new_rl[new_1st_cnt + new_2nd_cnt - 1];
+ }
+ do {
+ rl_3rd->vcn = rl->vcn + rl->length;
+ if (rl_3rd->length <= 0)
+ break;
+ rl = rl_3rd;
+ rl_3rd++;
+ } while (1);
+ }
+ *new_rl_cnt = new_1st_cnt + new_2nd_cnt + new_3rd_cnt;
+
+ ntfs_free(dst_rl);
+ ntfs_free(src_rl_origin);
+ return new_rl;
+}
+
+struct runlist_element *ntfs_rl_punch_hole(struct runlist_element *dst_rl, int dst_cnt,
+ s64 start_vcn, s64 len,
+ struct runlist_element **punch_rl,
+ size_t *new_rl_cnt)
+{
+ struct runlist_element *s_rl, *e_rl, *new_rl, *dst_3rd_rl, hole_rl[1];
+ s64 end_vcn;
+ int new_1st_cnt, dst_3rd_cnt, new_cnt, punch_cnt, merge_cnt;
+ bool begin_split, end_split, one_split_3;
+
+ if (dst_cnt < 2 ||
+ !(dst_rl[dst_cnt - 1].lcn == LCN_ENOENT &&
+ dst_rl[dst_cnt - 1].length == 0))
+ return ERR_PTR(-EINVAL);
+
+ end_vcn = min(start_vcn + len - 1,
+ dst_rl[dst_cnt - 2].vcn + dst_rl[dst_cnt - 2].length - 1);
+
+ s_rl = ntfs_rl_find_vcn_nolock(dst_rl, start_vcn);
+ if (!s_rl ||
+ s_rl->lcn <= LCN_ENOENT ||
+ !ntfs_rle_contain(s_rl, start_vcn))
+ return ERR_PTR(-EINVAL);
+
+ begin_split = s_rl->vcn != start_vcn ? true : false;
+
+ e_rl = ntfs_rl_find_vcn_nolock(dst_rl, end_vcn);
+ if (!e_rl ||
+ e_rl->lcn <= LCN_ENOENT ||
+ !ntfs_rle_contain(e_rl, end_vcn))
+ return ERR_PTR(-EINVAL);
+
+ end_split = e_rl->vcn + e_rl->length - 1 != end_vcn ? true : false;
+
+ /* @s_rl has to be split into left, punched hole, and right */
+ one_split_3 = e_rl == s_rl && begin_split && end_split ? true : false;
+
+ punch_cnt = (int)(e_rl - s_rl) + 1;
+
+ *punch_rl = ntfs_malloc_nofs((punch_cnt + 1) * sizeof(struct runlist_element));
+ if (!*punch_rl)
+ return ERR_PTR(-ENOMEM);
+
+ new_cnt = dst_cnt - (int)(e_rl - s_rl + 1) + 3;
+ new_rl = ntfs_malloc_nofs(new_cnt * sizeof(struct runlist_element));
+ if (!new_rl) {
+ ntfs_free(*punch_rl);
+ *punch_rl = NULL;
+ return ERR_PTR(-ENOMEM);
+ }
+
+ new_1st_cnt = (int)(s_rl - dst_rl) + 1;
+ ntfs_rl_mc(*punch_rl, 0, dst_rl, new_1st_cnt - 1, punch_cnt);
+
+ (*punch_rl)[punch_cnt].lcn = LCN_ENOENT;
+ (*punch_rl)[punch_cnt].length = 0;
+
+ if (!begin_split)
+ new_1st_cnt--;
+ dst_3rd_rl = e_rl;
+ dst_3rd_cnt = (int)(&dst_rl[dst_cnt - 1] - e_rl) + 1;
+ if (!end_split) {
+ dst_3rd_rl++;
+ dst_3rd_cnt--;
+ }
+
+ /* Copy the 1st part of @dst_rl into @new_rl */
+ ntfs_rl_mc(new_rl, 0, dst_rl, 0, new_1st_cnt);
+ if (begin_split) {
+ /* the @e_rl has to be splited and copied into the last of @new_rl
+ * and the first of @punch_rl
+ */
+ s64 first_cnt = start_vcn - dst_rl[new_1st_cnt - 1].vcn;
+
+ if (new_1st_cnt)
+ new_rl[new_1st_cnt - 1].length = first_cnt;
+
+ (*punch_rl)[0].vcn = start_vcn;
+ (*punch_rl)[0].length -= first_cnt;
+ if ((*punch_rl)[0].lcn > LCN_HOLE)
+ (*punch_rl)[0].lcn += first_cnt;
+ }
+
+ /* Copy a hole into @new_rl */
+ hole_rl[0].vcn = start_vcn;
+ hole_rl[0].length = (s64)len;
+ hole_rl[0].lcn = LCN_HOLE;
+ ntfs_rl_mc(new_rl, new_1st_cnt, hole_rl, 0, 1);
+
+ /* Copy the 3rd part of @dst_rl into @new_rl */
+ ntfs_rl_mc(new_rl, new_1st_cnt + 1, dst_3rd_rl, 0, dst_3rd_cnt);
+ if (end_split) {
+ /* the @e_rl has to be splited and copied into the first of
+ * @new_rl and the last of @punch_rl
+ */
+ s64 first_cnt = end_vcn - dst_3rd_rl[0].vcn + 1;
+
+ new_rl[new_1st_cnt + 1].vcn = end_vcn + 1;
+ new_rl[new_1st_cnt + 1].length -= first_cnt;
+ if (new_rl[new_1st_cnt + 1].lcn > LCN_HOLE)
+ new_rl[new_1st_cnt + 1].lcn += first_cnt;
+
+ if (one_split_3)
+ (*punch_rl)[punch_cnt - 1].length -=
+ new_rl[new_1st_cnt + 1].length;
+ else
+ (*punch_rl)[punch_cnt - 1].length = first_cnt;
+ }
+
+ /* Merge left and hole, or hole and right in @new_rl, if left or right
+ * consists of holes.
+ */
+ merge_cnt = 0;
+ if (new_1st_cnt > 0 && new_rl[new_1st_cnt - 1].lcn == LCN_HOLE) {
+ /* Merge right and hole */
+ s_rl = &new_rl[new_1st_cnt - 1];
+ s_rl->length += s_rl[1].length;
+ merge_cnt = 1;
+ /* Merge left and right */
+ if (new_1st_cnt + 1 < new_cnt &&
+ new_rl[new_1st_cnt + 1].lcn == LCN_HOLE) {
+ s_rl->length += s_rl[2].length;
+ merge_cnt++;
+ }
+ } else if (new_1st_cnt + 1 < new_cnt &&
+ new_rl[new_1st_cnt + 1].lcn == LCN_HOLE) {
+ /* Merge left and hole */
+ s_rl = &new_rl[new_1st_cnt];
+ s_rl->length += s_rl[1].length;
+ merge_cnt = 1;
+ }
+ if (merge_cnt) {
+ struct runlist_element *d_rl, *src_rl;
+
+ d_rl = s_rl + 1;
+ src_rl = s_rl + 1 + merge_cnt;
+ ntfs_rl_mm(new_rl, (int)(d_rl - new_rl), (int)(src_rl - new_rl),
+ (int)(&new_rl[new_cnt - 1] - src_rl) + 1);
+ }
+
+ (*punch_rl)[punch_cnt].vcn = (*punch_rl)[punch_cnt - 1].vcn +
+ (*punch_rl)[punch_cnt - 1].length;
+
+ /* punch_cnt elements of dst are replaced with one hole */
+ *new_rl_cnt = dst_cnt - (punch_cnt - (int)begin_split - (int)end_split) +
+ 1 - merge_cnt;
+ ntfs_free(dst_rl);
+ return new_rl;
+}
+
+struct runlist_element *ntfs_rl_collapse_range(struct runlist_element *dst_rl, int dst_cnt,
+ s64 start_vcn, s64 len,
+ struct runlist_element **punch_rl,
+ size_t *new_rl_cnt)
+{
+ struct runlist_element *s_rl, *e_rl, *new_rl, *dst_3rd_rl;
+ s64 end_vcn;
+ int new_1st_cnt, dst_3rd_cnt, new_cnt, punch_cnt, merge_cnt, i;
+ bool begin_split, end_split, one_split_3;
+
+ if (dst_cnt < 2 ||
+ !(dst_rl[dst_cnt - 1].lcn == LCN_ENOENT &&
+ dst_rl[dst_cnt - 1].length == 0))
+ return ERR_PTR(-EINVAL);
+
+ end_vcn = min(start_vcn + len - 1,
+ dst_rl[dst_cnt - 1].vcn - 1);
+
+ s_rl = ntfs_rl_find_vcn_nolock(dst_rl, start_vcn);
+ if (!s_rl ||
+ s_rl->lcn <= LCN_ENOENT ||
+ !ntfs_rle_contain(s_rl, start_vcn))
+ return ERR_PTR(-EINVAL);
+
+ begin_split = s_rl->vcn != start_vcn ? true : false;
+
+ e_rl = ntfs_rl_find_vcn_nolock(dst_rl, end_vcn);
+ if (!e_rl ||
+ e_rl->lcn <= LCN_ENOENT ||
+ !ntfs_rle_contain(e_rl, end_vcn))
+ return ERR_PTR(-EINVAL);
+
+ end_split = e_rl->vcn + e_rl->length - 1 != end_vcn ? true : false;
+
+ /* @s_rl has to be split into left, collapsed, and right */
+ one_split_3 = e_rl == s_rl && begin_split && end_split ? true : false;
+
+ punch_cnt = (int)(e_rl - s_rl) + 1;
+ *punch_rl = ntfs_malloc_nofs((punch_cnt + 1) * sizeof(struct runlist_element));
+ if (!*punch_rl)
+ return ERR_PTR(-ENOMEM);
+
+ new_cnt = dst_cnt - (int)(e_rl - s_rl + 1) + 3;
+ new_rl = ntfs_malloc_nofs(new_cnt * sizeof(struct runlist_element));
+ if (!new_rl) {
+ ntfs_free(*punch_rl);
+ *punch_rl = NULL;
+ return ERR_PTR(-ENOMEM);
+ }
+
+ new_1st_cnt = (int)(s_rl - dst_rl) + 1;
+ ntfs_rl_mc(*punch_rl, 0, dst_rl, new_1st_cnt - 1, punch_cnt);
+ (*punch_rl)[punch_cnt].lcn = LCN_ENOENT;
+ (*punch_rl)[punch_cnt].length = 0;
+
+ if (!begin_split)
+ new_1st_cnt--;
+ dst_3rd_rl = e_rl;
+ dst_3rd_cnt = (int)(&dst_rl[dst_cnt - 1] - e_rl) + 1;
+ if (!end_split) {
+ dst_3rd_rl++;
+ dst_3rd_cnt--;
+ }
+
+ /* Copy the 1st part of @dst_rl into @new_rl */
+ ntfs_rl_mc(new_rl, 0, dst_rl, 0, new_1st_cnt);
+ if (begin_split) {
+ /* the @e_rl has to be splited and copied into the last of @new_rl
+ * and the first of @punch_rl
+ */
+ s64 first_cnt = start_vcn - dst_rl[new_1st_cnt - 1].vcn;
+
+ new_rl[new_1st_cnt - 1].length = first_cnt;
+
+ (*punch_rl)[0].vcn = start_vcn;
+ (*punch_rl)[0].length -= first_cnt;
+ if ((*punch_rl)[0].lcn > LCN_HOLE)
+ (*punch_rl)[0].lcn += first_cnt;
+ }
+
+ /* Copy the 3rd part of @dst_rl into @new_rl */
+ ntfs_rl_mc(new_rl, new_1st_cnt, dst_3rd_rl, 0, dst_3rd_cnt);
+ if (end_split) {
+ /* the @e_rl has to be splited and copied into the first of
+ * @new_rl and the last of @punch_rl
+ */
+ s64 first_cnt = end_vcn - dst_3rd_rl[0].vcn + 1;
+
+ new_rl[new_1st_cnt].vcn = end_vcn + 1;
+ new_rl[new_1st_cnt].length -= first_cnt;
+ if (new_rl[new_1st_cnt].lcn > LCN_HOLE)
+ new_rl[new_1st_cnt].lcn += first_cnt;
+
+ if (one_split_3)
+ (*punch_rl)[punch_cnt - 1].length -=
+ new_rl[new_1st_cnt].length;
+ else
+ (*punch_rl)[punch_cnt - 1].length = first_cnt;
+ }
+
+ /* Adjust vcn */
+ if (new_1st_cnt == 0)
+ new_rl[new_1st_cnt].vcn = 0;
+ for (i = new_1st_cnt == 0 ? 1 : new_1st_cnt; new_rl[i].length; i++)
+ new_rl[i].vcn = new_rl[i - 1].vcn + new_rl[i - 1].length;
+ new_rl[i].vcn = new_rl[i - 1].vcn + new_rl[i - 1].length;
+
+ /* Merge left and hole, or hole and right in @new_rl, if left or right
+ * consists of holes.
+ */
+ merge_cnt = 0;
+ i = new_1st_cnt == 0 ? 1 : new_1st_cnt;
+ if (ntfs_rle_lcn_contiguous(&new_rl[i - 1], &new_rl[i])) {
+ /* Merge right and left */
+ s_rl = &new_rl[new_1st_cnt - 1];
+ s_rl->length += s_rl[1].length;
+ merge_cnt = 1;
+ }
+ if (merge_cnt) {
+ struct runlist_element *d_rl, *src_rl;
+
+ d_rl = s_rl + 1;
+ src_rl = s_rl + 1 + merge_cnt;
+ ntfs_rl_mm(new_rl, (int)(d_rl - new_rl), (int)(src_rl - new_rl),
+ (int)(&new_rl[new_cnt - 1] - src_rl) + 1);
+ }
+
+ (*punch_rl)[punch_cnt].vcn = (*punch_rl)[punch_cnt - 1].vcn +
+ (*punch_rl)[punch_cnt - 1].length;
+
+ /* punch_cnt elements of dst are extracted */
+ *new_rl_cnt = dst_cnt - (punch_cnt - (int)begin_split - (int)end_split) -
+ merge_cnt;
+
+ ntfs_free(dst_rl);
+ return new_rl;
+}
--
2.25.1
next prev parent reply other threads:[~2025-11-27 5:01 UTC|newest]
Thread overview: 46+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-11-27 4:59 [PATCH v2 00/11] ntfsplus: ntfs filesystem remake Namjae Jeon
2025-11-27 4:59 ` [PATCH v2 01/11] ntfsplus: in-memory, on-disk structures and headers Namjae Jeon
2025-12-01 7:14 ` Christoph Hellwig
2025-12-01 8:19 ` Pali Rohár
2025-12-01 10:14 ` Namjae Jeon
2025-12-01 8:47 ` Matthew Wilcox
2025-12-01 10:13 ` Namjae Jeon
2025-12-01 11:22 ` Christoph Hellwig
2025-12-01 11:46 ` Matthew Wilcox
2025-12-01 21:54 ` Namjae Jeon
2025-12-02 5:41 ` Christoph Hellwig
2025-12-01 10:36 ` Namjae Jeon
2025-11-27 4:59 ` [PATCH v2 02/11] ntfsplus: add super block operations Namjae Jeon
2025-11-27 4:59 ` [PATCH v2 03/11] ntfsplus: add inode operations Namjae Jeon
2025-11-27 4:59 ` [PATCH v2 04/11] ntfsplus: add directory operations Namjae Jeon
2025-11-27 4:59 ` [PATCH v2 05/11] ntfsplus: add file operations Namjae Jeon
2025-11-27 4:59 ` [PATCH v2 06/11] ntfsplus: add iomap and address space operations Namjae Jeon
2025-12-01 7:35 ` Christoph Hellwig
2025-12-02 0:47 ` Namjae Jeon
2025-12-02 5:45 ` Christoph Hellwig
2025-12-02 7:52 ` Namjae Jeon
2025-11-27 4:59 ` [PATCH v2 07/11] ntfsplus: add attrib operatrions Namjae Jeon
2025-12-01 7:36 ` Christoph Hellwig
2025-12-01 11:38 ` Namjae Jeon
2025-11-27 4:59 ` Namjae Jeon [this message]
2025-11-27 4:59 ` [PATCH v2 09/11] ntfsplus: add reparse and ea operations Namjae Jeon
2025-11-27 4:59 ` [PATCH v2 10/11] ntfsplus: add misc operations Namjae Jeon
2025-11-27 4:59 ` [PATCH v2 11/11] ntfsplus: add Kconfig and Makefile Namjae Jeon
2025-11-27 9:30 ` Amir Goldstein
2025-11-27 12:18 ` Namjae Jeon
2025-11-27 11:21 ` Amir Goldstein
2025-11-27 12:40 ` Namjae Jeon
2025-11-27 13:11 ` Amir Goldstein
2025-11-28 3:02 ` Namjae Jeon
2025-11-28 10:18 ` Amir Goldstein
2025-11-28 12:28 ` Namjae Jeon
2025-11-27 11:10 ` [PATCH v2 00/11] ntfsplus: ntfs filesystem remake Amir Goldstein
2025-11-27 12:17 ` Namjae Jeon
2025-11-27 13:16 ` Amir Goldstein
2025-11-27 23:14 ` Namjae Jeon
2025-11-27 14:58 ` Matthew Wilcox
2025-11-27 23:19 ` Namjae Jeon
2025-11-28 1:46 ` Winston Wen
2025-11-28 4:26 ` Gao Xiang
2025-11-28 7:02 ` Namjae Jeon
2025-12-03 0:49 ` Winston Wen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251127045944.26009-9-linkinjeon@kernel.org \
--to=linkinjeon@kernel.org \
--cc=amir73il@gmail.com \
--cc=brauner@kernel.org \
--cc=cheol.lee@lge.com \
--cc=djwong@kernel.org \
--cc=dsterba@suse.com \
--cc=ebiggers@kernel.org \
--cc=gunho.lee@lge.com \
--cc=hch@infradead.org \
--cc=hch@lst.de \
--cc=hyc.lee@gmail.com \
--cc=iamjoonsoo.kim@lge.com \
--cc=jack@suse.cz \
--cc=jay.sim@lge.com \
--cc=josef@toxicpanda.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=neil@brown.name \
--cc=pali@kernel.org \
--cc=rgoldwyn@suse.com \
--cc=sandeen@sandeen.net \
--cc=tytso@mit.edu \
--cc=viro@zeniv.linux.org.uk \
--cc=willy@infradead.org \
--cc=xiang@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.