Linux Documentation

Linux Documentation
 help / color / mirror / Atom feed

* Re: [PATCH v7 4/6] iio: adc: ad4691: add SPI offload support
From: Jonathan Cameron @ 2026-04-12 17:56 UTC (permalink / raw)
  To: Radu Sabau via B4 Relay
  Cc: radu.sabau, Lars-Peter Clausen, Michael Hennerich, David Lechner,
	Nuno Sá, Andy Shevchenko, Rob Herring, Krzysztof Kozlowski,
	Conor Dooley, Uwe Kleine-König, Liam Girdwood, Mark Brown,
	Linus Walleij, Bartosz Golaszewski, Philipp Zabel,
	Jonathan Corbet, Shuah Khan, linux-iio, devicetree, linux-kernel,
	linux-pwm, linux-gpio, linux-doc
In-Reply-To: <20260409-ad4692-multichannel-sar-adc-driver-v7-4-be375d4df2c5@analog.com>

On Thu, 09 Apr 2026 18:28:25 +0300
Radu Sabau via B4 Relay <devnull+radu.sabau.analog.com@kernel.org> wrote:

> From: Radu Sabau <radu.sabau@analog.com>
> 
> Add SPI offload support to enable DMA-based, CPU-independent data
> acquisition using the SPI Engine offload framework.
> 
> When an SPI offload is available (devm_spi_offload_get() succeeds),
> the driver registers a DMA engine IIO buffer and uses dedicated buffer
> setup operations. If no offload is available the existing software
> triggered buffer path is used unchanged.
> 
> Both CNV Burst Mode and Manual Mode support offload, but use different
> trigger mechanisms:
> 
> CNV Burst Mode: the SPI Engine is triggered by the ADC's DATA_READY
> signal on the GP pin specified by the trigger-source consumer reference
> in the device tree (one cell = GP pin number 0-3). For this mode the
> driver acts as both an SPI offload consumer (DMA RX stream, message
> optimization) and a trigger source provider: it registers the
> GP/DATA_READY output via devm_spi_offload_trigger_register() so the
> offload framework can match the '#trigger-source-cells' phandle and
> automatically fire the SPI Engine DMA transfer at end-of-conversion.
> 
> Manual Mode: the SPI Engine is triggered by a periodic trigger at
> the configured sampling frequency. The pre-built SPI message uses
> the pipelined CNV-on-CS protocol: N+1 16-bit transfers are issued
> for N active channels (the first result is discarded as garbage from
> the pipeline flush) and the remaining N results are captured by DMA.
> 
> All offload transfers use 16-bit frames (bits_per_word=16, len=2).
> The channel scan_type (storagebits=16, shift=0, IIO_BE) is shared
> between the software triggered-buffer and offload paths; no separate
> scan_type or channel array is needed for the offload case. The
> ad4691_manual_channels[] array introduced in the triggered-buffer
> commit is reused here: it hides the IIO_CHAN_INFO_OVERSAMPLING_RATIO
> attribute, which is not applicable in Manual Mode.
> 
> Kconfig gains a dependency on IIO_BUFFER_DMAENGINE.
> 
> Signed-off-by: Radu Sabau <radu.sabau@analog.com>

A few comments inline.

> diff --git a/drivers/iio/adc/ad4691.c b/drivers/iio/adc/ad4691.c
> index 3e5caa0972eb..839ea7f44c78 100644
> --- a/drivers/iio/adc/ad4691.c
> +++ b/drivers/iio/adc/ad4691.c


> +
> +static int ad4691_cnv_burst_offload_buffer_postenable(struct iio_dev *indio_dev)
> +{
> +	struct ad4691_state *st = iio_priv(indio_dev);
> +	struct ad4691_offload_state *offload = st->offload;
> +	struct device *dev = regmap_get_device(st->regmap);
> +	struct spi_device *spi = to_spi_device(dev);
> +	struct spi_offload_trigger_config config = {
> +		.type = SPI_OFFLOAD_TRIGGER_DATA_READY,
> +	};
> +	unsigned int n_active;
> +	unsigned int bit, k;
> +	int ret;
> +
> +	n_active = bitmap_weight(indio_dev->active_scan_mask, iio_get_masklength(indio_dev));
> +
> +	ret = regmap_write(st->regmap, AD4691_STD_SEQ_CONFIG,
> +			   bitmap_read(indio_dev->active_scan_mask, 0,
> +				       iio_get_masklength(indio_dev)));
> +	if (ret)
> +		return ret;
> +
> +	ret = regmap_write(st->regmap, AD4691_ACC_MASK_REG,
> +			   ~bitmap_read(indio_dev->active_scan_mask, 0,
> +				iio_get_masklength(indio_dev)) & GENMASK(15, 0));
This indent is hard to read. I would either use a local variable, or do it as

	ret = regmap_write(st->regmap, AD4691_ACC_MASK_REG,
			   ~bitmap_read(indio_dev->active_scan_mask, 0,
					iio_get_masklength(indio_dev)) &
			   GENMASK(15, 0));

> +	if (ret)
> +		return ret;
> +
> +	ret = ad4691_enter_conversion_mode(st);
> +	if (ret)
> +		return ret;
> +
> +	memset(st->scan_xfers, 0, sizeof(st->scan_xfers));
> +
> +	/*
> +	 * Each AVG_IN register read uses two 16-bit transfers:
> +	 *   TX: [reg_hi | 0x80, reg_lo]  (address, CS stays asserted)
> +	 *   RX: [data_hi, data_lo]       (data, storagebits=16, shift=0)
> +	 * The state reset is also split into two 16-bit transfers
> +	 * (address then value) to keep bits_per_word uniform throughout.
> +	 */
> +	k = 0;
> +	iio_for_each_active_channel(indio_dev, bit) {
> +		put_unaligned_be16(0x8000 | AD4691_AVG_IN(bit), offload->tx_cmd[k]);
> +
> +		/* TX: address phase, CS stays asserted into data phase */
> +		st->scan_xfers[2 * k].tx_buf = offload->tx_cmd[k];
> +		st->scan_xfers[2 * k].len = sizeof(offload->tx_cmd[k]);
> +		st->scan_xfers[2 * k].bits_per_word = AD4691_OFFLOAD_BITS_PER_WORD;
> +
> +		/* RX: data phase, CS toggles after to delimit the next register op */
> +		st->scan_xfers[2 * k + 1].len = sizeof(offload->tx_cmd[k]);
> +		st->scan_xfers[2 * k + 1].bits_per_word = AD4691_OFFLOAD_BITS_PER_WORD;
> +		st->scan_xfers[2 * k + 1].offload_flags = SPI_OFFLOAD_XFER_RX_STREAM;
> +		st->scan_xfers[2 * k + 1].cs_change = 1;
> +		k++;
> +	}
> +
> +	/* State reset to re-arm DATA_READY for the next scan. */
> +	put_unaligned_be16(AD4691_STATE_RESET_REG, offload->tx_reset);
> +	offload->tx_reset[2] = AD4691_STATE_RESET_ALL;
> +
> +	st->scan_xfers[2 * k].tx_buf = offload->tx_reset;
> +	st->scan_xfers[2 * k].len = sizeof(offload->tx_cmd[k]);
> +	st->scan_xfers[2 * k].bits_per_word = AD4691_OFFLOAD_BITS_PER_WORD;
> +
> +	st->scan_xfers[2 * k + 1].tx_buf = &offload->tx_reset[2];
> +	st->scan_xfers[2 * k + 1].len = sizeof(offload->tx_cmd[k]);
> +	st->scan_xfers[2 * k + 1].bits_per_word = AD4691_OFFLOAD_BITS_PER_WORD;
> +	st->scan_xfers[2 * k + 1].cs_change = 1;
> +
> +	spi_message_init_with_transfers(&st->scan_msg, st->scan_xfers, 2 * k + 2);
> +	st->scan_msg.offload = offload->spi;
> +
> +	ret = spi_optimize_message(spi, &st->scan_msg);
> +	if (ret)
> +		goto err_exit_conversion;
> +
> +	ret = ad4691_sampling_enable(st, true);
> +	if (ret)
> +		goto err_unoptimize;
> +
> +	ret = spi_offload_trigger_enable(offload->spi, offload->trigger, &config);
> +	if (ret)
> +		goto err_sampling_disable;
> +
> +	return 0;
> +
> +err_sampling_disable:
> +	ad4691_sampling_enable(st, false);
> +err_unoptimize:
> +	spi_unoptimize_message(&st->scan_msg);
> +err_exit_conversion:
> +	ad4691_exit_conversion_mode(st);
> +	return ret;
> +}

>  
>  static ssize_t sampling_frequency_store(struct device *dev,
> @@ -833,6 +1123,23 @@ static ssize_t sampling_frequency_store(struct device *dev,
>  	if (ret)
>  		return ret;
>  
> +	if (st->manual_mode && st->offload) {
> +		struct spi_offload_trigger_config config = {
> +			.type = SPI_OFFLOAD_TRIGGER_PERIODIC,
> +			.periodic = { .frequency_hz = freq },
> +		};
> +
> +		ret = spi_offload_trigger_validate(st->offload->trigger, &config);
> +		if (ret) {
> +			iio_device_release_direct(indio_dev);
> +			return ret;
> +		}
> +
> +		st->offload->trigger_hz = config.periodic.frequency_hz;
> +		iio_device_release_direct(indio_dev);
This release in a different scope is a bit ugly. 

Look at whether the auto cleanup approach works well here.

https://elixir.bootlin.com/linux/v7.0-rc7/source/include/linux/iio/iio.h#L767


> +		return len;
> +	}
> +

^ permalink raw reply

* Re: [PATCH v7 5/6] iio: adc: ad4691: add oversampling support
From: Jonathan Cameron @ 2026-04-12 17:58 UTC (permalink / raw)
  To: David Lechner
  Cc: radu.sabau, Lars-Peter Clausen, Michael Hennerich, Nuno Sá,
	Andy Shevchenko, Rob Herring, Krzysztof Kozlowski, Conor Dooley,
	Uwe Kleine-König, Liam Girdwood, Mark Brown, Linus Walleij,
	Bartosz Golaszewski, Philipp Zabel, Jonathan Corbet, Shuah Khan,
	linux-iio, devicetree, linux-kernel, linux-pwm, linux-gpio,
	linux-doc
In-Reply-To: <742b1821-9103-414e-a860-c2e8d5406e35@baylibre.com>

On Fri, 10 Apr 2026 16:15:20 -0500
David Lechner <dlechner@baylibre.com> wrote:

> On 4/9/26 10:28 AM, Radu Sabau via B4 Relay wrote:
> > From: Radu Sabau <radu.sabau@analog.com>
> > 
> > Add per-channel oversampling ratio (OSR) support for CNV burst mode.
> > The accumulator depth register (ACC_DEPTH_IN) is programmed with the
> > selected OSR at buffer enable time and before each single-shot read.
> > 
> > Supported OSR values: 1, 2, 4, 8, 16, 32.
> > 
> > Introduce AD4691_MANUAL_CHANNEL() for manual mode channels, which do
> > not expose the oversampling ratio attribute since OSR is not applicable
> > in that mode. A separate manual_channels array is added to
> > struct ad4691_channel_info and selected at probe time; offload paths
> > reuse the same arrays with num_channels capping access before the soft
> > timestamp entry.
> > 
> > The reported sampling frequency accounts for the active OSR:
> > effective_freq = oscillator_freq / osr  
> 
> Technically, the way this is implemented is fine according to IIO ABI
> rules. Writing any attribute can cause others to change. It does
> introduce a potential pitfall though. Currently, changing the OSR will
> change the sampling frequency, so you have to always write oversampling_ratio
> first, then write sampling_frequency to get what you asked for. If you want
> to change the OSR and keep the same sample rate, you still have to write both
> attributes again.
> 
> In other drivers, I've implemented it so that the requested sampling frequency
> is stored and you always get the closest sampling frequency available based on
> the oversampling ratio. This way, it doesn't matter which order you write
> the attributes. In that case, the actual periodic trigger source isn't set up
> until we actually start sampling.
> 
Agreed. This is more intuitive. Now generally the userspace should
be sanity checking the value anyway as limitations may mean the new
sampling frequency is not particularly close to the original one but
at least it increases the chances of getting the expected value somewhat!

So to me this is a nice usability improvement given the code to implement
it tends not to be too complex.

Thanks,

J


^ permalink raw reply

* Re: (sashiko status) [RFC PATCH v5.2 00/11] mm/damon: introduce DAMOS failed region quota charge ratio
From: SeongJae Park @ 2026-04-12 18:14 UTC (permalink / raw)
  To: SeongJae Park
  Cc: damon, kunit-dev, linux-doc, linux-kernel, linux-kselftest,
	linux-mm
In-Reply-To: <20260412161957.82835-1-sj@kernel.org>

TL; DR: Seems Sashiko is finally convinced.  I will drop RFC tag from the next
spin.

Forwarding sashiko.dev review status for this thread, with my short comments
for issues-may-found reviews.

# review url: https://sashiko.dev/#/patchset/20260412161957.82835-1-sj@kernel.org

- [RFC PATCH v5.2 01/11] mm/damon/core: handle <min_region_sz remaining quota as empty
  - status: Reviewed
  - review: No issues found.
- [RFC PATCH v5.2 02/11] mm/damon/core: merge regions after applying DAMOS schemes
  - status: Reviewed
  - review: No issues found.
- [RFC PATCH v5.2 03/11] mm/damon/core: introduce failed region quota charge ratio
  - status: Reviewed
  - review: ISSUES MAY FOUND

Sashiko is asking the same question that I already decided to ignore.

- [RFC PATCH v5.2 04/11] mm/damon/sysfs-schemes: implement fail_charge_{num,denom} files
  - status: Reviewed
  - review: No issues found.
- [RFC PATCH v5.2 05/11] Docs/mm/damon/design: document fail_charge_{num,denom}
  - status: Reviewed
  - review: No issues found.
- [RFC PATCH v5.2 06/11] Docs/admin-guide/mm/damon/usage: document fail_charge_{num,denom} files
  - status: Reviewed
  - review: No issues found.
- [RFC PATCH v5.2 07/11] Docs/ABI/damon: document fail_charge_{num,denom}
  - status: Reviewed
  - review: ISSUES MAY FOUND

For this review, Sashiko seems to have just hallucinated.

- [RFC PATCH v5.2 08/11] mm/damon/tests/core-kunit: test fail_charge_{num,denom} committing
  - status: Reviewed
  - review: No issues found.
- [RFC PATCH v5.2 09/11] selftests/damon/_damon_sysfs: support failed region quota charge ratio
  - status: Reviewed
  - review: No issues found.
- [RFC PATCH v5.2 10/11] selftests/damon/drgn_dump_damon_status: support failed region quota charge ratio
  - status: Reviewed
  - review: No issues found.
- [RFC PATCH v5.2 11/11] selftests/damon/sysfs.py: test failed region quota charge ratio
  - status: Reviewed
  - review: No issues found.


Thanks,
SJ

# hkml [1] generated a draft of this mail.  It can be regenerated
# using the command below:
#
#     hkml patch sashiko_dev --thread_status --for_forwarding \
#             20260412161957.82835-1-sj@kernel.org
#
# [1] https://github.com/sjp38/hackermail

^ permalink raw reply

* Re: [PATCH RFC v2 8/9] Documentation: ABI: testing: add docs for ad9910 sysfs entries
From: David Lechner @ 2026-04-12 18:45 UTC (permalink / raw)
  To: Jonathan Cameron, Rodrigo Alencar
  Cc: Rodrigo Alencar via B4 Relay, rodrigo.alencar, linux-iio,
	devicetree, linux-kernel, linux-doc, Lars-Peter Clausen,
	Michael Hennerich, Andy Shevchenko, Rob Herring,
	Krzysztof Kozlowski, Conor Dooley, Philipp Zabel, Jonathan Corbet,
	Shuah Khan
In-Reply-To: <20260412155115.2f7a83bf@jic23-huawei>

On 4/12/26 9:51 AM, Jonathan Cameron wrote:
> On Mon, 23 Mar 2026 11:36:08 +0000
> Rodrigo Alencar <455.rodrigo.alencar@gmail.com> wrote:
> 
>> On 26/03/22 05:22PM, Jonathan Cameron wrote:
>>> On Wed, 18 Mar 2026 17:56:08 +0000
>>> Rodrigo Alencar via B4 Relay <devnull+rodrigo.alencar.analog.com@kernel.org> wrote:
>>>   
>>>> From: Rodrigo Alencar <rodrigo.alencar@analog.com>
>>>>
>>>> Add ABI documentation file for the DDS AD9910 with sysfs entries to
>>>> control Parallel Port, Digital Ramp Generator, RAM and OSK parameters.
>>>>
>>>> Signed-off-by: Rodrigo Alencar <rodrigo.alencar@analog.com>
>>>> ---  
>>
...

>>>   
>>>> +		  - "ramp_down": No-dwell low; the ramp resets to upper
>>>> +		    limit upon reaching the lower limit.
>>>> +		  - "ramp_up": No-dwell high; the ramp resets to lower
>>>> +		    limit upon reaching the upper limit.
>>>> +		  - "bidirectional_continuous": Both no-dwell high and low;
>>>> +		    the ramp continuously sweeps without dwelling.  
>>>
>>> Triangle wave?  bidirectional continuous is a rather confusing term so maybe
>>> we should rethink this one.  
>>
>> Mostly yes, but not only that. Sawtooth can be achieved as well by changing
>> the step sizes, also other weird patterns can be achieved by toggling DRCTL pin.
> 
> Sawtooth is kind of a special triangle wave with one very steep side.
> Wikipedia even has: "It can also be considered the extreme case of an asymmetric triangle wave"
> https://en.wikipedia.org/wiki/Sawtooth_wave
> 
>> This mode is the most useful when one does not have an FPGA and want to save
>> resources on controlling the DRCTL pin. That mode name comes from the datasheet,
>> so I suppose it was fine.
> 
> Let us see if we can get more opinions on this.  Whilst I can see the logic of
> the datasheet naming, it's a bit obscure.
> 
Is it the same as ramp_up and ramp_down other than what happens when it hits
the limit? If so, I would call it ramp_up_down.


^ permalink raw reply

* [PATCH RFC 00/13] mm/rmap: support arbitrary folio mappings
From: David Hildenbrand (Arm) @ 2026-04-12 18:59 UTC (permalink / raw)
  To: Tejun Heo, Johannes Weiner, Michal Koutný, Jonathan Corbet,
	Shuah Khan, Andrew Morton, Lorenzo Stoakes, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
	Rik van Riel, Harry Yoo, Jann Horn, Brendan Jackman, Zi Yan,
	Pedro Falcato, Matthew Wilcox
  Cc: cgroups, linux-doc, linux-kernel, linux-mm, linux-fsdevel,
	David Hildenbrand (Arm)

This series is related to my LSF/MM/BPF topic:

	[LSF/MM/BPF TOPIC] Towards removing CONFIG_PAGE_MAPCOUNT [1]

And does the following things:

(a) Gets rid of CONFIG_PAGE_MAPCOUNT, so that rmap-related code no
    longer uses page->_mapcount.

(b) Converts the entire mapcount to a "total mapped pages" counter, that
    can trivially be used to calculate the per-page average mapcount in
    a folio.

(c) Cleans up the code heavily.

(d) Teaches RMAP code to support arbitrary folio mappings: For example,
    supporting PMD-mapping of folios that span multiple PMDs.

Initially, I wanted to use a PMD + PUD mapcount, but once I realized that
we can do the same thing much more easily with a "total mapped pages" counter,
I tried that. And was surprised how clean it looks.

More details in the last patch.

Functional Changes
------------------

The kernel now always behaves like CONFIG_NO_PAGE_MAPCOUNT currently
does, in particular:

(1) System/node/memcg stats account large folios as fully mapped as soon
    as a single page is mapped, instead of the precise number of pages
    a partially-mapped folio has mapped. For example, this affects
    "AnonPages:", "Mapped:" and "Shmem" in /proc/meminfo.

(2) "mapmax" part of /proc/$PID/numa_maps uses the average page mapcount
    in a folio instead of the effective page mapcount.

(3) Determining the PM_MMAP_EXCLUSIVE flag for /proc/$PID/pagemap is based on
    folio_maybe_mapped_shared() instead of the effective page mapcount.

(4) /proc/kpagecount exposes the average page mapcount in a folio
    instead of the effective page mapcount.

(5) Calculating the Pss for /proc/$PID/smaps and /proc/$PID/smaps_rollup
    uses the average page mapcount in a folio instead of the effective
    page mapcount.

(6) Calculating the Uss for /proc/$PID/smaps and /proc/$PID/smaps_rollup
    uses folio_maybe_mapped_shared() instead of the effective page
    mapcount.

(7) Detecting partially-mapped anonymous folios uses the average
    per-page mapcount. This implies that we cannot detect partial
    mappings of shared anonymous folios in all cases.

TODOs
-----

Partially-mapped folios:

If deemed relevant, we could detect more partially-mapped shared
anonymous folios on the memory reclaim path (e.g., during access-bit
harvesting) and flag them accordingly, so they can get deferred-split.
We might also just let the deferred splitting logic perform more such
scanning of possible candidates.

Mapcount overflows:

It may already be possible to overflow a large folio's mapcount
(+refcount). With this series, it may be possible to overflow
"total mapped pages" on 32bit; and I'd like to avoid making it an
unsigned long long on 32bit.

In a distant future, we may want a 64bit mapcount value, but for
the time being (no relevant use cases), we should likely reject new
folio mappings if there is the possibility for mapcount +
"total mapped pages" overflows early. I assume doing some basic checks
during fork() + file folio mapping should be good enough (e.g., stop
once it would turn negative).

This series saw only very basic testing on 64bit and no performance
fine-tuning yet.

[1] https://lore.kernel.org/all/fe6afcc3-7539-4650-863b-04d971e89cfb@kernel.org/

---
David Hildenbrand (Arm) (13):
      mm/rmap: remove folio->_nr_pages_mapped
      fs/proc/task_mmu: remove CONFIG_PAGE_MAPCOUNT handling for "mapmax"
      fs/proc/page: remove CONFIG_PAGE_MAPCOUNT handling for kpagecount
      fs/proc/task_mmu: remove CONFIG_PAGE_MAPCOUNT handling for PM_MMAP_EXCLUSIVE
      fs/proc/task_mmu: remove mapcount comment in smaps_account()
      fs/proc/task_mmu: remove CONFIG_PAGE_MAPCOUNT handling in smaps_account()
      mm/rmap: remove CONFIG_PAGE_MAPCOUNT
      mm: re-consolidate folio->_entire_mapcount
      mm: move _large_mapcount to _mapcount in page[1] of a large folio
      mm: re-consolidate folio->_pincount
      mm/rmap: stop using the entire mapcount for hugetlb folios
      mm/rmap: large mapcount interface cleanups
      mm/rmap: support arbitrary folio mappings

 Documentation/admin-guide/cgroup-v1/memory.rst |   6 +-
 Documentation/admin-guide/cgroup-v2.rst        |  13 +-
 Documentation/admin-guide/mm/pagemap.rst       |  30 ++-
 Documentation/filesystems/proc.rst             |  41 ++--
 Documentation/mm/transhuge.rst                 |  29 +--
 fs/proc/internal.h                             |  58 +----
 fs/proc/page.c                                 |  10 +-
 fs/proc/task_mmu.c                             |  69 ++----
 include/linux/mm.h                             |  37 +--
 include/linux/mm_types.h                       |  22 +-
 include/linux/pgtable.h                        |  22 ++
 include/linux/rmap.h                           | 221 ++++++++----------
 mm/Kconfig                                     |  17 --
 mm/debug.c                                     |  10 +-
 mm/internal.h                                  |  30 +--
 mm/memory.c                                    |   3 +-
 mm/page_alloc.c                                |  31 +--
 mm/rmap.c                                      | 302 ++++++++-----------------
 18 files changed, 325 insertions(+), 626 deletions(-)
---
base-commit: 196ab4af58d724f24335fed3da62920c3cea945f
change-id: 20260330-mapcount-32066c687010

Best regards,
-- 
David Hildenbrand (Arm) <david@kernel.org>


^ permalink raw reply

* [PATCH RFC 01/13] mm/rmap: remove folio->_nr_pages_mapped
From: David Hildenbrand (Arm) @ 2026-04-12 18:59 UTC (permalink / raw)
  To: Tejun Heo, Johannes Weiner, Michal Koutný, Jonathan Corbet,
	Shuah Khan, Andrew Morton, Lorenzo Stoakes, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
	Rik van Riel, Harry Yoo, Jann Horn, Brendan Jackman, Zi Yan,
	Pedro Falcato, Matthew Wilcox
  Cc: cgroups, linux-doc, linux-kernel, linux-mm, linux-fsdevel,
	David Hildenbrand (Arm)
In-Reply-To: <20260412-mapcount-v1-0-05e8dfab52e0@kernel.org>

In preparation for removing CONFIG_PAGE_MAPCOUNT, let's stop updating
a folio's _nr_pages_mapped and remove it.

This will make CONFIG_PAGE_MAPCOUNT behave just like
CONFIG_NO_PAGE_MAPCOUNT, in particular:

(1) We account folios as fully mapped as soon as a single page is
    mapped. That is visible through:

    (1) Memcg stats (e.g., "anon" and "file_mapped" in cgroup v2)

    (2) System stats (e.g., "AnonPages:", "Mapped:" and "Shmem"
        in /proc/meminfo)

    (3) Per-node stats (e.g., "AnonPages:", "Mapped:" and "Shmem")
        in /sys/devices/system/node/nodeX/meminfo

Especially for anonymous memory, that memory consumption is now visible
for partially-mapped folios until actually split and the unmapped pages
are reclaimed.

(2) We do not detect partially-mapped anonymous folios in all cases

We now detect partial mappings based on the average per-page mapcount in a
folio: if it is < 1, at least one page is not mapped.

In the most common case (exclusive anonymous folios), we always detect
partial mappings this way reliably.

Example scenarios where we will not detect partial mappings:

(A) Allocate a THP and fork a child process. Then, unmap up to half of the
    THP in the parent *and* the child. Once the child quits, we detect
    the partial mapping.

    The folio mapcount will be >= 512 -> Average >= 1.

(B) Allocate a THP and fork 511 child processes. Then, unmap all but one
    page in *all* processes.

    The folio mapcount will be 512 -> Average == 1.

There are two main ideas on how to detect these cases as well, if we
ever get a real indication that this is problematic:

* Let memory reclaim scan candidates (shared anonymous folios) to detect
  partial mappings.

* Add candidate folios to the deferred split queue and let the deferred
  shrinker detect partial mappings.

More code cleanups are possible, but we'll defer that and focus on the
core change here.

Signed-off-by: David Hildenbrand (Arm) <david@kernel.org>
---
 Documentation/admin-guide/cgroup-v1/memory.rst |   6 +-
 Documentation/admin-guide/cgroup-v2.rst        |  13 +-
 Documentation/mm/transhuge.rst                 |  23 ++--
 include/linux/mm_types.h                       |   4 +-
 include/linux/rmap.h                           |   4 +-
 mm/debug.c                                     |   3 +-
 mm/internal.h                                  |  24 ----
 mm/page_alloc.c                                |   5 -
 mm/rmap.c                                      | 159 ++++++++-----------------
 9 files changed, 69 insertions(+), 172 deletions(-)

diff --git a/Documentation/admin-guide/cgroup-v1/memory.rst b/Documentation/admin-guide/cgroup-v1/memory.rst
index 7db63c002922..ddb5ff5cee15 100644
--- a/Documentation/admin-guide/cgroup-v1/memory.rst
+++ b/Documentation/admin-guide/cgroup-v1/memory.rst
@@ -609,9 +609,9 @@ memory.stat file includes following statistics:
 
 	'rss + mapped_file" will give you resident set size of cgroup.
 
-	Note that some kernel configurations might account complete larger
-	allocations (e.g., THP) towards 'rss' and 'mapped_file', even if
-	only some, but not all that memory is mapped.
+	Note that the kernel accounts entire larger allocations (e.g., THP)
+	towards 'rss' and 'mapped_file' if any part of such an allocation
+	is mapped.
 
 	(Note: file and shmem may be shared among other cgroups. In that case,
 	mapped_file is accounted only when the memory cgroup is owner of page
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 8ad0b2781317..aa703ec89e29 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1538,10 +1538,9 @@ The following nested keys are defined.
 
 	  anon
 		Amount of memory used in anonymous mappings such as
-		brk(), sbrk(), and mmap(MAP_ANONYMOUS). Note that
-		some kernel configurations might account complete larger
-		allocations (e.g., THP) if only some, but not all the
-		memory of such an allocation is mapped anymore.
+		brk(), sbrk(), and mmap(MAP_ANONYMOUS). Note that the
+		kernel accounts entire larger allocations (e.g., THP) towards
+		"anon" if any part of such an allocation is mapped.
 
 	  file
 		Amount of memory used to cache filesystem data,
@@ -1585,9 +1584,9 @@ The following nested keys are defined.
 
 	  file_mapped
 		Amount of cached filesystem data mapped with mmap(). Note
-		that some kernel configurations might account complete
-		larger allocations (e.g., THP) if only some, but not
-		not all the memory of such an allocation is mapped.
+		that the kernel accounts entire larger allocations
+		(e.g., THP) towards "file_mapped" if any part of such an
+		allocation is mapped.
 
 	  file_dirty
 		Amount of cached filesystem data that was modified but
diff --git a/Documentation/mm/transhuge.rst b/Documentation/mm/transhuge.rst
index 0e7f8e4cd2e3..f200c1ac19cb 100644
--- a/Documentation/mm/transhuge.rst
+++ b/Documentation/mm/transhuge.rst
@@ -122,10 +122,6 @@ pages:
     corresponding mapcount), and the current status ("maybe mapped shared" vs.
     "mapped exclusively").
 
-    With CONFIG_PAGE_MAPCOUNT, we also increment/decrement
-    folio->_nr_pages_mapped by ENTIRELY_MAPPED when _entire_mapcount goes
-    from -1 to 0 or 0 to -1.
-
   - map/unmap of individual pages with PTE entry increment/decrement
     folio->_large_mapcount.
 
@@ -134,9 +130,7 @@ pages:
     "mapped exclusively").
 
     With CONFIG_PAGE_MAPCOUNT, we also increment/decrement
-    page->_mapcount and increment/decrement folio->_nr_pages_mapped when
-    page->_mapcount goes from -1 to 0 or 0 to -1 as this counts the number
-    of pages mapped by PTE.
+    page->_mapcount.
 
 split_huge_page internally has to distribute the refcounts in the head
 page to the tail pages before clearing all PG_head/tail bits from the page
@@ -181,12 +175,9 @@ The function deferred_split_folio() is used to queue a folio for splitting.
 The splitting itself will happen when we get memory pressure via shrinker
 interface.
 
-With CONFIG_PAGE_MAPCOUNT, we reliably detect partial mappings based on
-folio->_nr_pages_mapped.
-
-With CONFIG_NO_PAGE_MAPCOUNT, we detect partial mappings based on the
-average per-page mapcount in a THP: if the average is < 1, an anon THP is
-certainly partially mapped. As long as only a single process maps a THP,
-this detection is reliable. With long-running child processes, there can
-be scenarios where partial mappings can currently not be detected, and
-might need asynchronous detection during memory reclaim in the future.
+We detect partial mappings based on the average per-page mapcount in a THP: if
+the average is < 1, an anon THP is certainly partially mapped. As long as
+only a single process maps a THP, this detection is reliable. With
+long-running child processes, there can be scenarios where partial mappings
+can currently not be detected, and might need asynchronous detection during
+memory reclaim in the future.
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index a308e2c23b82..47b2c3d05f41 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -377,7 +377,7 @@ typedef unsigned short mm_id_t;
  * @_last_cpupid: IDs of last CPU and last process that accessed the folio.
  * @_entire_mapcount: Do not use directly, call folio_entire_mapcount().
  * @_large_mapcount: Do not use directly, call folio_mapcount().
- * @_nr_pages_mapped: Do not use outside of rmap and debug code.
+ * @_unused_1: Temporary placeholder.
  * @_pincount: Do not use directly, call folio_maybe_dma_pinned().
  * @_nr_pages: Do not use directly, call folio_nr_pages().
  * @_mm_id: Do not use outside of rmap code.
@@ -452,7 +452,7 @@ struct folio {
 				struct {
 	/* public: */
 					atomic_t _large_mapcount;
-					atomic_t _nr_pages_mapped;
+					unsigned int _unused_1;
 #ifdef CONFIG_64BIT
 					atomic_t _entire_mapcount;
 					atomic_t _pincount;
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 8dc0871e5f00..e5569f5fdaec 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -291,7 +291,7 @@ static inline void folio_add_large_mapcount(struct folio *folio,
 static inline int folio_add_return_large_mapcount(struct folio *folio,
 		int diff, struct vm_area_struct *vma)
 {
-	BUILD_BUG();
+	return atomic_add_return(diff, &folio->_large_mapcount) + 1;
 }
 
 static inline void folio_sub_large_mapcount(struct folio *folio,
@@ -303,7 +303,7 @@ static inline void folio_sub_large_mapcount(struct folio *folio,
 static inline int folio_sub_return_large_mapcount(struct folio *folio,
 		int diff, struct vm_area_struct *vma)
 {
-	BUILD_BUG();
+	return atomic_sub_return(diff, &folio->_large_mapcount) + 1;
 }
 #endif /* CONFIG_MM_ID */
 
diff --git a/mm/debug.c b/mm/debug.c
index 77fa8fe1d641..bfb41ef17a5e 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -86,11 +86,10 @@ static void __dump_folio(const struct folio *folio, const struct page *page,
 		if (folio_has_pincount(folio))
 			pincount = atomic_read(&folio->_pincount);
 
-		pr_warn("head: order:%u mapcount:%d entire_mapcount:%d nr_pages_mapped:%d pincount:%d\n",
+		pr_warn("head: order:%u mapcount:%d entire_mapcount:%d pincount:%d\n",
 				folio_order(folio),
 				folio_mapcount(folio),
 				folio_entire_mapcount(folio),
-				folio_nr_pages_mapped(folio),
 				pincount);
 	}
 
diff --git a/mm/internal.h b/mm/internal.h
index c693646e5b3f..30e48f39d2de 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -103,34 +103,12 @@ struct pagetable_move_control {
 
 void page_writeback_init(void);
 
-/*
- * If a 16GB hugetlb folio were mapped by PTEs of all of its 4kB pages,
- * its nr_pages_mapped would be 0x400000: choose the ENTIRELY_MAPPED bit
- * above that range, instead of 2*(PMD_SIZE/PAGE_SIZE).  Hugetlb currently
- * leaves nr_pages_mapped at 0, but avoid surprise if it participates later.
- */
-#define ENTIRELY_MAPPED		0x800000
-#define FOLIO_PAGES_MAPPED	(ENTIRELY_MAPPED - 1)
-
 /*
  * Flags passed to __show_mem() and show_free_areas() to suppress output in
  * various contexts.
  */
 #define SHOW_MEM_FILTER_NODES		(0x0001u)	/* disallowed nodes */
 
-/*
- * How many individual pages have an elevated _mapcount.  Excludes
- * the folio's entire_mapcount.
- *
- * Don't use this function outside of debugging code.
- */
-static inline int folio_nr_pages_mapped(const struct folio *folio)
-{
-	if (IS_ENABLED(CONFIG_NO_PAGE_MAPCOUNT))
-		return -1;
-	return atomic_read(&folio->_nr_pages_mapped) & FOLIO_PAGES_MAPPED;
-}
-
 /*
  * Retrieve the first entry of a folio based on a provided entry within the
  * folio. We cannot rely on folio->swap as there is no guarantee that it has
@@ -885,8 +863,6 @@ static inline void prep_compound_head(struct page *page, unsigned int order)
 
 	folio_set_order(folio, order);
 	atomic_set(&folio->_large_mapcount, -1);
-	if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT))
-		atomic_set(&folio->_nr_pages_mapped, 0);
 	if (IS_ENABLED(CONFIG_MM_ID)) {
 		folio->_mm_ids = 0;
 		folio->_mm_id_mapcount[0] = -1;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b1c5430cad4e..8888f31aca49 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1111,11 +1111,6 @@ static int free_tail_page_prepare(struct page *head_page, struct page *page)
 			bad_page(page, "nonzero large_mapcount");
 			goto out;
 		}
-		if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT) &&
-		    unlikely(atomic_read(&folio->_nr_pages_mapped))) {
-			bad_page(page, "nonzero nr_pages_mapped");
-			goto out;
-		}
 		if (IS_ENABLED(CONFIG_MM_ID)) {
 			if (unlikely(folio->_mm_id_mapcount[0] != -1)) {
 				bad_page(page, "nonzero mm mapcount 0");
diff --git a/mm/rmap.c b/mm/rmap.c
index 78b7fb5f367c..df42c38fe387 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1353,9 +1353,8 @@ static __always_inline void __folio_add_rmap(struct folio *folio,
 		struct page *page, int nr_pages, struct vm_area_struct *vma,
 		enum pgtable_level level)
 {
-	atomic_t *mapped = &folio->_nr_pages_mapped;
+	int nr = 0, nr_pmdmapped = 0, mapcount;
 	const int orig_nr_pages = nr_pages;
-	int first = 0, nr = 0, nr_pmdmapped = 0;
 
 	__folio_rmap_sanity_checks(folio, page, nr_pages, level);
 
@@ -1366,61 +1365,25 @@ static __always_inline void __folio_add_rmap(struct folio *folio,
 			break;
 		}
 
-		if (IS_ENABLED(CONFIG_NO_PAGE_MAPCOUNT)) {
-			nr = folio_add_return_large_mapcount(folio, orig_nr_pages, vma);
-			if (nr == orig_nr_pages)
-				/* Was completely unmapped. */
-				nr = folio_large_nr_pages(folio);
-			else
-				nr = 0;
-			break;
+		if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) {
+			do {
+				atomic_inc(&page->_mapcount);
+			} while (page++, --nr_pages > 0);
 		}
 
-		do {
-			first += atomic_inc_and_test(&page->_mapcount);
-		} while (page++, --nr_pages > 0);
-
-		if (first &&
-		    atomic_add_return_relaxed(first, mapped) < ENTIRELY_MAPPED)
-			nr = first;
-
-		folio_add_large_mapcount(folio, orig_nr_pages, vma);
+		mapcount = folio_add_return_large_mapcount(folio, orig_nr_pages, vma);
+		if (mapcount == orig_nr_pages)
+			nr = folio_large_nr_pages(folio);
 		break;
 	case PGTABLE_LEVEL_PMD:
 	case PGTABLE_LEVEL_PUD:
-		first = atomic_inc_and_test(&folio->_entire_mapcount);
-		if (IS_ENABLED(CONFIG_NO_PAGE_MAPCOUNT)) {
-			if (level == PGTABLE_LEVEL_PMD && first)
-				nr_pmdmapped = folio_large_nr_pages(folio);
-			nr = folio_inc_return_large_mapcount(folio, vma);
-			if (nr == 1)
-				/* Was completely unmapped. */
-				nr = folio_large_nr_pages(folio);
-			else
-				nr = 0;
-			break;
-		}
+		if (atomic_inc_and_test(&folio->_entire_mapcount) &&
+		    level == PGTABLE_LEVEL_PMD)
+			nr_pmdmapped = HPAGE_PMD_NR;
 
-		if (first) {
-			nr = atomic_add_return_relaxed(ENTIRELY_MAPPED, mapped);
-			if (likely(nr < ENTIRELY_MAPPED + ENTIRELY_MAPPED)) {
-				nr_pages = folio_large_nr_pages(folio);
-				/*
-				 * We only track PMD mappings of PMD-sized
-				 * folios separately.
-				 */
-				if (level == PGTABLE_LEVEL_PMD)
-					nr_pmdmapped = nr_pages;
-				nr = nr_pages - (nr & FOLIO_PAGES_MAPPED);
-				/* Raced ahead of a remove and another add? */
-				if (unlikely(nr < 0))
-					nr = 0;
-			} else {
-				/* Raced ahead of a remove of ENTIRELY_MAPPED */
-				nr = 0;
-			}
-		}
-		folio_inc_large_mapcount(folio, vma);
+		mapcount = folio_inc_return_large_mapcount(folio, vma);
+		if (mapcount == 1)
+			nr = folio_large_nr_pages(folio);
 		break;
 	default:
 		BUILD_BUG();
@@ -1676,15 +1639,11 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
 		}
 
 		folio_set_large_mapcount(folio, nr, vma);
-		if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT))
-			atomic_set(&folio->_nr_pages_mapped, nr);
 	} else {
 		nr = folio_large_nr_pages(folio);
 		/* increment count (starts at -1) */
 		atomic_set(&folio->_entire_mapcount, 0);
 		folio_set_large_mapcount(folio, 1, vma);
-		if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT))
-			atomic_set(&folio->_nr_pages_mapped, ENTIRELY_MAPPED);
 		if (exclusive)
 			SetPageAnonExclusive(&folio->page);
 		nr_pmdmapped = nr;
@@ -1773,12 +1732,28 @@ void folio_add_file_rmap_pud(struct folio *folio, struct page *page,
 #endif
 }
 
+static bool __folio_certainly_partially_mapped(struct folio *folio, int mapcount)
+{
+	/*
+	 * This is a best-effort check only: if the average per-page
+	 * mapcount in the folio is smaller than 1, at least one page is not
+	 * mapped -> partially mapped. This is always reliable for exclusive
+	 * folios.
+	 *
+	 * We will not detect partial mappings in all scenarios:
+	 * when a folio becomes partially mapped while shared and the
+	 * average per-page mapcount is >= 1. However, we will detect the
+	 * partial mapping once it becomes exclusively mapped again.
+	 */
+	return mapcount && !folio_entire_mapcount(folio) &&
+	       mapcount < folio_large_nr_pages(folio);
+}
+
 static __always_inline void __folio_remove_rmap(struct folio *folio,
 		struct page *page, int nr_pages, struct vm_area_struct *vma,
 		enum pgtable_level level)
 {
-	atomic_t *mapped = &folio->_nr_pages_mapped;
-	int last = 0, nr = 0, nr_pmdmapped = 0;
+	int nr = 0, nr_pmdmapped = 0, mapcount;
 	bool partially_mapped = false;
 
 	__folio_rmap_sanity_checks(folio, page, nr_pages, level);
@@ -1790,67 +1765,29 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
 			break;
 		}
 
-		if (IS_ENABLED(CONFIG_NO_PAGE_MAPCOUNT)) {
-			nr = folio_sub_return_large_mapcount(folio, nr_pages, vma);
-			if (!nr) {
-				/* Now completely unmapped. */
-				nr = folio_large_nr_pages(folio);
-			} else {
-				partially_mapped = nr < folio_large_nr_pages(folio) &&
-						   !folio_entire_mapcount(folio);
-				nr = 0;
-			}
-			break;
-		}
-
-		folio_sub_large_mapcount(folio, nr_pages, vma);
-		do {
-			last += atomic_add_negative(-1, &page->_mapcount);
-		} while (page++, --nr_pages > 0);
+		mapcount = folio_sub_return_large_mapcount(folio, nr_pages, vma);
+		if (!mapcount)
+			nr = folio_large_nr_pages(folio);
 
-		if (last &&
-		    atomic_sub_return_relaxed(last, mapped) < ENTIRELY_MAPPED)
-			nr = last;
+		if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) {
+			do {
+				atomic_dec(&page->_mapcount);
+			} while (page++, --nr_pages > 0);
+		}
 
-		partially_mapped = nr && atomic_read(mapped);
+		partially_mapped = __folio_certainly_partially_mapped(folio, mapcount);
 		break;
 	case PGTABLE_LEVEL_PMD:
 	case PGTABLE_LEVEL_PUD:
-		if (IS_ENABLED(CONFIG_NO_PAGE_MAPCOUNT)) {
-			last = atomic_add_negative(-1, &folio->_entire_mapcount);
-			if (level == PGTABLE_LEVEL_PMD && last)
-				nr_pmdmapped = folio_large_nr_pages(folio);
-			nr = folio_dec_return_large_mapcount(folio, vma);
-			if (!nr) {
-				/* Now completely unmapped. */
-				nr = folio_large_nr_pages(folio);
-			} else {
-				partially_mapped = last &&
-						   nr < folio_large_nr_pages(folio);
-				nr = 0;
-			}
-			break;
-		}
+		mapcount = folio_dec_return_large_mapcount(folio, vma);
+		if (!mapcount)
+			nr = folio_large_nr_pages(folio);
 
-		folio_dec_large_mapcount(folio, vma);
-		last = atomic_add_negative(-1, &folio->_entire_mapcount);
-		if (last) {
-			nr = atomic_sub_return_relaxed(ENTIRELY_MAPPED, mapped);
-			if (likely(nr < ENTIRELY_MAPPED)) {
-				nr_pages = folio_large_nr_pages(folio);
-				if (level == PGTABLE_LEVEL_PMD)
-					nr_pmdmapped = nr_pages;
-				nr = nr_pages - nr;
-				/* Raced ahead of another remove and an add? */
-				if (unlikely(nr < 0))
-					nr = 0;
-			} else {
-				/* An add of ENTIRELY_MAPPED raced ahead */
-				nr = 0;
-			}
-		}
+		if (atomic_add_negative(-1, &folio->_entire_mapcount) &&
+		    level == PGTABLE_LEVEL_PMD)
+			nr_pmdmapped = HPAGE_PMD_NR;
 
-		partially_mapped = nr && nr < nr_pmdmapped;
+		partially_mapped = __folio_certainly_partially_mapped(folio, mapcount);
 		break;
 	default:
 		BUILD_BUG();

-- 
2.43.0


^ permalink raw reply related

* [PATCH RFC 02/13] fs/proc/task_mmu: remove CONFIG_PAGE_MAPCOUNT handling for "mapmax"
From: David Hildenbrand (Arm) @ 2026-04-12 18:59 UTC (permalink / raw)
  To: Tejun Heo, Johannes Weiner, Michal Koutný, Jonathan Corbet,
	Shuah Khan, Andrew Morton, Lorenzo Stoakes, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
	Rik van Riel, Harry Yoo, Jann Horn, Brendan Jackman, Zi Yan,
	Pedro Falcato, Matthew Wilcox
  Cc: cgroups, linux-doc, linux-kernel, linux-mm, linux-fsdevel,
	David Hildenbrand (Arm)
In-Reply-To: <20260412-mapcount-v1-0-05e8dfab52e0@kernel.org>

In preparation for removing CONFIG_PAGE_MAPCOUNT, let's always use a
folio's average page mapcount instead of the precise page mapcount when
calculating "mapmax".

Update the doc to state that this behavior no longer depends on the
kernel config. While at it, make it clearer what "mapmax" actually
expresses.

For small folios, or large folios that are mostly fully-mapped, there is
no change at all.

Signed-off-by: David Hildenbrand (Arm) <david@kernel.org>
---
 Documentation/filesystems/proc.rst |  8 ++++----
 fs/proc/task_mmu.c                 | 11 +++--------
 2 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
index 628364b0f69f..1224dc73e089 100644
--- a/Documentation/filesystems/proc.rst
+++ b/Documentation/filesystems/proc.rst
@@ -699,10 +699,10 @@ Where:
 node locality page counters (N0 == node0, N1 == node1, ...) and the kernel page
 size, in KB, that is backing the mapping up.
 
-Note that some kernel configurations do not track the precise number of times
-a page part of a larger allocation (e.g., THP) is mapped. In these
-configurations, "mapmax" might corresponds to the average number of mappings
-per page in such a larger allocation instead.
+"mapmax" is the maximum page mapcount of any page in the mapping, i.e.,
+the highest sharing level observed. For pages that are part of larger
+allocations (e.g., THP), it is derived from the average mapcount per page
+in the allocation, since precise per-page mapcounts are not available.
 
 1.2 Kernel data
 ---------------
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index e091931d7ca1..ad0989d101ab 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -3137,12 +3137,7 @@ static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty,
 			unsigned long nr_pages)
 {
 	struct folio *folio = page_folio(page);
-	int count;
-
-	if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT))
-		count = folio_precise_page_mapcount(folio, page);
-	else
-		count = folio_average_page_mapcount(folio);
+	const int mapcount = folio_average_page_mapcount(folio);
 
 	md->pages += nr_pages;
 	if (pte_dirty || folio_test_dirty(folio))
@@ -3160,8 +3155,8 @@ static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty,
 	if (folio_test_anon(folio))
 		md->anon += nr_pages;
 
-	if (count > md->mapcount_max)
-		md->mapcount_max = count;
+	if (mapcount > md->mapcount_max)
+		md->mapcount_max = mapcount;
 
 	md->node[folio_nid(folio)] += nr_pages;
 }

-- 
2.43.0


^ permalink raw reply related

* [PATCH RFC 03/13] fs/proc/page: remove CONFIG_PAGE_MAPCOUNT handling for kpagecount
From: David Hildenbrand (Arm) @ 2026-04-12 18:59 UTC (permalink / raw)
  To: Tejun Heo, Johannes Weiner, Michal Koutný, Jonathan Corbet,
	Shuah Khan, Andrew Morton, Lorenzo Stoakes, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
	Rik van Riel, Harry Yoo, Jann Horn, Brendan Jackman, Zi Yan,
	Pedro Falcato, Matthew Wilcox
  Cc: cgroups, linux-doc, linux-kernel, linux-mm, linux-fsdevel,
	David Hildenbrand (Arm)
In-Reply-To: <20260412-mapcount-v1-0-05e8dfab52e0@kernel.org>

In preparation for removing CONFIG_PAGE_MAPCOUNT, let's always use a
folio's average page mapcount instead of the precise page mapcount when
calculating the kpagecount value, like we do with CONFIG_NO_PAGE_MAPCOUNT.

Update the doc to state that this behavior no longer depends on the
kernel config. While at it, improve the documentation a bit. "pagecount"
was really misnamed back in the days ...

Should we mention that the value is not actually really expressive in many
cases, such as for the shared zeropage or pages with a PFNMAP mapping?
Let's keep it simple, the hope is that this interface is not used at
all anymore, except for some weird debugging scenarios.

For small folios, or large folios that are fully-mapped everywhere, there
is no change at all.

Signed-off-by: David Hildenbrand (Arm) <david@kernel.org>
---
 Documentation/admin-guide/mm/pagemap.rst | 13 ++++++-------
 fs/proc/page.c                           | 10 +---------
 2 files changed, 7 insertions(+), 16 deletions(-)

diff --git a/Documentation/admin-guide/mm/pagemap.rst b/Documentation/admin-guide/mm/pagemap.rst
index c57e61b5d8aa..f9478bcbb6a9 100644
--- a/Documentation/admin-guide/mm/pagemap.rst
+++ b/Documentation/admin-guide/mm/pagemap.rst
@@ -53,13 +53,12 @@ There are four components to pagemap:
    determine which areas of memory are actually mapped and llseek to
    skip over unmapped regions.
 
- * ``/proc/kpagecount``.  This file contains a 64-bit count of the number of
-   times each page is mapped, indexed by PFN. Some kernel configurations do
-   not track the precise number of times a page part of a larger allocation
-   (e.g., THP) is mapped. In these configurations, the average number of
-   mappings per page in this larger allocation is returned instead. However,
-   if any page of the large allocation is mapped, the returned value will
-   be at least 1.
+ * ``/proc/kpagecount``.  This file contains a 64-bit value for each page,
+   indexed by PFN, representing its mapcount, i.e., the number of times it
+   is mapped into page tables.  For pages that are part of larger allocations
+   (e.g., THP), the average mapcount per page in the allocation is used, since
+   precise per-page mapcounts are not available.  If any page in such an
+   allocation is mapped, the returned value will be at least 1.
 
 The page-types tool in the tools/mm directory can be used to query the
 number of times a page is mapped.
diff --git a/fs/proc/page.c b/fs/proc/page.c
index f9b2c2c906cd..bc4d7c3751de 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -45,17 +45,9 @@ static inline unsigned long get_max_dump_pfn(void)
 static u64 get_kpage_count(const struct page *page)
 {
 	struct page_snapshot ps;
-	u64 ret;
 
 	snapshot_page(&ps, page);
-
-	if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT))
-		ret = folio_precise_page_mapcount(&ps.folio_snapshot,
-						  &ps.page_snapshot);
-	else
-		ret = folio_average_page_mapcount(&ps.folio_snapshot);
-
-	return ret;
+	return folio_average_page_mapcount(&ps.folio_snapshot);
 }
 
 static ssize_t kpage_read(struct file *file, char __user *buf,

-- 
2.43.0


^ permalink raw reply related

* [PATCH RFC 04/13] fs/proc/task_mmu: remove CONFIG_PAGE_MAPCOUNT handling for PM_MMAP_EXCLUSIVE
From: David Hildenbrand (Arm) @ 2026-04-12 18:59 UTC (permalink / raw)
  To: Tejun Heo, Johannes Weiner, Michal Koutný, Jonathan Corbet,
	Shuah Khan, Andrew Morton, Lorenzo Stoakes, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
	Rik van Riel, Harry Yoo, Jann Horn, Brendan Jackman, Zi Yan,
	Pedro Falcato, Matthew Wilcox
  Cc: cgroups, linux-doc, linux-kernel, linux-mm, linux-fsdevel,
	David Hildenbrand (Arm)
In-Reply-To: <20260412-mapcount-v1-0-05e8dfab52e0@kernel.org>

In preparation for removing CONFIG_PAGE_MAPCOUNT, let's always use
folio_maybe_mapped_shared() to detect possible page sharing, like we do
with CONFIG_NO_PAGE_MAPCOUNT.

Update the doc to state that this behavior no longer depends on the
kernel config, and simplify the doc a bit to mention less details that
are hard to follow.

For small folios and for large folios that were never mapped in multiple
processes at the same time, there is no change at all. For large folios,
there might be a change if

(1) The folio was once mapped at the same time into more than two
    address spaces, and now is only mapped in a single address space. We
    might detect it as shared.
(2) A folio page is only mapped into a single address space, but other
    folio pages are mapped into other address spaces. We will detect it
    as shared.
(3) A folio page is mapped multiple times into the same address space. We
    will detect it as exclusive.

We can now remove __folio_page_mapped_exclusively().

Signed-off-by: David Hildenbrand (Arm) <david@kernel.org>
---
 Documentation/admin-guide/mm/pagemap.rst | 17 +++++++----------
 fs/proc/task_mmu.c                       | 12 ++----------
 2 files changed, 9 insertions(+), 20 deletions(-)

diff --git a/Documentation/admin-guide/mm/pagemap.rst b/Documentation/admin-guide/mm/pagemap.rst
index f9478bcbb6a9..67eb04b1e246 100644
--- a/Documentation/admin-guide/mm/pagemap.rst
+++ b/Documentation/admin-guide/mm/pagemap.rst
@@ -38,16 +38,13 @@ There are four components to pagemap:
    precisely which pages are mapped (or in swap) and comparing mapped
    pages between processes.
 
-   Traditionally, bit 56 indicates that a page is mapped exactly once and bit
-   56 is clear when a page is mapped multiple times, even when mapped in the
-   same process multiple times. In some kernel configurations, the semantics
-   for pages part of a larger allocation (e.g., THP) can differ: bit 56 is set
-   if all pages part of the corresponding large allocation are *certainly*
-   mapped in the same process, even if the page is mapped multiple times in that
-   process. Bit 56 is clear when any page page of the larger allocation
-   is *maybe* mapped in a different process. In some cases, a large allocation
-   might be treated as "maybe mapped by multiple processes" even though this
-   is no longer the case.
+   Bit 56 set indicates that the page is currently *certainly* exclusively
+   mapped in this process, and bit 56 clear indicates that the page *might be*
+   mapped into multiple processes ("shared").  Note that in the past, the bit
+   precisely indicated that a page was mapped exactly once, and the bit was
+   clear also if mapped multiple times in the same process.  As this precise
+   information is not available for pages that are part of large allocations
+   (e.g., THP), the semantics have been slightly adjusted.
 
    Efficient users of this interface will use ``/proc/pid/maps`` to
    determine which areas of memory are actually mapped and llseek to
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index ad0989d101ab..1e1572849fed 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1884,13 +1884,6 @@ static int add_to_pagemap(pagemap_entry_t *pme, struct pagemapread *pm)
 	return 0;
 }
 
-static bool __folio_page_mapped_exclusively(struct folio *folio, struct page *page)
-{
-	if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT))
-		return folio_precise_page_mapcount(folio, page) == 1;
-	return !folio_maybe_mapped_shared(folio);
-}
-
 static int pagemap_pte_hole(unsigned long start, unsigned long end,
 			    __always_unused int depth, struct mm_walk *walk)
 {
@@ -1985,8 +1978,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
 		folio = page_folio(page);
 		if (!folio_test_anon(folio))
 			flags |= PM_FILE;
-		if ((flags & PM_PRESENT) &&
-		    __folio_page_mapped_exclusively(folio, page))
+		if ((flags & PM_PRESENT) && !folio_maybe_mapped_shared(folio))
 			flags |= PM_MMAP_EXCLUSIVE;
 	}
 
@@ -2058,7 +2050,7 @@ static int pagemap_pmd_range_thp(pmd_t *pmdp, unsigned long addr,
 		pagemap_entry_t pme;
 
 		if (folio && (flags & PM_PRESENT) &&
-		    __folio_page_mapped_exclusively(folio, page))
+		    !folio_maybe_mapped_shared(folio))
 			cur_flags |= PM_MMAP_EXCLUSIVE;
 
 		pme = make_pme(frame, cur_flags);

-- 
2.43.0


^ permalink raw reply related

* [PATCH RFC 05/13] fs/proc/task_mmu: remove mapcount comment in smaps_account()
From: David Hildenbrand (Arm) @ 2026-04-12 18:59 UTC (permalink / raw)
  To: Tejun Heo, Johannes Weiner, Michal Koutný, Jonathan Corbet,
	Shuah Khan, Andrew Morton, Lorenzo Stoakes, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
	Rik van Riel, Harry Yoo, Jann Horn, Brendan Jackman, Zi Yan,
	Pedro Falcato, Matthew Wilcox
  Cc: cgroups, linux-doc, linux-kernel, linux-mm, linux-fsdevel,
	David Hildenbrand (Arm)
In-Reply-To: <20260412-mapcount-v1-0-05e8dfab52e0@kernel.org>

Reading the mapcount usually yields a snapshot that can change immediately
afterwards, except when the folio is locked and the folio is unmapped.

For example, nothing stops other folio/page mappings that are not protected
through the same PTL from going away; the folio lock cannot prevent that
situation.

Let's just drop the comment.

Signed-off-by: David Hildenbrand (Arm) <david@kernel.org>
---
 fs/proc/task_mmu.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 1e1572849fed..55b037768c60 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -968,11 +968,6 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
 		exclusive = !folio_maybe_mapped_shared(folio);
 	}
 
-	/*
-	 * We obtain a snapshot of the mapcount. Without holding the folio lock
-	 * this snapshot can be slightly wrong as we cannot always read the
-	 * mapcount atomically.
-	 */
 	for (i = 0; i < nr; i++, page++) {
 		unsigned long pss = PAGE_SIZE << PSS_SHIFT;
 

-- 
2.43.0


^ permalink raw reply related

* [PATCH RFC 06/13] fs/proc/task_mmu: remove CONFIG_PAGE_MAPCOUNT handling in smaps_account()
From: David Hildenbrand (Arm) @ 2026-04-12 18:59 UTC (permalink / raw)
  To: Tejun Heo, Johannes Weiner, Michal Koutný, Jonathan Corbet,
	Shuah Khan, Andrew Morton, Lorenzo Stoakes, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
	Rik van Riel, Harry Yoo, Jann Horn, Brendan Jackman, Zi Yan,
	Pedro Falcato, Matthew Wilcox
  Cc: cgroups, linux-doc, linux-kernel, linux-mm, linux-fsdevel,
	David Hildenbrand (Arm)
In-Reply-To: <20260412-mapcount-v1-0-05e8dfab52e0@kernel.org>

In preparation for removing CONFIG_PAGE_MAPCOUNT, let's always use
folio_maybe_mapped_shared() to detect possible page sharing for
calculating the USS, and use folio_average_page_mapcount() to calculate
the PSS, like we do with CONFIG_NO_PAGE_MAPCOUNT.

We can now stop looping over all pages. We could now also get rid
of the "folio_ref_count(folio) == 1" handling that tried to avoid the loop
in the past. But it still looks like a nice and simple
micro-optimization given that many (small) folios only have a single
mapping.

Rename "exclusive" to "private" such that it directly matches the
parameter name in smaps_page_accumulate(), and cleanup the code to
only have a single smaps_page_accumulate() call.

Update the doc to state that this behavior no longer depends on the
kernel config, and simplify the doc a bit to mention less details that
are hard to follow.

We can now remove folio_precise_page_mapcount().

Signed-off-by: David Hildenbrand (Arm) <david@kernel.org>
---
 Documentation/filesystems/proc.rst | 33 +++++++++++-------------------
 fs/proc/internal.h                 | 39 ------------------------------------
 fs/proc/task_mmu.c                 | 41 ++++++++++----------------------------
 3 files changed, 22 insertions(+), 91 deletions(-)

diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
index 1224dc73e089..d2264240e43f 100644
--- a/Documentation/filesystems/proc.rst
+++ b/Documentation/filesystems/proc.rst
@@ -490,27 +490,18 @@ in memory, where each page is divided by the number of processes sharing it.
 So if a process has 1000 pages all to itself, and 1000 shared with one other
 process, its PSS will be 1500.  "Pss_Dirty" is the portion of PSS which
 consists of dirty pages.  ("Pss_Clean" is not included, but it can be
-calculated by subtracting "Pss_Dirty" from "Pss".)
-
-Traditionally, a page is accounted as "private" if it is mapped exactly once,
-and a page is accounted as "shared" when mapped multiple times, even when
-mapped in the same process multiple times. Note that this accounting is
-independent of MAP_SHARED.
-
-In some kernel configurations, the semantics of pages part of a larger
-allocation (e.g., THP) can differ: a page is accounted as "private" if all
-pages part of the corresponding large allocation are *certainly* mapped in the
-same process, even if the page is mapped multiple times in that process. A
-page is accounted as "shared" if any page page of the larger allocation
-is *maybe* mapped in a different process. In some cases, a large allocation
-might be treated as "maybe mapped by multiple processes" even though this
-is no longer the case.
-
-Some kernel configurations do not track the precise number of times a page part
-of a larger allocation is mapped. In this case, when calculating the PSS, the
-average number of mappings per page in this larger allocation might be used
-as an approximation for the number of mappings of a page. The PSS calculation
-will be imprecise in this case.
+calculated by subtracting "Pss_Dirty" from "Pss".)  In some scenarios where
+larger allocations (e.g., THP) are used, the PSS can be slightly imprecise,
+as precise information about how many processes share a page is not available
+for individual pages in such allocations.
+
+A page is accounted as "private" if it is currently *certainly* exclusively
+mapped in this process, and as "shared" if the page *might be* mapped into
+multiple processes.  Note that this accounting is independent of MAP_SHARED.
+In the past, pages that were mapped exactly once were accounted as "private",
+and pages with multiple mappings, even if in the same process, as "shared".
+As this precise information is not available for pages that are part of large
+allocations (e.g., THP), the semantics have been slightly adjusted.
 
 "Referenced" indicates the amount of memory currently marked as referenced or
 accessed.
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index c1e8eb984da8..a5908167ce2d 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -161,45 +161,6 @@ unsigned name_to_int(const struct qstr *qstr);
 /* Worst case buffer size needed for holding an integer. */
 #define PROC_NUMBUF 13
 
-#ifdef CONFIG_PAGE_MAPCOUNT
-/**
- * folio_precise_page_mapcount() - Number of mappings of this folio page.
- * @folio: The folio.
- * @page: The page.
- *
- * The number of present user page table entries that reference this page
- * as tracked via the RMAP: either referenced directly (PTE) or as part of
- * a larger area that covers this page (e.g., PMD).
- *
- * Use this function only for the calculation of existing statistics
- * (USS, PSS, mapcount_max) and for debugging purposes (/proc/kpagecount).
- *
- * Do not add new users.
- *
- * Returns: The number of mappings of this folio page. 0 for
- * folios that are not mapped to user space or are not tracked via the RMAP
- * (e.g., shared zeropage).
- */
-static inline int folio_precise_page_mapcount(struct folio *folio,
-		struct page *page)
-{
-	int mapcount = atomic_read(&page->_mapcount) + 1;
-
-	if (page_mapcount_is_type(mapcount))
-		mapcount = 0;
-	if (folio_test_large(folio))
-		mapcount += folio_entire_mapcount(folio);
-
-	return mapcount;
-}
-#else /* !CONFIG_PAGE_MAPCOUNT */
-static inline int folio_precise_page_mapcount(struct folio *folio,
-		struct page *page)
-{
-	BUILD_BUG();
-}
-#endif /* CONFIG_PAGE_MAPCOUNT */
-
 /**
  * folio_average_page_mapcount() - Average number of mappings per page in this
  *				   folio
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 55b037768c60..7b212fb6ae6c 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -918,10 +918,9 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
 		bool present)
 {
 	struct folio *folio = page_folio(page);
-	int i, nr = compound ? compound_nr(page) : 1;
-	unsigned long size = nr * PAGE_SIZE;
-	bool exclusive;
-	int mapcount;
+	const unsigned long size = compound ? folio_size(folio) : PAGE_SIZE;
+	unsigned long pss = size << PSS_SHIFT;
+	bool private = false;
 
 	/*
 	 * First accumulate quantities that depend only on |size| and the type
@@ -943,13 +942,6 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
 		mss->referenced += size;
 
 	/*
-	 * Then accumulate quantities that may depend on sharing, or that may
-	 * differ page-by-page.
-	 *
-	 * refcount == 1 for present entries guarantees that the folio is mapped
-	 * exactly once. For large folios this implies that exactly one
-	 * PTE/PMD/... maps (a part of) this folio.
-	 *
 	 * Treat all non-present entries (where relying on the mapcount and
 	 * refcount doesn't make sense) as "maybe shared, but not sure how
 	 * often". We treat device private entries as being fake-present.
@@ -957,30 +949,17 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
 	 * Note that it would not be safe to read the mapcount especially for
 	 * pages referenced by migration entries, even with the PTL held.
 	 */
-	if (folio_ref_count(folio) == 1 || !present) {
-		smaps_page_accumulate(mss, folio, size, size << PSS_SHIFT,
-				      dirty, locked, present);
-		return;
-	}
-
-	if (IS_ENABLED(CONFIG_NO_PAGE_MAPCOUNT)) {
-		mapcount = folio_average_page_mapcount(folio);
-		exclusive = !folio_maybe_mapped_shared(folio);
-	}
-
-	for (i = 0; i < nr; i++, page++) {
-		unsigned long pss = PAGE_SIZE << PSS_SHIFT;
-
-		if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) {
-			mapcount = folio_precise_page_mapcount(folio, page);
-			exclusive = mapcount < 2;
-		}
+	if (present && folio_ref_count(folio) == 1) {
+		/* Single mapping, no need to mess with mapcounts. */
+		private = true;
+	} else if (present) {
+		const int mapcount = folio_average_page_mapcount(folio);
 
 		if (mapcount >= 2)
 			pss /= mapcount;
-		smaps_page_accumulate(mss, folio, PAGE_SIZE, pss,
-				dirty, locked, exclusive);
+		private = !folio_maybe_mapped_shared(folio);
 	}
+	smaps_page_accumulate(mss, folio, size, pss, dirty, locked, private);
 }
 
 #ifdef CONFIG_SHMEM

-- 
2.43.0


^ permalink raw reply related

* [PATCH RFC 07/13] mm/rmap: remove CONFIG_PAGE_MAPCOUNT
From: David Hildenbrand (Arm) @ 2026-04-12 18:59 UTC (permalink / raw)
  To: Tejun Heo, Johannes Weiner, Michal Koutný, Jonathan Corbet,
	Shuah Khan, Andrew Morton, Lorenzo Stoakes, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
	Rik van Riel, Harry Yoo, Jann Horn, Brendan Jackman, Zi Yan,
	Pedro Falcato, Matthew Wilcox
  Cc: cgroups, linux-doc, linux-kernel, linux-mm, linux-fsdevel,
	David Hildenbrand (Arm)
In-Reply-To: <20260412-mapcount-v1-0-05e8dfab52e0@kernel.org>

page->_mapcount is still updated but essentially unused. So let's
remove CONFIG_PAGE_MAPCOUNT. Given that CONFIG_NO_PAGE_MAPCOUNT is the
only remaining variant, that Kconfig can go as well.

We can replace some instances of "orig_nr_pages" with "nr_pages" as
the latter is no longer modified.

Signed-off-by: David Hildenbrand (Arm) <david@kernel.org>
---
 Documentation/mm/transhuge.rst |  3 ---
 include/linux/rmap.h           | 11 +----------
 mm/Kconfig                     | 17 -----------------
 mm/rmap.c                      | 36 ++++++------------------------------
 4 files changed, 7 insertions(+), 60 deletions(-)

diff --git a/Documentation/mm/transhuge.rst b/Documentation/mm/transhuge.rst
index f200c1ac19cb..eb5ac076e4c6 100644
--- a/Documentation/mm/transhuge.rst
+++ b/Documentation/mm/transhuge.rst
@@ -129,9 +129,6 @@ pages:
     corresponding mapcount), and the current status ("maybe mapped shared" vs.
     "mapped exclusively").
 
-    With CONFIG_PAGE_MAPCOUNT, we also increment/decrement
-    page->_mapcount.
-
 split_huge_page internally has to distribute the refcounts in the head
 page to the tail pages before clearing all PG_head/tail bits from the page
 structures. It can be done easily for refcounts taken by page table
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index e5569f5fdaec..4894e43e5f52 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -493,8 +493,6 @@ static __always_inline void __folio_dup_file_rmap(struct folio *folio,
 		struct page *page, int nr_pages, struct vm_area_struct *dst_vma,
 		enum pgtable_level level)
 {
-	const int orig_nr_pages = nr_pages;
-
 	__folio_rmap_sanity_checks(folio, page, nr_pages, level);
 
 	switch (level) {
@@ -504,12 +502,7 @@ static __always_inline void __folio_dup_file_rmap(struct folio *folio,
 			break;
 		}
 
-		if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) {
-			do {
-				atomic_inc(&page->_mapcount);
-			} while (page++, --nr_pages > 0);
-		}
-		folio_add_large_mapcount(folio, orig_nr_pages, dst_vma);
+		folio_add_large_mapcount(folio, nr_pages, dst_vma);
 		break;
 	case PGTABLE_LEVEL_PMD:
 	case PGTABLE_LEVEL_PUD:
@@ -608,8 +601,6 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio,
 		do {
 			if (PageAnonExclusive(page))
 				ClearPageAnonExclusive(page);
-			if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT))
-				atomic_inc(&page->_mapcount);
 		} while (page++, --nr_pages > 0);
 		folio_add_large_mapcount(folio, orig_nr_pages, dst_vma);
 		break;
diff --git a/mm/Kconfig b/mm/Kconfig
index bd283958d675..576db4fdf16e 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -948,25 +948,8 @@ config READ_ONLY_THP_FOR_FS
 	  support of file THPs will be developed in the next few release
 	  cycles.
 
-config NO_PAGE_MAPCOUNT
-	bool "No per-page mapcount (EXPERIMENTAL)"
-	help
-	  Do not maintain per-page mapcounts for pages part of larger
-	  allocations, such as transparent huge pages.
-
-	  When this config option is enabled, some interfaces that relied on
-	  this information will rely on less-precise per-allocation information
-	  instead: for example, using the average per-page mapcount in such
-	  a large allocation instead of the per-page mapcount.
-
-	  EXPERIMENTAL because the impact of some changes is still unclear.
-
 endif # TRANSPARENT_HUGEPAGE
 
-# simple helper to make the code a bit easier to read
-config PAGE_MAPCOUNT
-	def_bool !NO_PAGE_MAPCOUNT
-
 #
 # The architecture supports pgtable leaves that is larger than PAGE_SIZE
 #
diff --git a/mm/rmap.c b/mm/rmap.c
index df42c38fe387..27488183448b 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1354,7 +1354,6 @@ static __always_inline void __folio_add_rmap(struct folio *folio,
 		enum pgtable_level level)
 {
 	int nr = 0, nr_pmdmapped = 0, mapcount;
-	const int orig_nr_pages = nr_pages;
 
 	__folio_rmap_sanity_checks(folio, page, nr_pages, level);
 
@@ -1365,14 +1364,8 @@ static __always_inline void __folio_add_rmap(struct folio *folio,
 			break;
 		}
 
-		if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) {
-			do {
-				atomic_inc(&page->_mapcount);
-			} while (page++, --nr_pages > 0);
-		}
-
-		mapcount = folio_add_return_large_mapcount(folio, orig_nr_pages, vma);
-		if (mapcount == orig_nr_pages)
+		mapcount = folio_add_return_large_mapcount(folio, nr_pages, vma);
+		if (mapcount == nr_pages)
 			nr = folio_large_nr_pages(folio);
 		break;
 	case PGTABLE_LEVEL_PMD:
@@ -1518,15 +1511,6 @@ static __always_inline void __folio_add_anon_rmap(struct folio *folio,
 		VM_WARN_ON_FOLIO(folio_test_large(folio) &&
 				 folio_entire_mapcount(folio) > 1 &&
 				 PageAnonExclusive(cur_page), folio);
-		if (IS_ENABLED(CONFIG_NO_PAGE_MAPCOUNT))
-			continue;
-
-		/*
-		 * While PTE-mapping a THP we have a PMD and a PTE
-		 * mapping.
-		 */
-		VM_WARN_ON_FOLIO(atomic_read(&cur_page->_mapcount) > 0 &&
-				 PageAnonExclusive(cur_page), folio);
 	}
 
 	/*
@@ -1628,14 +1612,12 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
 		int i;
 
 		nr = folio_large_nr_pages(folio);
-		for (i = 0; i < nr; i++) {
-			struct page *page = folio_page(folio, i);
+		if (exclusive) {
+			for (i = 0; i < nr; i++) {
+				struct page *page = folio_page(folio, i);
 
-			if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT))
-				/* increment count (starts at -1) */
-				atomic_set(&page->_mapcount, 0);
-			if (exclusive)
 				SetPageAnonExclusive(page);
+			}
 		}
 
 		folio_set_large_mapcount(folio, nr, vma);
@@ -1769,12 +1751,6 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
 		if (!mapcount)
 			nr = folio_large_nr_pages(folio);
 
-		if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) {
-			do {
-				atomic_dec(&page->_mapcount);
-			} while (page++, --nr_pages > 0);
-		}
-
 		partially_mapped = __folio_certainly_partially_mapped(folio, mapcount);
 		break;
 	case PGTABLE_LEVEL_PMD:

-- 
2.43.0


^ permalink raw reply related

* [PATCH RFC 08/13] mm: re-consolidate folio->_entire_mapcount
From: David Hildenbrand (Arm) @ 2026-04-12 18:59 UTC (permalink / raw)
  To: Tejun Heo, Johannes Weiner, Michal Koutný, Jonathan Corbet,
	Shuah Khan, Andrew Morton, Lorenzo Stoakes, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
	Rik van Riel, Harry Yoo, Jann Horn, Brendan Jackman, Zi Yan,
	Pedro Falcato, Matthew Wilcox
  Cc: cgroups, linux-doc, linux-kernel, linux-mm, linux-fsdevel,
	David Hildenbrand (Arm)
In-Reply-To: <20260412-mapcount-v1-0-05e8dfab52e0@kernel.org>

Now that we have some space left in page[1] of a large folio on 32bit,
we can re-consolidate folio->_entire_mapcount.

Signed-off-by: David Hildenbrand (Arm) <david@kernel.org>
---
 include/linux/mm.h       |  4 +---
 include/linux/mm_types.h |  5 ++---
 mm/internal.h            |  5 ++---
 mm/page_alloc.c          | 12 ++++--------
 4 files changed, 9 insertions(+), 17 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 633bbf9a184a..1715c6ed14d4 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1832,9 +1832,7 @@ static inline int is_vmalloc_or_module_addr(const void *x)
  */
 static inline int folio_entire_mapcount(const struct folio *folio)
 {
-	VM_BUG_ON_FOLIO(!folio_test_large(folio), folio);
-	if (!IS_ENABLED(CONFIG_64BIT) && unlikely(folio_large_order(folio) == 1))
-		return 0;
+	VM_WARN_ON_FOLIO(!folio_test_large(folio), folio);
 	return atomic_read(&folio->_entire_mapcount) + 1;
 }
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 47b2c3d05f41..1e1befe7d418 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -452,9 +452,9 @@ struct folio {
 				struct {
 	/* public: */
 					atomic_t _large_mapcount;
-					unsigned int _unused_1;
-#ifdef CONFIG_64BIT
 					atomic_t _entire_mapcount;
+#ifdef CONFIG_64BIT
+					unsigned int _unused_1;
 					atomic_t _pincount;
 #endif /* CONFIG_64BIT */
 					mm_id_mapcount_t _mm_id_mapcount[2];
@@ -483,7 +483,6 @@ struct folio {
 	/* public: */
 			struct list_head _deferred_list;
 #ifndef CONFIG_64BIT
-			atomic_t _entire_mapcount;
 			atomic_t _pincount;
 #endif /* !CONFIG_64BIT */
 	/* private: the union with struct page is transitional */
diff --git a/mm/internal.h b/mm/internal.h
index 30e48f39d2de..53b20de141b9 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -868,10 +868,9 @@ static inline void prep_compound_head(struct page *page, unsigned int order)
 		folio->_mm_id_mapcount[0] = -1;
 		folio->_mm_id_mapcount[1] = -1;
 	}
-	if (IS_ENABLED(CONFIG_64BIT) || order > 1) {
+	atomic_set(&folio->_entire_mapcount, -1);
+	if (IS_ENABLED(CONFIG_64BIT) || order > 1)
 		atomic_set(&folio->_pincount, 0);
-		atomic_set(&folio->_entire_mapcount, -1);
-	}
 	if (order > 1)
 		INIT_LIST_HEAD(&folio->_deferred_list);
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8888f31aca49..1c09d79cade3 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1121,11 +1121,11 @@ static int free_tail_page_prepare(struct page *head_page, struct page *page)
 				goto out;
 			}
 		}
+		if (folio_entire_mapcount(folio)) {
+			bad_page(page, "nonzero entire_mapcount");
+			goto out;
+		}
 		if (IS_ENABLED(CONFIG_64BIT)) {
-			if (unlikely(atomic_read(&folio->_entire_mapcount) + 1)) {
-				bad_page(page, "nonzero entire_mapcount");
-				goto out;
-			}
 			if (unlikely(atomic_read(&folio->_pincount))) {
 				bad_page(page, "nonzero pincount");
 				goto out;
@@ -1139,10 +1139,6 @@ static int free_tail_page_prepare(struct page *head_page, struct page *page)
 			goto out;
 		}
 		if (!IS_ENABLED(CONFIG_64BIT)) {
-			if (unlikely(atomic_read(&folio->_entire_mapcount) + 1)) {
-				bad_page(page, "nonzero entire_mapcount");
-				goto out;
-			}
 			if (unlikely(atomic_read(&folio->_pincount))) {
 				bad_page(page, "nonzero pincount");
 				goto out;

-- 
2.43.0


^ permalink raw reply related

* [PATCH RFC 09/13] mm: move _large_mapcount to _mapcount in page[1] of a large folio
From: David Hildenbrand (Arm) @ 2026-04-12 18:59 UTC (permalink / raw)
  To: Tejun Heo, Johannes Weiner, Michal Koutný, Jonathan Corbet,
	Shuah Khan, Andrew Morton, Lorenzo Stoakes, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
	Rik van Riel, Harry Yoo, Jann Horn, Brendan Jackman, Zi Yan,
	Pedro Falcato, Matthew Wilcox
  Cc: cgroups, linux-doc, linux-kernel, linux-mm, linux-fsdevel,
	David Hildenbrand (Arm)
In-Reply-To: <20260412-mapcount-v1-0-05e8dfab52e0@kernel.org>

Now that the _mapcount in tail pages is completely unused, we can
re-purpose it to ... store another mapcount.

In theory, it should now be unnecessary to initialize the large mapcount
to -1 in prep_compound_head(), but let's keep doing that for now.

Signed-off-by: David Hildenbrand (Arm) <david@kernel.org>
---
 include/linux/mm_types.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 1e1befe7d418..e59571d2f81d 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -155,8 +155,7 @@ struct page {
 		/*
 		 * For head pages of typed folios, the value stored here
 		 * allows for determining what this page is used for. The
-		 * tail pages of typed folios will not store a type
-		 * (page_type == _mapcount == -1).
+		 * tail pages of typed folios will not store a type.
 		 *
 		 * See page-flags.h for a list of page types which are currently
 		 * stored here.
@@ -378,6 +377,7 @@ typedef unsigned short mm_id_t;
  * @_entire_mapcount: Do not use directly, call folio_entire_mapcount().
  * @_large_mapcount: Do not use directly, call folio_mapcount().
  * @_unused_1: Temporary placeholder.
+ * @_unused_2: Temporary placeholder.
  * @_pincount: Do not use directly, call folio_maybe_dma_pinned().
  * @_nr_pages: Do not use directly, call folio_nr_pages().
  * @_mm_id: Do not use outside of rmap code.
@@ -451,7 +451,7 @@ struct folio {
 			union {
 				struct {
 	/* public: */
-					atomic_t _large_mapcount;
+					unsigned int _unused_2;
 					atomic_t _entire_mapcount;
 #ifdef CONFIG_64BIT
 					unsigned int _unused_1;
@@ -466,7 +466,7 @@ struct folio {
 				};
 				unsigned long _usable_1[4];
 			};
-			atomic_t _mapcount_1;
+			atomic_t _large_mapcount;
 			atomic_t _refcount_1;
 	/* public: */
 #ifdef NR_PAGES_IN_LARGE_FOLIO
@@ -529,7 +529,7 @@ FOLIO_MATCH(_last_cpupid, _last_cpupid);
 			offsetof(struct page, pg) + sizeof(struct page))
 FOLIO_MATCH(flags, _flags_1);
 FOLIO_MATCH(compound_info, _head_1);
-FOLIO_MATCH(_mapcount, _mapcount_1);
+FOLIO_MATCH(_mapcount, _large_mapcount);
 FOLIO_MATCH(_refcount, _refcount_1);
 #undef FOLIO_MATCH
 #define FOLIO_MATCH(pg, fl)						\

-- 
2.43.0


^ permalink raw reply related

* [PATCH RFC 10/13] mm: re-consolidate folio->_pincount
From: David Hildenbrand (Arm) @ 2026-04-12 18:59 UTC (permalink / raw)
  To: Tejun Heo, Johannes Weiner, Michal Koutný, Jonathan Corbet,
	Shuah Khan, Andrew Morton, Lorenzo Stoakes, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
	Rik van Riel, Harry Yoo, Jann Horn, Brendan Jackman, Zi Yan,
	Pedro Falcato, Matthew Wilcox
  Cc: cgroups, linux-doc, linux-kernel, linux-mm, linux-fsdevel,
	David Hildenbrand (Arm)
In-Reply-To: <20260412-mapcount-v1-0-05e8dfab52e0@kernel.org>

Now that we have some space left in page[1] of a large folio on 32bit,
we can re-consolidate folio->_pincount.

Signed-off-by: David Hildenbrand (Arm) <david@kernel.org>
---
 include/linux/mm.h       |  4 +---
 include/linux/mm_types.h |  7 ++-----
 mm/debug.c               |  5 +----
 mm/internal.h            |  3 +--
 mm/page_alloc.c          | 14 +++-----------
 5 files changed, 8 insertions(+), 25 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1715c6ed14d4..6dd906585420 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2593,9 +2593,7 @@ static inline pud_t folio_mk_pud(const struct folio *folio, pgprot_t pgprot)
 
 static inline bool folio_has_pincount(const struct folio *folio)
 {
-	if (IS_ENABLED(CONFIG_64BIT))
-		return folio_test_large(folio);
-	return folio_order(folio) > 1;
+	return folio_test_large(folio);
 }
 
 /**
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index e59571d2f81d..450f61cad678 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -451,11 +451,11 @@ struct folio {
 			union {
 				struct {
 	/* public: */
-					unsigned int _unused_2;
+					atomic_t _pincount;
 					atomic_t _entire_mapcount;
 #ifdef CONFIG_64BIT
 					unsigned int _unused_1;
-					atomic_t _pincount;
+					unsigned int _unused_2;
 #endif /* CONFIG_64BIT */
 					mm_id_mapcount_t _mm_id_mapcount[2];
 					union {
@@ -482,9 +482,6 @@ struct folio {
 			unsigned long _head_2;
 	/* public: */
 			struct list_head _deferred_list;
-#ifndef CONFIG_64BIT
-			atomic_t _pincount;
-#endif /* !CONFIG_64BIT */
 	/* private: the union with struct page is transitional */
 		};
 		struct page __page_2;
diff --git a/mm/debug.c b/mm/debug.c
index bfb41ef17a5e..80e050bf29ba 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -81,10 +81,7 @@ static void __dump_folio(const struct folio *folio, const struct page *page,
 			folio_ref_count(folio), mapcount, mapping,
 			folio->index + idx, pfn);
 	if (folio_test_large(folio)) {
-		int pincount = 0;
-
-		if (folio_has_pincount(folio))
-			pincount = atomic_read(&folio->_pincount);
+		int pincount = atomic_read(&folio->_pincount);
 
 		pr_warn("head: order:%u mapcount:%d entire_mapcount:%d pincount:%d\n",
 				folio_order(folio),
diff --git a/mm/internal.h b/mm/internal.h
index 53b20de141b9..aa1206495bc6 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -869,8 +869,7 @@ static inline void prep_compound_head(struct page *page, unsigned int order)
 		folio->_mm_id_mapcount[1] = -1;
 	}
 	atomic_set(&folio->_entire_mapcount, -1);
-	if (IS_ENABLED(CONFIG_64BIT) || order > 1)
-		atomic_set(&folio->_pincount, 0);
+	atomic_set(&folio->_pincount, 0);
 	if (order > 1)
 		INIT_LIST_HEAD(&folio->_deferred_list);
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1c09d79cade3..8ed4c73fdba4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1125,11 +1125,9 @@ static int free_tail_page_prepare(struct page *head_page, struct page *page)
 			bad_page(page, "nonzero entire_mapcount");
 			goto out;
 		}
-		if (IS_ENABLED(CONFIG_64BIT)) {
-			if (unlikely(atomic_read(&folio->_pincount))) {
-				bad_page(page, "nonzero pincount");
-				goto out;
-			}
+		if (unlikely(atomic_read(&folio->_pincount))) {
+			bad_page(page, "nonzero pincount");
+			goto out;
 		}
 		break;
 	case 2:
@@ -1138,12 +1136,6 @@ static int free_tail_page_prepare(struct page *head_page, struct page *page)
 			bad_page(page, "on deferred list");
 			goto out;
 		}
-		if (!IS_ENABLED(CONFIG_64BIT)) {
-			if (unlikely(atomic_read(&folio->_pincount))) {
-				bad_page(page, "nonzero pincount");
-				goto out;
-			}
-		}
 		break;
 	case 3:
 		/* the third tail page: hugetlb specifics overlap ->mappings */

-- 
2.43.0


^ permalink raw reply related

* [PATCH RFC 11/13] mm/rmap: stop using the entire mapcount for hugetlb folios
From: David Hildenbrand (Arm) @ 2026-04-12 18:59 UTC (permalink / raw)
  To: Tejun Heo, Johannes Weiner, Michal Koutný, Jonathan Corbet,
	Shuah Khan, Andrew Morton, Lorenzo Stoakes, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
	Rik van Riel, Harry Yoo, Jann Horn, Brendan Jackman, Zi Yan,
	Pedro Falcato, Matthew Wilcox
  Cc: cgroups, linux-doc, linux-kernel, linux-mm, linux-fsdevel,
	David Hildenbrand (Arm)
In-Reply-To: <20260412-mapcount-v1-0-05e8dfab52e0@kernel.org>

There is no real reason why hugetlb still updates the entire mapcount:
the value always corresponds to folio_mapcount().

As we want to change the semantics of the entire mapcount in a way
incompatible with hugetlb, let's just stop using the entire mapcount
for hugetlb folios entirely.

We only have to teach folio_average_page_mapcount() about the change.

Signed-off-by: David Hildenbrand (Arm) <david@kernel.org>
---
 fs/proc/internal.h   | 3 +++
 include/linux/mm.h   | 2 ++
 include/linux/rmap.h | 3 ---
 mm/debug.c           | 2 +-
 mm/rmap.c            | 4 +---
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index a5908167ce2d..1dd46e55c850 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -186,6 +186,9 @@ static inline int folio_average_page_mapcount(struct folio *folio)
 	mapcount = folio_large_mapcount(folio);
 	if (unlikely(mapcount <= 0))
 		return 0;
+	if (folio_test_hugetlb(folio))
+		return mapcount;
+
 	entire_mapcount = folio_entire_mapcount(folio);
 	if (mapcount <= entire_mapcount)
 		return entire_mapcount;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6dd906585420..3092db64a009 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1829,6 +1829,8 @@ static inline int is_vmalloc_or_module_addr(const void *x)
  * How many times the entire folio is mapped as a single unit (eg by a
  * PMD or PUD entry).  This is probably not what you want, except for
  * debugging purposes or implementation of other core folio_*() primitives.
+ *
+ * Always 0 for hugetlb folios.
  */
 static inline int folio_entire_mapcount(const struct folio *folio)
 {
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 4894e43e5f52..b81b1d9e1eaa 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -443,7 +443,6 @@ static inline int hugetlb_try_dup_anon_rmap(struct folio *folio,
 			return -EBUSY;
 		ClearPageAnonExclusive(&folio->page);
 	}
-	atomic_inc(&folio->_entire_mapcount);
 	atomic_inc(&folio->_large_mapcount);
 	return 0;
 }
@@ -477,7 +476,6 @@ static inline void hugetlb_add_file_rmap(struct folio *folio)
 	VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio);
 	VM_WARN_ON_FOLIO(folio_test_anon(folio), folio);
 
-	atomic_inc(&folio->_entire_mapcount);
 	atomic_inc(&folio->_large_mapcount);
 }
 
@@ -485,7 +483,6 @@ static inline void hugetlb_remove_rmap(struct folio *folio)
 {
 	VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio);
 
-	atomic_dec(&folio->_entire_mapcount);
 	atomic_dec(&folio->_large_mapcount);
 }
 
diff --git a/mm/debug.c b/mm/debug.c
index 80e050bf29ba..82baaf87ef3d 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -86,7 +86,7 @@ static void __dump_folio(const struct folio *folio, const struct page *page,
 		pr_warn("head: order:%u mapcount:%d entire_mapcount:%d pincount:%d\n",
 				folio_order(folio),
 				folio_mapcount(folio),
-				folio_entire_mapcount(folio),
+				folio_entire_mapcount(folio),
 				pincount);
 	}
 
diff --git a/mm/rmap.c b/mm/rmap.c
index 27488183448b..d08927949284 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -3042,11 +3042,10 @@ void hugetlb_add_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
 	VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio);
 	VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);
 
-	atomic_inc(&folio->_entire_mapcount);
 	atomic_inc(&folio->_large_mapcount);
 	if (flags & RMAP_EXCLUSIVE)
 		SetPageAnonExclusive(&folio->page);
-	VM_WARN_ON_FOLIO(folio_entire_mapcount(folio) > 1 &&
+	VM_WARN_ON_FOLIO(folio_large_mapcount(folio) > 1 &&
 			 PageAnonExclusive(&folio->page), folio);
 }
 
@@ -3057,7 +3056,6 @@ void hugetlb_add_new_anon_rmap(struct folio *folio,
 
 	BUG_ON(address < vma->vm_start || address >= vma->vm_end);
 	/* increment count (starts at -1) */
-	atomic_set(&folio->_entire_mapcount, 0);
 	atomic_set(&folio->_large_mapcount, 0);
 	folio_clear_hugetlb_restore_reserve(folio);
 	__folio_set_anon(folio, vma, address, true);

-- 
2.43.0


^ permalink raw reply related

* [PATCH RFC 12/13] mm/rmap: large mapcount interface cleanups
From: David Hildenbrand (Arm) @ 2026-04-12 18:59 UTC (permalink / raw)
  To: Tejun Heo, Johannes Weiner, Michal Koutný, Jonathan Corbet,
	Shuah Khan, Andrew Morton, Lorenzo Stoakes, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
	Rik van Riel, Harry Yoo, Jann Horn, Brendan Jackman, Zi Yan,
	Pedro Falcato, Matthew Wilcox
  Cc: cgroups, linux-doc, linux-kernel, linux-mm, linux-fsdevel,
	David Hildenbrand (Arm)
In-Reply-To: <20260412-mapcount-v1-0-05e8dfab52e0@kernel.org>

Let's prepare for passing another counter by renaming diff/mapcount to
"nr_mappings" and just using an "unsigned int".

Signed-off-by: David Hildenbrand (Arm) <david@kernel.org>
---
 include/linux/rmap.h | 61 ++++++++++++++++++++++++++--------------------------
 1 file changed, 31 insertions(+), 30 deletions(-)

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index b81b1d9e1eaa..5a02ffd3744a 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -133,10 +133,10 @@ static inline void folio_set_mm_id(struct folio *folio, int idx, mm_id_t id)
 }
 
 static inline void __folio_large_mapcount_sanity_checks(const struct folio *folio,
-		int diff, mm_id_t mm_id)
+		unsigned int nr_mappings, mm_id_t mm_id)
 {
 	VM_WARN_ON_ONCE(!folio_test_large(folio) || folio_test_hugetlb(folio));
-	VM_WARN_ON_ONCE(diff <= 0);
+	VM_WARN_ON_ONCE(nr_mappings == 0);
 	VM_WARN_ON_ONCE(mm_id < MM_ID_MIN || mm_id > MM_ID_MAX);
 
 	/*
@@ -145,7 +145,7 @@ static inline void __folio_large_mapcount_sanity_checks(const struct folio *foli
 	 * a check on 32bit, where we currently reduce the size of the per-MM
 	 * mapcount to a short.
 	 */
-	VM_WARN_ON_ONCE(diff > folio_large_nr_pages(folio));
+	VM_WARN_ON_ONCE(nr_mappings > folio_large_nr_pages(folio));
 	VM_WARN_ON_ONCE(folio_large_nr_pages(folio) - 1 > MM_ID_MAPCOUNT_MAX);
 
 	VM_WARN_ON_ONCE(folio_mm_id(folio, 0) == MM_ID_DUMMY &&
@@ -161,29 +161,29 @@ static inline void __folio_large_mapcount_sanity_checks(const struct folio *foli
 }
 
 static __always_inline void folio_set_large_mapcount(struct folio *folio,
-		int mapcount, struct vm_area_struct *vma)
+		unsigned int nr_mappings, struct vm_area_struct *vma)
 {
-	__folio_large_mapcount_sanity_checks(folio, mapcount, vma->vm_mm->mm_id);
+	__folio_large_mapcount_sanity_checks(folio, nr_mappings, vma->vm_mm->mm_id);
 
 	VM_WARN_ON_ONCE(folio_mm_id(folio, 0) != MM_ID_DUMMY);
 	VM_WARN_ON_ONCE(folio_mm_id(folio, 1) != MM_ID_DUMMY);
 
 	/* Note: mapcounts start at -1. */
-	atomic_set(&folio->_large_mapcount, mapcount - 1);
-	folio->_mm_id_mapcount[0] = mapcount - 1;
+	atomic_set(&folio->_large_mapcount, nr_mappings - 1);
+	folio->_mm_id_mapcount[0] = nr_mappings - 1;
 	folio_set_mm_id(folio, 0, vma->vm_mm->mm_id);
 }
 
 static __always_inline int folio_add_return_large_mapcount(struct folio *folio,
-		int diff, struct vm_area_struct *vma)
+		unsigned int nr_mappings, struct vm_area_struct *vma)
 {
 	const mm_id_t mm_id = vma->vm_mm->mm_id;
 	int new_mapcount_val;
 
 	folio_lock_large_mapcount(folio);
-	__folio_large_mapcount_sanity_checks(folio, diff, mm_id);
+	__folio_large_mapcount_sanity_checks(folio, nr_mappings, mm_id);
 
-	new_mapcount_val = atomic_read(&folio->_large_mapcount) + diff;
+	new_mapcount_val = atomic_read(&folio->_large_mapcount) + nr_mappings;
 	atomic_set(&folio->_large_mapcount, new_mapcount_val);
 
 	/*
@@ -194,14 +194,14 @@ static __always_inline int folio_add_return_large_mapcount(struct folio *folio,
 	 * we might be in trouble when unmapping pages later.
 	 */
 	if (folio_mm_id(folio, 0) == mm_id) {
-		folio->_mm_id_mapcount[0] += diff;
+		folio->_mm_id_mapcount[0] += nr_mappings;
 		if (!IS_ENABLED(CONFIG_64BIT) && unlikely(folio->_mm_id_mapcount[0] < 0)) {
 			folio->_mm_id_mapcount[0] = -1;
 			folio_set_mm_id(folio, 0, MM_ID_DUMMY);
 			folio->_mm_ids |= FOLIO_MM_IDS_SHARED_BIT;
 		}
 	} else if (folio_mm_id(folio, 1) == mm_id) {
-		folio->_mm_id_mapcount[1] += diff;
+		folio->_mm_id_mapcount[1] += nr_mappings;
 		if (!IS_ENABLED(CONFIG_64BIT) && unlikely(folio->_mm_id_mapcount[1] < 0)) {
 			folio->_mm_id_mapcount[1] = -1;
 			folio_set_mm_id(folio, 1, MM_ID_DUMMY);
@@ -209,13 +209,13 @@ static __always_inline int folio_add_return_large_mapcount(struct folio *folio,
 		}
 	} else if (folio_mm_id(folio, 0) == MM_ID_DUMMY) {
 		folio_set_mm_id(folio, 0, mm_id);
-		folio->_mm_id_mapcount[0] = diff - 1;
+		folio->_mm_id_mapcount[0] = nr_mappings - 1;
 		/* We might have other mappings already. */
-		if (new_mapcount_val != diff - 1)
+		if (new_mapcount_val != nr_mappings - 1)
 			folio->_mm_ids |= FOLIO_MM_IDS_SHARED_BIT;
 	} else if (folio_mm_id(folio, 1) == MM_ID_DUMMY) {
 		folio_set_mm_id(folio, 1, mm_id);
-		folio->_mm_id_mapcount[1] = diff - 1;
+		folio->_mm_id_mapcount[1] = nr_mappings - 1;
 		/* Slot 0 certainly has mappings as well. */
 		folio->_mm_ids |= FOLIO_MM_IDS_SHARED_BIT;
 	}
@@ -225,15 +225,15 @@ static __always_inline int folio_add_return_large_mapcount(struct folio *folio,
 #define folio_add_large_mapcount folio_add_return_large_mapcount
 
 static __always_inline int folio_sub_return_large_mapcount(struct folio *folio,
-		int diff, struct vm_area_struct *vma)
+		unsigned int nr_mappings, struct vm_area_struct *vma)
 {
 	const mm_id_t mm_id = vma->vm_mm->mm_id;
 	int new_mapcount_val;
 
 	folio_lock_large_mapcount(folio);
-	__folio_large_mapcount_sanity_checks(folio, diff, mm_id);
+	__folio_large_mapcount_sanity_checks(folio, nr_mappings, mm_id);
 
-	new_mapcount_val = atomic_read(&folio->_large_mapcount) - diff;
+	new_mapcount_val = atomic_read(&folio->_large_mapcount) - nr_mappings;
 	atomic_set(&folio->_large_mapcount, new_mapcount_val);
 
 	/*
@@ -243,13 +243,13 @@ static __always_inline int folio_sub_return_large_mapcount(struct folio *folio,
 	 * negative.
 	 */
 	if (folio_mm_id(folio, 0) == mm_id) {
-		folio->_mm_id_mapcount[0] -= diff;
+		folio->_mm_id_mapcount[0] -= nr_mappings;
 		if (folio->_mm_id_mapcount[0] >= 0)
 			goto out;
 		folio->_mm_id_mapcount[0] = -1;
 		folio_set_mm_id(folio, 0, MM_ID_DUMMY);
 	} else if (folio_mm_id(folio, 1) == mm_id) {
-		folio->_mm_id_mapcount[1] -= diff;
+		folio->_mm_id_mapcount[1] -= nr_mappings;
 		if (folio->_mm_id_mapcount[1] >= 0)
 			goto out;
 		folio->_mm_id_mapcount[1] = -1;
@@ -275,35 +275,36 @@ static __always_inline int folio_sub_return_large_mapcount(struct folio *folio,
  * See __folio_rmap_sanity_checks(), we might map large folios even without
  * CONFIG_TRANSPARENT_HUGEPAGE. We'll keep that working for now.
  */
-static inline void folio_set_large_mapcount(struct folio *folio, int mapcount,
+static inline void folio_set_large_mapcount(struct folio *folio,
+		unsigned int nr_mappings,
 		struct vm_area_struct *vma)
 {
 	/* Note: mapcounts start at -1. */
-	atomic_set(&folio->_large_mapcount, mapcount - 1);
+	atomic_set(&folio->_large_mapcount, nr_mappings - 1);
 }
 
 static inline void folio_add_large_mapcount(struct folio *folio,
-		int diff, struct vm_area_struct *vma)
+		unsigned int nr_mappings, struct vm_area_struct *vma)
 {
-	atomic_add(diff, &folio->_large_mapcount);
+	atomic_add(nr_mappings, &folio->_large_mapcount);
 }
 
 static inline int folio_add_return_large_mapcount(struct folio *folio,
-		int diff, struct vm_area_struct *vma)
+		unsigned int nr_mappings, struct vm_area_struct *vma)
 {
-	return atomic_add_return(diff, &folio->_large_mapcount) + 1;
+	return atomic_add_return(nr_mappings, &folio->_large_mapcount) + 1;
 }
 
 static inline void folio_sub_large_mapcount(struct folio *folio,
-		int diff, struct vm_area_struct *vma)
+		unsigned int nr_mappings, struct vm_area_struct *vma)
 {
-	atomic_sub(diff, &folio->_large_mapcount);
+	atomic_sub(nr_mappings, &folio->_large_mapcount);
 }
 
 static inline int folio_sub_return_large_mapcount(struct folio *folio,
-		int diff, struct vm_area_struct *vma)
+		unsigned int nr_mappings, struct vm_area_struct *vma)
 {
-	return atomic_sub_return(diff, &folio->_large_mapcount) + 1;
+	return atomic_sub_return(nr_mappings, &folio->_large_mapcount) + 1;
 }
 #endif /* CONFIG_MM_ID */
 

-- 
2.43.0


^ permalink raw reply related

* [PATCH RFC 13/13] mm/rmap: support arbitrary folio mappings
From: David Hildenbrand (Arm) @ 2026-04-12 18:59 UTC (permalink / raw)
  To: Tejun Heo, Johannes Weiner, Michal Koutný, Jonathan Corbet,
	Shuah Khan, Andrew Morton, Lorenzo Stoakes, Liam R. Howlett,
	Vlastimil Babka, Mike Rapoport, Suren Baghdasaryan, Michal Hocko,
	Rik van Riel, Harry Yoo, Jann Horn, Brendan Jackman, Zi Yan,
	Pedro Falcato, Matthew Wilcox
  Cc: cgroups, linux-doc, linux-kernel, linux-mm, linux-fsdevel,
	David Hildenbrand (Arm)
In-Reply-To: <20260412-mapcount-v1-0-05e8dfab52e0@kernel.org>

Let's replace the entire mapcount by the sum of mapped pages ("total mapped
pages"), which we update alongside the mapcount under the large mapcount
lock.

This allows for teaching all rmap code to just support arbitrary folio
mappings: PUD-sized folios being mapped by PMDs and PTEs, or
mapping folios that span multiple PMDs/PUDs. Note that calling code still
has to be updated to support that.

For example, a PMD-sized large folio with 512 pages that is mapped
through 2 PMDs and a single PTE has mapcount == 3 and 1025 total mapped
pages.

Calculating folio_average_page_mapcount() is now trivial. Provide a
new helper folio_total_mapped_pages() for that purpose. Similarly,
detecting certainly partially mapped folios in
__folio_certainly_partially_mapped() when unmapping is now trivial.

Pass another parameter ("nr_pages") to the large mapcount helpers that
update the new folio->_total_mapped_pages counter atomically with the
mapcount, and return the new value alongside the new mapcount.

We can keep maintaining the PMD statistics for PMD-sized THPs
(e.g., AnonHugePages) based on the new mapcount and the new total mapped
pages quite neatly, without the need for an additional pmd mapcount.

This all cleans up the code nicely. Introduce pgtable_level_to_order() to
easily convert from a pgtable_level to the mapping order.

Is an unsigned long for "total mapped pages" sufficient on 32bit? Maybe
not, but it is a similar problem to an "int" being insufficient to store
the mapcount on 64bit (and likely on 32bit) when triggering many PTE
mappings. Likely, for the time being, we might just want to prevent
overflowing both of these counters by teaching rmap code to fail early, or
letting calling code do some opportunistic checks: we don't expect current
reasonable use cases to overflow these counters.

Note that the !CONFIG_MM_ID implementation only exists for cases where
rmap code is called with a large folio even though THPs are not
supported by the kernel config: PMD/PUD mappings are impossible in such
configurations, and proper large folios are not possible. In the future,
we will remove this code entirely, as these pages are not actual folios,
and we can just enable CONFIG_MM_ID in

No functional change intended.

Signed-off-by: David Hildenbrand (Arm) <david@kernel.org>
---
 Documentation/mm/transhuge.rst |   5 +-
 fs/proc/internal.h             |  22 ++---
 include/linux/mm.h             |  33 +++++---
 include/linux/mm_types.h       |   6 +-
 include/linux/pgtable.h        |  22 +++++
 include/linux/rmap.h           | 184 +++++++++++++++++++----------------------
 mm/debug.c                     |   4 +-
 mm/internal.h                  |   2 +-
 mm/memory.c                    |   3 +-
 mm/page_alloc.c                |   4 +-
 mm/rmap.c                      | 165 ++++++++++++++++--------------------
 11 files changed, 214 insertions(+), 236 deletions(-)

diff --git a/Documentation/mm/transhuge.rst b/Documentation/mm/transhuge.rst
index eb5ac076e4c6..76d3413a5b6b 100644
--- a/Documentation/mm/transhuge.rst
+++ b/Documentation/mm/transhuge.rst
@@ -116,14 +116,15 @@ pages:
     succeeds on tail pages.
 
   - map/unmap of a PMD entry for the whole THP increment/decrement
-    folio->_entire_mapcount and folio->_large_mapcount.
+    folio->_large_mapcount and add/remove HPAGE_PMD_NR to
+    folio->_total_mapped_pages.
 
     We also maintain the two slots for tracking MM owners (MM ID and
     corresponding mapcount), and the current status ("maybe mapped shared" vs.
     "mapped exclusively").
 
   - map/unmap of individual pages with PTE entry increment/decrement
-    folio->_large_mapcount.
+    folio->_total_mapped_pages and folio->_large_mapcount.
 
     We also maintain the two slots for tracking MM owners (MM ID and
     corresponding mapcount), and the current status ("maybe mapped shared" vs.
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 1dd46e55c850..fae901769529 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -178,26 +178,16 @@ unsigned name_to_int(const struct qstr *qstr);
  */
 static inline int folio_average_page_mapcount(struct folio *folio)
 {
-	int mapcount, entire_mapcount, avg;
+	unsigned long total_mapped_pages = folio_total_mapped_pages(folio);
+	const unsigned int order = folio_order(folio);
 
-	if (!folio_test_large(folio))
-		return atomic_read(&folio->_mapcount) + 1;
-
-	mapcount = folio_large_mapcount(folio);
-	if (unlikely(mapcount <= 0))
-		return 0;
-	if (folio_test_hugetlb(folio))
-		return mapcount;
-
-	entire_mapcount = folio_entire_mapcount(folio);
-	if (mapcount <= entire_mapcount)
-		return entire_mapcount;
-	mapcount -= entire_mapcount;
+	if (!total_mapped_pages || !order)
+		return total_mapped_pages;
 
 	/* Round to closest integer ... */
-	avg = ((unsigned int)mapcount + folio_large_nr_pages(folio) / 2) >> folio_large_order(folio);
+	total_mapped_pages += 1ul << (order - 1);
 	/* ... but return at least 1. */
-	return max_t(int, avg + entire_mapcount, 1);
+	return max(total_mapped_pages >> order, 1);
 }
 /*
  * array.c
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3092db64a009..b1c55e0cd317 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1825,19 +1825,6 @@ static inline int is_vmalloc_or_module_addr(const void *x)
 }
 #endif
 
-/*
- * How many times the entire folio is mapped as a single unit (eg by a
- * PMD or PUD entry).  This is probably not what you want, except for
- * debugging purposes or implementation of other core folio_*() primitives.
- *
- * Always 0 for hugetlb folios.
- */
-static inline int folio_entire_mapcount(const struct folio *folio)
-{
-	VM_WARN_ON_FOLIO(!folio_test_large(folio), folio);
-	return atomic_read(&folio->_entire_mapcount) + 1;
-}
-
 static inline int folio_large_mapcount(const struct folio *folio)
 {
 	VM_WARN_ON_FOLIO(!folio_test_large(folio), folio);
@@ -1888,6 +1875,26 @@ static inline bool folio_mapped(const struct folio *folio)
 	return folio_mapcount(folio) >= 1;
 }
 
+/**
+ * folio_total_mapped_pages - total mapped pages across all mappings
+ * @folio: The folio.
+ *
+ * Return the total number of pages mapped by all mappings of this folio.
+ * A page mapped multiple times is counted multiple times.
+ *
+ * For example, a single folio mapped through two PMD-sized mappings will
+ * contribute 1024 pages to the total on systems where a PMD maps 512 pages.
+ *
+ * Return: Total number of mapped pages across all mappings of @folio.
+ */
+static inline unsigned long folio_total_mapped_pages(const struct folio *folio)
+{
+	if (!folio_test_large(folio) || folio_test_hugetlb(folio) ||
+	    !IS_ENABLED(CONFIG_MM_ID))
+		return folio_mapcount(folio);
+	return atomic_long_read(&folio->_total_mapped_pages);
+}
+
 /*
  * Return true if this page is mapped into pagetables.
  * For compound page it returns true if any sub-page of compound page is mapped,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 450f61cad678..93e05c4fd7b3 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -374,10 +374,9 @@ typedef unsigned short mm_id_t;
  * @pgmap: Metadata for ZONE_DEVICE mappings
  * @virtual: Virtual address in the kernel direct map.
  * @_last_cpupid: IDs of last CPU and last process that accessed the folio.
- * @_entire_mapcount: Do not use directly, call folio_entire_mapcount().
+ * @_total_mapped_pages: Do not use directly, call folio_total_mapped_pages().
  * @_large_mapcount: Do not use directly, call folio_mapcount().
  * @_unused_1: Temporary placeholder.
- * @_unused_2: Temporary placeholder.
  * @_pincount: Do not use directly, call folio_maybe_dma_pinned().
  * @_nr_pages: Do not use directly, call folio_nr_pages().
  * @_mm_id: Do not use outside of rmap code.
@@ -452,11 +451,10 @@ struct folio {
 				struct {
 	/* public: */
 					atomic_t _pincount;
-					atomic_t _entire_mapcount;
 #ifdef CONFIG_64BIT
 					unsigned int _unused_1;
-					unsigned int _unused_2;
 #endif /* CONFIG_64BIT */
+					atomic_long_t _total_mapped_pages;
 					mm_id_mapcount_t _mm_id_mapcount[2];
 					union {
 						mm_id_t _mm_id[2];
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index cdd68ed3ae1a..2351205d9076 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -7,6 +7,8 @@
 
 #define PMD_ORDER	(PMD_SHIFT - PAGE_SHIFT)
 #define PUD_ORDER	(PUD_SHIFT - PAGE_SHIFT)
+#define P4D_ORDER	(P4D_SHIFT - PAGE_SHIFT)
+#define PGDIR_ORDER	(PGDIR_SHIFT - PAGE_SHIFT)
 
 #ifndef __ASSEMBLY__
 #ifdef CONFIG_MMU
@@ -2243,6 +2245,26 @@ static inline const char *pgtable_level_to_str(enum pgtable_level level)
 	}
 }
 
+#ifdef CONFIG_MMU
+static __always_inline unsigned int pgtable_level_to_order(enum pgtable_level level)
+{
+	switch (level) {
+	case PGTABLE_LEVEL_PTE:
+		return 0;
+	case PGTABLE_LEVEL_PMD:
+		return PMD_ORDER;
+	case PGTABLE_LEVEL_PUD:
+		return PUD_ORDER;
+	case PGTABLE_LEVEL_P4D:
+		return P4D_ORDER;
+	case PGTABLE_LEVEL_PGD:
+		return PGDIR_ORDER;
+	default:
+		BUILD_BUG();
+	}
+}
+#endif /* CONFIG_MMU */
+
 #endif /* !__ASSEMBLY__ */
 
 #if !defined(MAX_POSSIBLE_PHYSMEM_BITS) && !defined(CONFIG_64BIT)
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 5a02ffd3744a..a71cdd706c7e 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -133,10 +133,10 @@ static inline void folio_set_mm_id(struct folio *folio, int idx, mm_id_t id)
 }
 
 static inline void __folio_large_mapcount_sanity_checks(const struct folio *folio,
-		unsigned int nr_mappings, mm_id_t mm_id)
+		unsigned int nr_mappings, unsigned int nr_pages, mm_id_t mm_id)
 {
 	VM_WARN_ON_ONCE(!folio_test_large(folio) || folio_test_hugetlb(folio));
-	VM_WARN_ON_ONCE(nr_mappings == 0);
+	VM_WARN_ON_ONCE(nr_mappings == 0 || nr_pages == 0 || nr_mappings > nr_pages);
 	VM_WARN_ON_ONCE(mm_id < MM_ID_MIN || mm_id > MM_ID_MAX);
 
 	/*
@@ -145,7 +145,7 @@ static inline void __folio_large_mapcount_sanity_checks(const struct folio *foli
 	 * a check on 32bit, where we currently reduce the size of the per-MM
 	 * mapcount to a short.
 	 */
-	VM_WARN_ON_ONCE(nr_mappings > folio_large_nr_pages(folio));
+	VM_WARN_ON_ONCE(nr_pages > folio_large_nr_pages(folio));
 	VM_WARN_ON_ONCE(folio_large_nr_pages(folio) - 1 > MM_ID_MAPCOUNT_MAX);
 
 	VM_WARN_ON_ONCE(folio_mm_id(folio, 0) == MM_ID_DUMMY &&
@@ -161,31 +161,38 @@ static inline void __folio_large_mapcount_sanity_checks(const struct folio *foli
 }
 
 static __always_inline void folio_set_large_mapcount(struct folio *folio,
-		unsigned int nr_mappings, struct vm_area_struct *vma)
+		unsigned int nr_mappings, unsigned int nr_pages, struct vm_area_struct *vma)
 {
-	__folio_large_mapcount_sanity_checks(folio, nr_mappings, vma->vm_mm->mm_id);
+	__folio_large_mapcount_sanity_checks(folio, nr_mappings, nr_pages,
+					     vma->vm_mm->mm_id);
 
 	VM_WARN_ON_ONCE(folio_mm_id(folio, 0) != MM_ID_DUMMY);
 	VM_WARN_ON_ONCE(folio_mm_id(folio, 1) != MM_ID_DUMMY);
 
 	/* Note: mapcounts start at -1. */
 	atomic_set(&folio->_large_mapcount, nr_mappings - 1);
+	atomic_long_set(&folio->_total_mapped_pages, nr_pages);
 	folio->_mm_id_mapcount[0] = nr_mappings - 1;
 	folio_set_mm_id(folio, 0, vma->vm_mm->mm_id);
 }
 
 static __always_inline int folio_add_return_large_mapcount(struct folio *folio,
-		unsigned int nr_mappings, struct vm_area_struct *vma)
+		unsigned int nr_mappings, unsigned int nr_pages,
+		struct vm_area_struct *vma, unsigned long *nr_mapped_pages)
 {
 	const mm_id_t mm_id = vma->vm_mm->mm_id;
+	unsigned long new_mapped_pages;
 	int new_mapcount_val;
 
 	folio_lock_large_mapcount(folio);
-	__folio_large_mapcount_sanity_checks(folio, nr_mappings, mm_id);
+	__folio_large_mapcount_sanity_checks(folio, nr_mappings, nr_pages, mm_id);
 
 	new_mapcount_val = atomic_read(&folio->_large_mapcount) + nr_mappings;
 	atomic_set(&folio->_large_mapcount, new_mapcount_val);
 
+	new_mapped_pages = atomic_long_read(&folio->_total_mapped_pages) + nr_pages;
+	atomic_long_set(&folio->_total_mapped_pages, new_mapped_pages);
+
 	/*
 	 * If a folio is mapped more than once into an MM on 32bit, we
 	 * can in theory overflow the per-MM mapcount (although only for
@@ -220,22 +227,38 @@ static __always_inline int folio_add_return_large_mapcount(struct folio *folio,
 		folio->_mm_ids |= FOLIO_MM_IDS_SHARED_BIT;
 	}
 	folio_unlock_large_mapcount(folio);
+
+	*nr_mapped_pages = new_mapped_pages;
 	return new_mapcount_val + 1;
 }
-#define folio_add_large_mapcount folio_add_return_large_mapcount
+
+static __always_inline void folio_add_large_mapcount(struct folio *folio,
+		unsigned int nr_mappings, unsigned int nr_pages,
+		struct vm_area_struct *vma)
+{
+	unsigned long nr_mapped_pages;
+
+	folio_add_return_large_mapcount(folio, nr_mappings, nr_pages, vma,
+					&nr_mapped_pages);
+}
 
 static __always_inline int folio_sub_return_large_mapcount(struct folio *folio,
-		unsigned int nr_mappings, struct vm_area_struct *vma)
+		unsigned int nr_mappings, unsigned int nr_pages,
+		struct vm_area_struct *vma, unsigned long *nr_mapped_pages)
 {
 	const mm_id_t mm_id = vma->vm_mm->mm_id;
+	unsigned long new_mapped_pages;
 	int new_mapcount_val;
 
 	folio_lock_large_mapcount(folio);
-	__folio_large_mapcount_sanity_checks(folio, nr_mappings, mm_id);
+	__folio_large_mapcount_sanity_checks(folio, nr_mappings, nr_pages, mm_id);
 
 	new_mapcount_val = atomic_read(&folio->_large_mapcount) - nr_mappings;
 	atomic_set(&folio->_large_mapcount, new_mapcount_val);
 
+	new_mapped_pages = atomic_long_read(&folio->_total_mapped_pages) - nr_pages;
+	atomic_long_set(&folio->_total_mapped_pages, new_mapped_pages);
+
 	/*
 	 * There are valid corner cases where we might underflow a per-MM
 	 * mapcount (some mappings added when no slot was free, some mappings
@@ -267,56 +290,59 @@ static __always_inline int folio_sub_return_large_mapcount(struct folio *folio,
 		folio->_mm_ids &= ~FOLIO_MM_IDS_SHARED_BIT;
 out:
 	folio_unlock_large_mapcount(folio);
+
+	*nr_mapped_pages = new_mapped_pages;
 	return new_mapcount_val + 1;
 }
-#define folio_sub_large_mapcount folio_sub_return_large_mapcount
 #else /* !CONFIG_MM_ID */
 /*
  * See __folio_rmap_sanity_checks(), we might map large folios even without
  * CONFIG_TRANSPARENT_HUGEPAGE. We'll keep that working for now.
  */
 static inline void folio_set_large_mapcount(struct folio *folio,
-		unsigned int nr_mappings,
+		unsigned int nr_mappings, unsigned int nr_pages,
 		struct vm_area_struct *vma)
 {
+	/* No support for large mappings. */
+	VM_WARN_ON_ONCE(nr_mappings != nr_pages);
 	/* Note: mapcounts start at -1. */
 	atomic_set(&folio->_large_mapcount, nr_mappings - 1);
 }
 
 static inline void folio_add_large_mapcount(struct folio *folio,
-		unsigned int nr_mappings, struct vm_area_struct *vma)
+		unsigned int nr_mappings, unsigned int nr_pages,
+		struct vm_area_struct *vma)
 {
+	/* No support for large mappings. */
+	VM_WARN_ON_ONCE(nr_mappings != nr_pages);
 	atomic_add(nr_mappings, &folio->_large_mapcount);
 }
 
 static inline int folio_add_return_large_mapcount(struct folio *folio,
-		unsigned int nr_mappings, struct vm_area_struct *vma)
+		unsigned int nr_mappings, unsigned int nr_pages,
+		struct vm_area_struct *vma, unsigned long *nr_mapped_pages)
 {
-	return atomic_add_return(nr_mappings, &folio->_large_mapcount) + 1;
-}
+	int new_mapcount = atomic_add_return(nr_mappings, &folio->_large_mapcount) + 1;
 
-static inline void folio_sub_large_mapcount(struct folio *folio,
-		unsigned int nr_mappings, struct vm_area_struct *vma)
-{
-	atomic_sub(nr_mappings, &folio->_large_mapcount);
+	/* No support for large mappings. */
+	VM_WARN_ON_ONCE(nr_mappings != nr_pages);
+	*nr_mapped_pages = new_mapcount;
+	return new_mapcount;
 }
 
 static inline int folio_sub_return_large_mapcount(struct folio *folio,
-		unsigned int nr_mappings, struct vm_area_struct *vma)
+		unsigned int nr_mappings, unsigned int nr_pages,
+		struct vm_area_struct *vma, unsigned long *nr_mapped_pages)
 {
-	return atomic_sub_return(nr_mappings, &folio->_large_mapcount) + 1;
+	int new_mapcount = atomic_sub_return(nr_mappings, &folio->_large_mapcount) + 1;
+
+	/* No support for large mappings. */
+	VM_WARN_ON_ONCE(nr_mappings != nr_pages);
+	*nr_mapped_pages = new_mapcount;
+	return new_mapcount;
 }
 #endif /* CONFIG_MM_ID */
 
-#define folio_inc_large_mapcount(folio, vma) \
-	folio_add_large_mapcount(folio, 1, vma)
-#define folio_inc_return_large_mapcount(folio, vma) \
-	folio_add_return_large_mapcount(folio, 1, vma)
-#define folio_dec_large_mapcount(folio, vma) \
-	folio_sub_large_mapcount(folio, 1, vma)
-#define folio_dec_return_large_mapcount(folio, vma) \
-	folio_sub_return_large_mapcount(folio, 1, vma)
-
 /* RMAP flags, currently only relevant for some anon rmap operations. */
 typedef int __bitwise rmap_t;
 
@@ -332,6 +358,8 @@ typedef int __bitwise rmap_t;
 static __always_inline void __folio_rmap_sanity_checks(const struct folio *folio,
 		const struct page *page, int nr_pages, enum pgtable_level level)
 {
+	const unsigned int mapping_order = pgtable_level_to_order(level);
+
 	/* hugetlb folios are handled separately. */
 	VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);
 
@@ -351,29 +379,8 @@ static __always_inline void __folio_rmap_sanity_checks(const struct folio *folio
 	VM_WARN_ON_FOLIO(page_folio(page) != folio, folio);
 	VM_WARN_ON_FOLIO(page_folio(page + nr_pages - 1) != folio, folio);
 
-	switch (level) {
-	case PGTABLE_LEVEL_PTE:
-		break;
-	case PGTABLE_LEVEL_PMD:
-		/*
-		 * We don't support folios larger than a single PMD yet. So
-		 * when PGTABLE_LEVEL_PMD is set, we assume that we are creating
-		 * a single "entire" mapping of the folio.
-		 */
-		VM_WARN_ON_FOLIO(folio_nr_pages(folio) != HPAGE_PMD_NR, folio);
-		VM_WARN_ON_FOLIO(nr_pages != HPAGE_PMD_NR, folio);
-		break;
-	case PGTABLE_LEVEL_PUD:
-		/*
-		 * Assume that we are creating a single "entire" mapping of the
-		 * folio.
-		 */
-		VM_WARN_ON_FOLIO(folio_nr_pages(folio) != HPAGE_PUD_NR, folio);
-		VM_WARN_ON_FOLIO(nr_pages != HPAGE_PUD_NR, folio);
-		break;
-	default:
-		BUILD_BUG();
-	}
+	VM_WARN_ON_FOLIO(!IS_ALIGNED(nr_pages, 1u << mapping_order), folio);
+	VM_WARN_ON_FOLIO(!IS_ALIGNED(folio_page_idx(folio, page), 1u << mapping_order), folio);
 
 	/*
 	 * Anon folios must have an associated live anon_vma as long as they're
@@ -491,25 +498,14 @@ static __always_inline void __folio_dup_file_rmap(struct folio *folio,
 		struct page *page, int nr_pages, struct vm_area_struct *dst_vma,
 		enum pgtable_level level)
 {
-	__folio_rmap_sanity_checks(folio, page, nr_pages, level);
+	const unsigned int nr_mappings = nr_pages >> pgtable_level_to_order(level);
 
-	switch (level) {
-	case PGTABLE_LEVEL_PTE:
-		if (!folio_test_large(folio)) {
-			atomic_inc(&folio->_mapcount);
-			break;
-		}
+	__folio_rmap_sanity_checks(folio, page, nr_pages, level);
 
-		folio_add_large_mapcount(folio, nr_pages, dst_vma);
-		break;
-	case PGTABLE_LEVEL_PMD:
-	case PGTABLE_LEVEL_PUD:
-		atomic_inc(&folio->_entire_mapcount);
-		folio_inc_large_mapcount(folio, dst_vma);
-		break;
-	default:
-		BUILD_BUG();
-	}
+	if (level == PGTABLE_LEVEL_PTE && !folio_test_large(folio))
+		atomic_inc(&folio->_mapcount);
+	else
+		folio_add_large_mapcount(folio, nr_mappings, nr_pages, dst_vma);
 }
 
 /**
@@ -559,7 +555,6 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio,
 		struct page *page, int nr_pages, struct vm_area_struct *dst_vma,
 		struct vm_area_struct *src_vma, enum pgtable_level level)
 {
-	const int orig_nr_pages = nr_pages;
 	bool maybe_pinned;
 	int i;
 
@@ -581,39 +576,28 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio,
 	 * folio. But if any page is PageAnonExclusive, we must fallback to
 	 * copying if the folio maybe pinned.
 	 */
-	switch (level) {
-	case PGTABLE_LEVEL_PTE:
-		if (unlikely(maybe_pinned)) {
-			for (i = 0; i < nr_pages; i++)
-				if (PageAnonExclusive(page + i))
-					return -EBUSY;
-		}
-
-		if (!folio_test_large(folio)) {
-			if (PageAnonExclusive(page))
-				ClearPageAnonExclusive(page);
-			atomic_inc(&folio->_mapcount);
-			break;
-		}
-
-		do {
-			if (PageAnonExclusive(page))
-				ClearPageAnonExclusive(page);
-		} while (page++, --nr_pages > 0);
-		folio_add_large_mapcount(folio, orig_nr_pages, dst_vma);
-		break;
-	case PGTABLE_LEVEL_PMD:
-	case PGTABLE_LEVEL_PUD:
+	if (level == PGTABLE_LEVEL_PTE && !folio_test_large(folio)) {
 		if (PageAnonExclusive(page)) {
 			if (unlikely(maybe_pinned))
 				return -EBUSY;
 			ClearPageAnonExclusive(page);
 		}
-		atomic_inc(&folio->_entire_mapcount);
-		folio_inc_large_mapcount(folio, dst_vma);
-		break;
-	default:
-		BUILD_BUG();
+		atomic_inc(&folio->_mapcount);
+	} else {
+		const unsigned int mapping_order = pgtable_level_to_order(level);
+		const unsigned int nr_mappings = nr_pages >> mapping_order;
+
+		if (unlikely(maybe_pinned)) {
+			for (i = 0; i < nr_pages; i += 1u << mapping_order)
+				if (PageAnonExclusive(page + i))
+					return -EBUSY;
+		} else {
+			for (i = 0; i < nr_pages; i += 1u << mapping_order) {
+				if (PageAnonExclusive(page + i))
+					ClearPageAnonExclusive(page + i);
+			}
+		}
+		folio_add_large_mapcount(folio, nr_mappings, nr_pages, dst_vma);
 	}
 	return 0;
 }
diff --git a/mm/debug.c b/mm/debug.c
index 82baaf87ef3d..15d3cb9c1cb0 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -83,10 +83,10 @@ static void __dump_folio(const struct folio *folio, const struct page *page,
 	if (folio_test_large(folio)) {
 		int pincount = atomic_read(&folio->_pincount);
 
-		pr_warn("head: order:%u mapcount:%d entire_mapcount:%d pincount:%d\n",
+		pr_warn("head: order:%u mapcount:%d total_mapped_pages:%lu pincount:%d\n",
 				folio_order(folio),
 				folio_mapcount(folio),
-				folio_entire_mapcount(folio),
+				folio_total_mapped_pages(folio),
 				pincount);
 	}
 
diff --git a/mm/internal.h b/mm/internal.h
index aa1206495bc6..d4d74f614e7f 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -868,7 +868,7 @@ static inline void prep_compound_head(struct page *page, unsigned int order)
 		folio->_mm_id_mapcount[0] = -1;
 		folio->_mm_id_mapcount[1] = -1;
 	}
-	atomic_set(&folio->_entire_mapcount, -1);
+	atomic_long_set(&folio->_total_mapped_pages, 0);
 	atomic_set(&folio->_pincount, 0);
 	if (order > 1)
 		INIT_LIST_HEAD(&folio->_deferred_list);
diff --git a/mm/memory.c b/mm/memory.c
index ea6568571131..6a3e0eed29cc 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4139,8 +4139,7 @@ static bool __wp_can_reuse_large_anon_folio(struct folio *folio,
 	if (folio_large_mapcount(folio) != folio_ref_count(folio))
 		goto unlock;
 
-	VM_WARN_ON_ONCE_FOLIO(folio_large_mapcount(folio) > folio_nr_pages(folio), folio);
-	VM_WARN_ON_ONCE_FOLIO(folio_entire_mapcount(folio), folio);
+	VM_WARN_ON_ONCE_FOLIO(folio_total_mapped_pages(folio) > folio_nr_pages(folio), folio);
 	VM_WARN_ON_ONCE(folio_mm_id(folio, 0) != vma->vm_mm->mm_id &&
 			folio_mm_id(folio, 1) != vma->vm_mm->mm_id);
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8ed4c73fdba4..43000d869215 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1121,8 +1121,8 @@ static int free_tail_page_prepare(struct page *head_page, struct page *page)
 				goto out;
 			}
 		}
-		if (folio_entire_mapcount(folio)) {
-			bad_page(page, "nonzero entire_mapcount");
+		if (unlikely(atomic_long_read(&folio->_total_mapped_pages))) {
+			bad_page(page, "nonzero total_mapped_pages");
 			goto out;
 		}
 		if (unlikely(atomic_read(&folio->_pincount))) {
diff --git a/mm/rmap.c b/mm/rmap.c
index d08927949284..47b144f6d3c7 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1357,30 +1357,28 @@ static __always_inline void __folio_add_rmap(struct folio *folio,
 
 	__folio_rmap_sanity_checks(folio, page, nr_pages, level);
 
-	switch (level) {
-	case PGTABLE_LEVEL_PTE:
-		if (!folio_test_large(folio)) {
-			nr = atomic_inc_and_test(&folio->_mapcount);
-			break;
-		}
+	if (level == PGTABLE_LEVEL_PTE && !folio_test_large(folio)) {
+		nr = atomic_inc_and_test(&folio->_mapcount);
+	} else {
+		const unsigned int nr_mappings = nr_pages >> pgtable_level_to_order(level);
+		unsigned long nr_mapped_pages;
 
-		mapcount = folio_add_return_large_mapcount(folio, nr_pages, vma);
-		if (mapcount == nr_pages)
+		mapcount = folio_add_return_large_mapcount(folio, nr_mappings,
+							   nr_pages, vma,
+							   &nr_mapped_pages);
+		if (mapcount == nr_mappings)
 			nr = folio_large_nr_pages(folio);
-		break;
-	case PGTABLE_LEVEL_PMD:
-	case PGTABLE_LEVEL_PUD:
-		if (atomic_inc_and_test(&folio->_entire_mapcount) &&
-		    level == PGTABLE_LEVEL_PMD)
-			nr_pmdmapped = HPAGE_PMD_NR;
 
-		mapcount = folio_inc_return_large_mapcount(folio, vma);
-		if (mapcount == 1)
-			nr = folio_large_nr_pages(folio);
-		break;
-	default:
-		BUILD_BUG();
+		/*
+		 * For PMD-sized THPs, we'll adjust the counter once the
+		 * first PMD mapping is added.
+		 */
+		if (level == PGTABLE_LEVEL_PMD &&
+		    folio_large_nr_pages(folio) == HPAGE_PMD_NR &&
+		    nr_mapped_pages - mapcount == nr_pages - nr_mappings)
+			nr_pmdmapped = HPAGE_PMD_NR;
 	}
+
 	__folio_mod_stat(folio, nr, nr_pmdmapped);
 }
 
@@ -1483,35 +1481,14 @@ static __always_inline void __folio_add_anon_rmap(struct folio *folio,
 		__page_check_anon_rmap(folio, page, vma, address);
 
 	if (flags & RMAP_EXCLUSIVE) {
-		switch (level) {
-		case PGTABLE_LEVEL_PTE:
-			for (i = 0; i < nr_pages; i++)
-				SetPageAnonExclusive(page + i);
-			break;
-		case PGTABLE_LEVEL_PMD:
-			SetPageAnonExclusive(page);
-			break;
-		case PGTABLE_LEVEL_PUD:
-			/*
-			 * Keep the compiler happy, we don't support anonymous
-			 * PUD mappings.
-			 */
-			WARN_ON_ONCE(1);
-			break;
-		default:
-			BUILD_BUG();
-		}
+		const unsigned int mapping_order = pgtable_level_to_order(level);
+
+		for (i = 0; i < nr_pages; i += 1u << mapping_order)
+			SetPageAnonExclusive(page + i);
 	}
 
 	VM_WARN_ON_FOLIO(!folio_test_large(folio) && PageAnonExclusive(page) &&
 			 atomic_read(&folio->_mapcount) > 0, folio);
-	for (i = 0; i < nr_pages; i++) {
-		struct page *cur_page = page + i;
-
-		VM_WARN_ON_FOLIO(folio_test_large(folio) &&
-				 folio_entire_mapcount(folio) > 1 &&
-				 PageAnonExclusive(cur_page), folio);
-	}
 
 	/*
 	 * Only mlock it if the folio is fully mapped to the VMA.
@@ -1608,27 +1585,34 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
 		atomic_set(&folio->_mapcount, 0);
 		if (exclusive)
 			SetPageAnonExclusive(&folio->page);
-	} else if (!folio_test_pmd_mappable(folio)) {
+	} else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
+		const unsigned int order = folio_large_order(folio);
+		unsigned int nr_mappings, mapping_order;
 		int i;
 
+		if (order >= PUD_ORDER)
+			mapping_order = PUD_ORDER;
+		else if (order >= PMD_ORDER)
+			mapping_order = PMD_ORDER;
+		else
+			mapping_order = 0;
+
 		nr = folio_large_nr_pages(folio);
+		if (order == PMD_ORDER)
+			nr_pmdmapped = 1u << order;
+
 		if (exclusive) {
-			for (i = 0; i < nr; i++) {
+			for (i = 0; i < nr; i += 1u << mapping_order) {
 				struct page *page = folio_page(folio, i);
 
 				SetPageAnonExclusive(page);
 			}
 		}
 
-		folio_set_large_mapcount(folio, nr, vma);
+		nr_mappings = nr >> mapping_order;
+		folio_set_large_mapcount(folio, nr_mappings, nr, vma);
 	} else {
-		nr = folio_large_nr_pages(folio);
-		/* increment count (starts at -1) */
-		atomic_set(&folio->_entire_mapcount, 0);
-		folio_set_large_mapcount(folio, 1, vma);
-		if (exclusive)
-			SetPageAnonExclusive(&folio->page);
-		nr_pmdmapped = nr;
+		WARN_ON_ONCE(1);
 	}
 
 	VM_WARN_ON_ONCE(address < vma->vm_start ||
@@ -1714,8 +1698,10 @@ void folio_add_file_rmap_pud(struct folio *folio, struct page *page,
 #endif
 }
 
-static bool __folio_certainly_partially_mapped(struct folio *folio, int mapcount)
+static bool __folio_certainly_partially_mapped(struct folio *folio)
 {
+	unsigned long total_mapped_pages = atomic_long_read(&folio->_total_mapped_pages);
+
 	/*
 	 * This is a best-effort check only: if the average per-page
 	 * mapcount in the folio is smaller than 1, at least one page is not
@@ -1727,8 +1713,7 @@ static bool __folio_certainly_partially_mapped(struct folio *folio, int mapcount
 	 * average per-page mapcount is >= 1. However, we will detect the
 	 * partial mapping once it becomes exclusively mapped again.
 	 */
-	return mapcount && !folio_entire_mapcount(folio) &&
-	       mapcount < folio_large_nr_pages(folio);
+	return total_mapped_pages && total_mapped_pages < folio_large_nr_pages(folio);
 }
 
 static __always_inline void __folio_remove_rmap(struct folio *folio,
@@ -1736,53 +1721,45 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
 		enum pgtable_level level)
 {
 	int nr = 0, nr_pmdmapped = 0, mapcount;
-	bool partially_mapped = false;
 
 	__folio_rmap_sanity_checks(folio, page, nr_pages, level);
 
-	switch (level) {
-	case PGTABLE_LEVEL_PTE:
-		if (!folio_test_large(folio)) {
-			nr = atomic_add_negative(-1, &folio->_mapcount);
-			break;
-		}
+	if (level == PGTABLE_LEVEL_PTE && !folio_test_large(folio)) {
+		nr = atomic_add_negative(-1, &folio->_mapcount);
+	} else {
+		const unsigned int nr_mappings = nr_pages >> pgtable_level_to_order(level);
+		unsigned long nr_mapped_pages;
 
-		mapcount = folio_sub_return_large_mapcount(folio, nr_pages, vma);
+		mapcount = folio_sub_return_large_mapcount(folio, nr_mappings,
+							   nr_pages, vma,
+							   &nr_mapped_pages);
 		if (!mapcount)
 			nr = folio_large_nr_pages(folio);
 
-		partially_mapped = __folio_certainly_partially_mapped(folio, mapcount);
-		break;
-	case PGTABLE_LEVEL_PMD:
-	case PGTABLE_LEVEL_PUD:
-		mapcount = folio_dec_return_large_mapcount(folio, vma);
-		if (!mapcount)
-			nr = folio_large_nr_pages(folio);
-
-		if (atomic_add_negative(-1, &folio->_entire_mapcount) &&
-		    level == PGTABLE_LEVEL_PMD)
+		/*
+		 * For PMD-sized THPs, we'll adjust the counter once the
+		 * last PMD mapping is removed.
+		 */
+		if (level == PGTABLE_LEVEL_PMD &&
+		    folio_large_nr_pages(folio) == HPAGE_PMD_NR &&
+		    nr_mapped_pages - mapcount == 0)
 			nr_pmdmapped = HPAGE_PMD_NR;
 
-		partially_mapped = __folio_certainly_partially_mapped(folio, mapcount);
-		break;
-	default:
-		BUILD_BUG();
+		/*
+		 * Queue anon large folio for deferred split if at least one
+		 * page of the folio is unmapped and at least one page is still
+		 * mapped.
+		 *
+		 * Device private folios do not support deferred splitting and
+		 * shrinker based scanning of the folios to free.
+		 */
+		if (folio_test_anon(folio) &&
+		    __folio_certainly_partially_mapped(folio) &&
+		    !folio_test_partially_mapped(folio) &&
+		    !folio_is_device_private(folio))
+			deferred_split_folio(folio, true);
 	}
 
-	/*
-	 * Queue anon large folio for deferred split if at least one page of
-	 * the folio is unmapped and at least one page is still mapped.
-	 *
-	 * Check partially_mapped first to ensure it is a large folio.
-	 *
-	 * Device private folios do not support deferred splitting and
-	 * shrinker based scanning of the folios to free.
-	 */
-	if (partially_mapped && folio_test_anon(folio) &&
-	    !folio_test_partially_mapped(folio) &&
-	    !folio_is_device_private(folio))
-		deferred_split_folio(folio, true);
-
 	__folio_mod_stat(folio, -nr, -nr_pmdmapped);
 
 	/*

-- 
2.43.0


^ permalink raw reply related

* Re: [PATCH v3] doc: Add CPU Isolation documentation
From: Jonathan Corbet @ 2026-04-12 19:11 UTC (permalink / raw)
  To: Frederic Weisbecker, LKML
  Cc: Frederic Weisbecker, Anna-Maria Behnsen, Gabriele Monaco,
	Ingo Molnar, Marcelo Tosatti, Marco Crivellari, Michal Hocko,
	Paul E . McKenney, Peter Zijlstra, Phil Auld, Steven Rostedt,
	Thomas Gleixner, Valentin Schneider, Vlastimil Babka, Waiman Long,
	linux-doc, Sebastian Andrzej Siewior, Bagas Sanjaya
In-Reply-To: <20260402094749.18879-1-frederic@kernel.org>

Frederic Weisbecker <frederic@kernel.org> writes:

> nohz_full was introduced in v3.10 in 2013, which means this
> documentation is overdue for 13 years.

I've seen worse :)

> Fortunately Paul wrote a part of the needed documentation a while ago,
> especially concerning nohz_full in Documentation/timers/no_hz.rst and
> also about per-CPU kthreads in
> Documentation/admin-guide/kernel-per-CPU-kthreads.rst
>
> Introduce a new page that gives an overview of CPU isolation in general.
>
> Acked-by: Waiman Long <longman@redhat.com>
> Reviewed-by: Valentin Schneider <vschneid@redhat.com>
> Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
> ---
> v3: Apply suggestions from Randy, Steven, Valentin, Waiman and also Sashiko!
>
>  Documentation/admin-guide/cpu-isolation.rst | 357 ++++++++++++++++++++
>  Documentation/admin-guide/index.rst         |   1 +
>  2 files changed, 358 insertions(+)
>  create mode 100644 Documentation/admin-guide/cpu-isolation.rst

Applied, thanks.

jon

^ permalink raw reply

* Re: [PATCH] Documentation: core-api: real-time: correct spelling
From: Jonathan Corbet @ 2026-04-12 19:12 UTC (permalink / raw)
  To: Sukrut Heroorkar, Sebastian Andrzej Siewior, Clark Williams,
	Steven Rostedt, Shuah Khan,
	open list:Real-time Linux (PREEMPT_RT), open list:DOCUMENTATION,
	open list
  Cc: Sukrut Heroorkar
In-Reply-To: <20260411155120.233357-1-hsukrut3@gmail.com>

Sukrut Heroorkar <hsukrut3@gmail.com> writes:

> Fix typo "excpetion" with "exception".
>
> Signed-off-by: Sukrut Heroorkar <hsukrut3@gmail.com>
> ---
>  Documentation/core-api/real-time/architecture-porting.rst | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/Documentation/core-api/real-time/architecture-porting.rst b/Documentation/core-api/real-time/architecture-porting.rst
> index c90a426d8062..c9a39d708866 100644
> --- a/Documentation/core-api/real-time/architecture-porting.rst
> +++ b/Documentation/core-api/real-time/architecture-porting.rst
> @@ -74,7 +74,7 @@ Exception handlers
>    Enabling interrupts is especially important on PREEMPT_RT, where certain
>    locks, such as spinlock_t, become sleepable. For example, handling an
>    invalid opcode may result in sending a SIGILL signal to the user task. A
> -  debug excpetion will send a SIGTRAP signal.
> +  debug exception will send a SIGTRAP signal.
>    In both cases, if the exception occurred in user space, it is safe to enable

Applied, thanks.

jon

^ permalink raw reply

* [PATCH] Docs/mm/damon/maintainer-profile: add AI review usage guideline
From: SeongJae Park @ 2026-04-12 21:19 UTC (permalink / raw)
  To: Andrew Morton
  Cc: SeongJae Park, Liam R. Howlett, David Hildenbrand,
	Jonathan Corbet, Lorenzo Stoakes, Michal Hocko, Mike Rapoport,
	Randy Dunlap, Shuah Khan, Suren Baghdasaryan, Vlastimil Babka,
	damon, linux-doc, linux-kernel, linux-mm, sashiko

DAMON is opted-in for DAMON patches scanning [1] and email delivery [2].
Clarify how that could be used on DAMON maintainer profile.

[1] https://github.com/sashiko-dev/sashiko/commit/ad9f4a98f958
[2] https://github.com/sashiko-dev/sashiko/commit/b554c7b6e733

Signed-off-by: SeongJae Park <sj@kernel.org>
---
Changes from v1
- v1: https://lore.kernel.org/20260411183029.81030-1-sj@kernel.org
- replace typo-ed word: s/a non-mandastory/an optional/.
- wordsmith recipients reduction rule of thumbs.

 Documentation/mm/damon/maintainer-profile.rst | 21 +++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/Documentation/mm/damon/maintainer-profile.rst b/Documentation/mm/damon/maintainer-profile.rst
index bcb9798a27a86..fb2fa00cc9aa1 100644
--- a/Documentation/mm/damon/maintainer-profile.rst
+++ b/Documentation/mm/damon/maintainer-profile.rst
@@ -100,3 +100,24 @@ There is also a public Google `calendar
 <https://calendar.google.com/calendar/u/0?cid=ZDIwOTA4YTMxNjc2MDQ3NTIyMmUzYTM5ZmQyM2U4NDA0ZGIwZjBiYmJlZGQxNDM0MmY4ZTRjOTE0NjdhZDRiY0Bncm91cC5jYWxlbmRhci5nb29nbGUuY29t>`_
 that has the events.  Anyone can subscribe to it.  DAMON maintainer will also
 provide periodic reminders to the mailing list (damon@lists.linux.dev).
+
+AI Review
+---------
+
+For patches that are publicly posted to DAMON mailing list
+(damon@lists.linux.dev), AI reviews of the patches will be available at
+sashiko.dev.  The reviews could also be sent as mails to the author of the
+patch.
+
+Patch authors are encouraged to check the AI reviews and share their opinions.
+The sharing could be done as a reply to the mail thread.  Consider reducing the
+recipients list for such sharing, since some people are not really interested
+in AI reviews.  As a rule of thumb, drop stable@vger.kernel.org and individuals
+except DAMON maintainer.
+
+`hkml` also provides a `feature
+<https://github.com/sjp38/hackermail/blob/master/USAGE.md#forwarding-sashikodev-statuscomments-to-mailing-list>`_
+for such sharing.  Please feel free to use the feature.
+
+It is only an optional recommendation.  DAMON maintainer could also ask any
+question about the AI reviews, though.

base-commit: 507d761b28278a923d86e7a29c5d084b74639e15
-- 
2.47.3

^ permalink raw reply related

* [PATCH v3 0/5] Add OneXPlayer Configuration HID Driver
From: Derek J. Clark @ 2026-04-12 21:34 UTC (permalink / raw)
  To: Jiri Kosina, Benjamin Tissoires
  Cc: Pierre-Loup A . Griffais, Lambert Fan, Zhouwang Huang,
	Derek J . Clark, linux-input, linux-doc, linux-kernel

Adds an HID driver for OneXPlayer HID configuration devices. There are
currently two generations of the OneXPlayer HID protocol. The first
(OneXPlayer F1 series) only provides an RGB control interface over HID.
The second (X1 mini series, G1 series, AOKZOE A1X) also includes a
hardware level button mapping interface, vibration intensity settings,
and the ability to switch output between xinput and a debug mode that can
be used to debug the button mapping. Some devices (G1 Series, APEX) use a
hybrid of Gen 1 RGB control and Gen 2 controller settings. To ensure there
are no conflicts when the driver is loaded, we skip creating the RGB
interface for Gen 2 devices if there is a DMI match.

I'll also add a note that Gen 1 devices also have an interface for
setting the key map and debug mode, but that is done entirely over a
serial TTY device so it cannot be added to this driver. There are also
some "Gen 0" devices (OneXPlayer 2 Series) that use it, but on those
the TTY interface also handles the RGB control, so no support is
provided by this driver for those interfaces.

Signed-off-by: Derek J. Clark <derekjohn.clark@gmail.com>

Derek J. Clark (5):
  HID: hid-oxp: Add OneXPlayer configuration driver
  HID: hid-oxp: Add Second Generation RGB Control
  HID: hid-oxp: Add Second Generation Gamepad Mode Switch
  HID: hid-oxp: Add Button Mapping Interface
  HID: hid-oxp: Add Vibration Intensity Attributes

 MAINTAINERS           |    6 +
 drivers/hid/Kconfig   |   13 +
 drivers/hid/Makefile  |    1 +
 drivers/hid/hid-ids.h |    6 +
 drivers/hid/hid-oxp.c | 1575 +++++++++++++++++++++++++++++++++++++++++
 5 files changed, 1601 insertions(+)
 create mode 100644 drivers/hid/hid-oxp.c

-- 
2.53.0

^ permalink raw reply

* [PATCH v3 1/5] HID: hid-oxp: Add OneXPlayer configuration driver
From: Derek J. Clark @ 2026-04-12 21:34 UTC (permalink / raw)
  To: Jiri Kosina, Benjamin Tissoires
  Cc: Pierre-Loup A . Griffais, Lambert Fan, Zhouwang Huang,
	Derek J . Clark, linux-input, linux-doc, linux-kernel
In-Reply-To: <20260412213444.2231505-1-derekjohn.clark@gmail.com>

Adds OneXPlayer HID configuration driver. In this initial driver patch,
add the RGB interface for the first generation of HID based RGB control.

This interface provides the following attributes:
- brightness: provided by the LED core, this works in a fairly unique
  way on this device. The hardware accepts 5 brightness values (0-4),
  which affects the brightness of the multicolor and animated effects
  built into the MCU firmware. For monocolor settings, the device
  expects the hardware brightness value to be pushed to maximum, then we
  apply brightness adjustments mathematically based on % (0-100). This
  leads to some odd conversion as we need the brightness slider to reach
  the full range, but it has no effect when incrementing between the
  division points for other effects.
- multi-intensity: provided by the LED core for red, green, and blue.
- effect: Allows the MCU to set 19 individual effects.
- effect_index: Lists the 19 valid effect names for the interface.
- enabled: Allows the MCU to toggle the RGB interface on/off.
- enabled_index: Lists the valid states for enabled.
- speed: Allows the MCU to set the animation rate for the various
  effects.
- speed_range: Lists the valid range of speed (0-9).

The MCU also has a few odd quirks that make sending multiple synchronous
events challenging. It will essentially freeze if it receives another
message before it has finished processing the last command. It also will
not reply if you wait on it using a completion. To get around this, we
do a 200ms sleep inside a work queue thread and debounce all but the most
recent message using a 50ms mod_delayed_work. This will cache the last
write, queue the work, then return so userspace can release its write
thread. The work queue is only used for brightness/multi-intensity as
that is the path likely to receive rapid successive writes.

Reviewed-by: Zhouwang Huang <honjow311@gmail.com>
Tested-by: Zhouwang Huang <honjow311@gmail.com>
Signed-off-by: Derek J. Clark <derekjohn.clark@gmail.com>
---
 MAINTAINERS           |   6 +
 drivers/hid/Kconfig   |  12 +
 drivers/hid/Makefile  |   1 +
 drivers/hid/hid-ids.h |   3 +
 drivers/hid/hid-oxp.c | 651 ++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 673 insertions(+)
 create mode 100644 drivers/hid/hid-oxp.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 6f6517bf4f97..dae814192fa4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -19707,6 +19707,12 @@ S:	Maintained
 F:	drivers/mtd/nand/onenand/
 F:	include/linux/mtd/onenand*.h
 
+ONEXPLAYER HID DRIVER
+M:	Derek J. Clark <derekjohn.clark@gmail.com>
+L:	linux-input@vger.kernel.org
+S:	Maintained
+F:	drivers/hid/hid-oxp.c
+
 ONEXPLAYER PLATFORM EC DRIVER
 M:	Antheas Kapenekakis <lkml@antheas.dev>
 M:	Derek John Clark <derekjohn.clark@gmail.com>
diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index 3c034cd32fa8..2deaec9f467d 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -919,6 +919,18 @@ config HID_ORTEK
 	   - Ortek WKB-2000
 	   - Skycable wireless presenter
 
+config HID_OXP
+	tristate "OneXPlayer handheld controller configuration support"
+	depends on USB_HID
+	depends on LEDS_CLASS
+	depends on LEDS_CLASS_MULTICOLOR
+	help
+	  Say Y here if you would like to enable support for OneXPlayer handheld
+	  devices that come with RGB LED rings around the joysticks and macro buttons.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called hid-oxp.
+
 config HID_PANTHERLORD
 	tristate "Pantherlord/GreenAsia game controller"
 	help
diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile
index 03ef72ec4499..bda8a24c9257 100644
--- a/drivers/hid/Makefile
+++ b/drivers/hid/Makefile
@@ -99,6 +99,7 @@ obj-$(CONFIG_HID_NTI)			+= hid-nti.o
 obj-$(CONFIG_HID_NTRIG)		+= hid-ntrig.o
 obj-$(CONFIG_HID_NVIDIA_SHIELD)	+= hid-nvidia-shield.o
 obj-$(CONFIG_HID_ORTEK)		+= hid-ortek.o
+obj-$(CONFIG_HID_OXP)		+= hid-oxp.o
 obj-$(CONFIG_HID_PRODIKEYS)	+= hid-prodikeys.o
 obj-$(CONFIG_HID_PANTHERLORD)	+= hid-pl.o
 obj-$(CONFIG_HID_PENMOUNT)	+= hid-penmount.o
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 5bad81222c6e..dcc5a3a70eaf 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -1131,6 +1131,9 @@
 #define USB_VENDOR_ID_NVIDIA				0x0955
 #define USB_DEVICE_ID_NVIDIA_THUNDERSTRIKE_CONTROLLER	0x7214
 
+#define USB_VENDOR_ID_CRSC			0x1a2c
+#define USB_DEVICE_ID_ONEXPLAYER_GEN1		0xb001
+
 #define USB_VENDOR_ID_ONTRAK		0x0a07
 #define USB_DEVICE_ID_ONTRAK_ADU100	0x0064
 
diff --git a/drivers/hid/hid-oxp.c b/drivers/hid/hid-oxp.c
new file mode 100644
index 000000000000..c4219ecd8d71
--- /dev/null
+++ b/drivers/hid/hid-oxp.c
@@ -0,0 +1,651 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  HID driver for OneXPlayer gamepad configuration devices.
+ *
+ *  Copyright (c) 2026 Valve Corporation
+ */
+
+#include <linux/array_size.h>
+#include <linux/cleanup.h>
+#include <linux/delay.h>
+#include <linux/dev_printk.h>
+#include <linux/device.h>
+#include <linux/hid.h>
+#include <linux/jiffies.h>
+#include <linux/kstrtox.h>
+#include <linux/led-class-multicolor.h>
+#include <linux/mutex.h>
+#include <linux/sysfs.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
+#include "hid-ids.h"
+
+#define OXP_PACKET_SIZE 64
+
+#define GEN1_MESSAGE_ID	0xff
+
+#define GEN1_USAGE_PAGE	0xff01
+
+enum oxp_function_index {
+	OXP_FID_GEN1_RGB_SET =		0x07,
+	OXP_FID_GEN1_RGB_REPLY =	0x0f,
+};
+
+static struct oxp_hid_cfg {
+	struct led_classdev_mc *led_mc;
+	struct hid_device *hdev;
+	struct mutex cfg_mutex; /*ensure single synchronous output report*/
+	u8 rgb_brightness;
+	u8 rgb_effect;
+	u8 rgb_speed;
+	u8 rgb_en;
+} drvdata;
+
+enum oxp_feature_en_index {
+	OXP_FEAT_DISABLED,
+	OXP_FEAT_ENABLED,
+};
+
+static const char *const oxp_feature_en_text[] = {
+	[OXP_FEAT_DISABLED] = "false",
+	[OXP_FEAT_ENABLED] = "true",
+};
+
+enum oxp_rgb_effect_index {
+	OXP_UNKNOWN,
+	OXP_EFFECT_AURORA,
+	OXP_EFFECT_BIRTHDAY,
+	OXP_EFFECT_FLOWING,
+	OXP_EFFECT_CHROMA_1,
+	OXP_EFFECT_NEON,
+	OXP_EFFECT_CHROMA_2,
+	OXP_EFFECT_DREAMY,
+	OXP_EFFECT_WARM,
+	OXP_EFFECT_CYBERPUNK,
+	OXP_EFFECT_SEA,
+	OXP_EFFECT_SUNSET,
+	OXP_EFFECT_COLORFUL,
+	OXP_EFFECT_MONSTER,
+	OXP_EFFECT_GREEN,
+	OXP_EFFECT_BLUE,
+	OXP_EFFECT_YELLOW,
+	OXP_EFFECT_TEAL,
+	OXP_EFFECT_PURPLE,
+	OXP_EFFECT_FOGGY,
+	OXP_EFFECT_MONO_LIST, /* placeholder for effect_index_show */
+};
+
+/* These belong to rgb_effect_index, but we want to hide them from
+ * rgb_effect_text
+ */
+
+#define OXP_GET_PROPERTY 0xfc
+#define OXP_SET_PROPERTY 0xfd
+#define OXP_EFFECT_MONO_TRUE 0xfe /* actual index for monocolor */
+
+static const char *const oxp_rgb_effect_text[] = {
+	[OXP_UNKNOWN] = "unknown",
+	[OXP_EFFECT_AURORA] = "aurora",
+	[OXP_EFFECT_BIRTHDAY] = "birthday_cake",
+	[OXP_EFFECT_FLOWING] = "flowing_light",
+	[OXP_EFFECT_CHROMA_1] = "chroma_popping",
+	[OXP_EFFECT_NEON] = "neon",
+	[OXP_EFFECT_CHROMA_2] = "chroma_breathing",
+	[OXP_EFFECT_DREAMY] = "dreamy",
+	[OXP_EFFECT_WARM] = "warm_sun",
+	[OXP_EFFECT_CYBERPUNK] = "cyberpunk",
+	[OXP_EFFECT_SEA] = "sea_foam",
+	[OXP_EFFECT_SUNSET] = "sunset_afterglow",
+	[OXP_EFFECT_COLORFUL] = "colorful",
+	[OXP_EFFECT_MONSTER] = "monster_woke",
+	[OXP_EFFECT_GREEN] = "green_breathing",
+	[OXP_EFFECT_BLUE] = "blue_breathing",
+	[OXP_EFFECT_YELLOW] = "yellow_breathing",
+	[OXP_EFFECT_TEAL] = "teal_breathing",
+	[OXP_EFFECT_PURPLE] = "purple_breathing",
+	[OXP_EFFECT_FOGGY] = "foggy_haze",
+	[OXP_EFFECT_MONO_LIST] = "monocolor",
+};
+
+struct oxp_gen_1_rgb_report {
+	u8 report_id;
+	u8 message_id;
+	u8 padding_2[2];
+	u8 effect;
+	u8 enabled;
+	u8 speed;
+	u8 brightness;
+	u8 red;
+	u8 green;
+	u8 blue;
+} __packed;
+
+static u16 get_usage_page(struct hid_device *hdev)
+{
+	return hdev->collection[0].usage >> 16;
+}
+
+static int oxp_hid_raw_event_gen_1(struct hid_device *hdev,
+				   struct hid_report *report, u8 *data,
+				   int size)
+{
+	struct led_classdev_mc *led_mc = drvdata.led_mc;
+	struct oxp_gen_1_rgb_report *rgb_rep;
+
+	if (data[1] != OXP_FID_GEN1_RGB_REPLY)
+		return 0;
+
+	rgb_rep = (struct oxp_gen_1_rgb_report *)data;
+	/* Ensure we save monocolor as the list value */
+	drvdata.rgb_effect = rgb_rep->effect == OXP_EFFECT_MONO_TRUE ?
+			     OXP_EFFECT_MONO_LIST :
+			     rgb_rep->effect;
+	drvdata.rgb_speed = rgb_rep->speed;
+	drvdata.rgb_en = rgb_rep->enabled == 0 ? OXP_FEAT_DISABLED :
+						 OXP_FEAT_ENABLED;
+	drvdata.rgb_brightness = rgb_rep->brightness;
+	led_mc->led_cdev.brightness = rgb_rep->brightness / 4 *
+				      led_mc->led_cdev.max_brightness;
+	/* If monocolor had less than 100% brightness on the previous boot,
+	 * there will be no reliable way to determine the real intensity.
+	 * Since intensity scaling is used with a hardware brightness set at max,
+	 * our brightness will always look like 100%. Use the last set value to
+	 * prevent successive boots from lowering the brightness further.
+	 * Brightness will be "wrong" but the effect will remain the same visually.
+	 */
+	led_mc->subled_info[0].intensity = rgb_rep->red;
+	led_mc->subled_info[1].intensity = rgb_rep->green;
+	led_mc->subled_info[2].intensity = rgb_rep->blue;
+
+	return 0;
+}
+
+static int oxp_hid_raw_event(struct hid_device *hdev, struct hid_report *report,
+			     u8 *data, int size)
+{
+	u16 up = get_usage_page(hdev);
+
+	dev_dbg(&hdev->dev, "raw event data: [%*ph]\n", OXP_PACKET_SIZE, data);
+
+	switch (up) {
+	case GEN1_USAGE_PAGE:
+		return oxp_hid_raw_event_gen_1(hdev, report, data, size);
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int mcu_property_out(u8 *header, size_t header_size, u8 *data,
+			    size_t data_size, u8 *footer, size_t footer_size)
+{
+	unsigned char *dmabuf __free(kfree) = kzalloc(OXP_PACKET_SIZE, GFP_KERNEL);
+	int ret;
+
+	if (!dmabuf)
+		return -ENOMEM;
+
+	if (header_size + data_size + footer_size > OXP_PACKET_SIZE)
+		return -EINVAL;
+
+	guard(mutex)(&drvdata.cfg_mutex);
+	memcpy(dmabuf, header, header_size);
+	memcpy(dmabuf + header_size, data, data_size);
+	if (footer_size)
+		memcpy(dmabuf + OXP_PACKET_SIZE - footer_size, footer, footer_size);
+
+	dev_dbg(&drvdata.hdev->dev, "raw data: [%*ph]\n", OXP_PACKET_SIZE, dmabuf);
+
+	ret = hid_hw_output_report(drvdata.hdev, dmabuf, OXP_PACKET_SIZE);
+	if (ret < 0)
+		return ret;
+
+	/* MCU takes 200ms to be ready for another command. */
+	msleep(200);
+	return ret == OXP_PACKET_SIZE ? 0 : -EIO;
+}
+
+static int oxp_gen_1_property_out(enum oxp_function_index fid, u8 *data,
+				  u8 data_size)
+{
+	u8 header[] = { fid, GEN1_MESSAGE_ID };
+	size_t header_size = ARRAY_SIZE(header);
+
+	return mcu_property_out(header, header_size, data, data_size, NULL, 0);
+}
+
+static int oxp_rgb_status_store(u8 enabled, u8 speed, u8 brightness)
+{
+	u16 up = get_usage_page(drvdata.hdev);
+	u8 *data;
+
+	/* Always default to max brightness and use intensity scaling when in
+	 * monocolor mode.
+	 */
+	switch (up) {
+	case GEN1_USAGE_PAGE:
+		data = (u8[4]) { OXP_SET_PROPERTY, enabled, speed, brightness };
+		if (drvdata.rgb_effect == OXP_EFFECT_MONO_LIST)
+			data[3] = 0x04;
+		return oxp_gen_1_property_out(OXP_FID_GEN1_RGB_SET, data, 4);
+	default:
+		return -ENODEV;
+	}
+}
+
+static ssize_t oxp_rgb_status_show(void)
+{
+	u16 up = get_usage_page(drvdata.hdev);
+	u8 *data;
+
+	switch (up) {
+	case GEN1_USAGE_PAGE:
+		data = (u8[1]) { OXP_GET_PROPERTY };
+		return oxp_gen_1_property_out(OXP_FID_GEN1_RGB_SET, data, 1);
+	default:
+		return -ENODEV;
+	}
+}
+
+static int oxp_rgb_color_set(void)
+{
+	u8 max_br = drvdata.led_mc->led_cdev.max_brightness;
+	u8 br = drvdata.led_mc->led_cdev.brightness;
+	u16 up = get_usage_page(drvdata.hdev);
+	u8 green, red, blue;
+	size_t size;
+	u8 *data;
+	int i;
+
+	red = br * drvdata.led_mc->subled_info[0].intensity / max_br;
+	green = br * drvdata.led_mc->subled_info[1].intensity / max_br;
+	blue = br * drvdata.led_mc->subled_info[2].intensity / max_br;
+
+	switch (up) {
+	case GEN1_USAGE_PAGE:
+		size = 55;
+		data = (u8[55]) { OXP_EFFECT_MONO_TRUE };
+
+		for (i = 0; i < (size - 1) / 3; i++) {
+			data[3 * i + 1] = red;
+			data[3 * i + 2] = green;
+			data[3 * i + 3] = blue;
+		}
+		return oxp_gen_1_property_out(OXP_FID_GEN1_RGB_SET, data, size);
+	default:
+		return -ENODEV;
+	}
+}
+
+static int oxp_rgb_effect_set(u8 effect)
+{
+	u16 up = get_usage_page(drvdata.hdev);
+	u8 *data;
+	int ret;
+
+	switch (effect) {
+	case OXP_EFFECT_AURORA:
+	case OXP_EFFECT_BIRTHDAY:
+	case OXP_EFFECT_FLOWING:
+	case OXP_EFFECT_CHROMA_1:
+	case OXP_EFFECT_NEON:
+	case OXP_EFFECT_CHROMA_2:
+	case OXP_EFFECT_DREAMY:
+	case OXP_EFFECT_WARM:
+	case OXP_EFFECT_CYBERPUNK:
+	case OXP_EFFECT_SEA:
+	case OXP_EFFECT_SUNSET:
+	case OXP_EFFECT_COLORFUL:
+	case OXP_EFFECT_MONSTER:
+	case OXP_EFFECT_GREEN:
+	case OXP_EFFECT_BLUE:
+	case OXP_EFFECT_YELLOW:
+	case OXP_EFFECT_TEAL:
+	case OXP_EFFECT_PURPLE:
+	case OXP_EFFECT_FOGGY:
+		switch (up) {
+		case GEN1_USAGE_PAGE:
+			data = (u8[1]) { effect };
+			ret = oxp_gen_1_property_out(OXP_FID_GEN1_RGB_SET, data, 1);
+			break;
+		default:
+			ret = -ENODEV;
+		}
+		break;
+	case OXP_EFFECT_MONO_LIST:
+		ret = oxp_rgb_color_set();
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (ret)
+		return ret;
+
+	drvdata.rgb_effect = effect;
+
+	return 0;
+}
+
+static ssize_t enabled_store(struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t count)
+{
+	int ret;
+	u8 val;
+
+	ret = sysfs_match_string(oxp_feature_en_text, buf);
+	if (ret < 0)
+		return ret;
+	val = ret;
+
+	ret = oxp_rgb_status_store(val, drvdata.rgb_speed,
+				   drvdata.rgb_brightness);
+	if (ret)
+		return ret;
+
+	drvdata.rgb_en = val;
+	return count;
+}
+
+static ssize_t enabled_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	int ret;
+
+	ret = oxp_rgb_status_show();
+	if (ret)
+		return ret;
+
+	if (drvdata.rgb_en >= ARRAY_SIZE(oxp_feature_en_text))
+		return -EINVAL;
+
+	return sysfs_emit(buf, "%s\n", oxp_feature_en_text[drvdata.rgb_en]);
+}
+static DEVICE_ATTR_RW(enabled);
+
+static ssize_t enabled_index_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	size_t count = 0;
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(oxp_feature_en_text); i++)
+		count += sysfs_emit_at(buf, count, "%s ", oxp_feature_en_text[i]);
+
+	if (count)
+		buf[count - 1] = '\n';
+
+	return count;
+}
+static DEVICE_ATTR_RO(enabled_index);
+
+static ssize_t effect_store(struct device *dev, struct device_attribute *attr,
+			    const char *buf, size_t count)
+{
+	int ret;
+	u8 val;
+
+	ret = sysfs_match_string(oxp_rgb_effect_text, buf);
+	if (ret < 0)
+		return ret;
+
+	val = ret;
+
+	ret = oxp_rgb_status_store(drvdata.rgb_en, drvdata.rgb_speed,
+				   drvdata.rgb_brightness);
+	if (ret)
+		return ret;
+
+	ret = oxp_rgb_effect_set(val);
+	if (ret)
+		return ret;
+
+	return count;
+}
+
+static ssize_t effect_show(struct device *dev, struct device_attribute *attr,
+			   char *buf)
+{
+	int ret;
+
+	ret = oxp_rgb_status_show();
+	if (ret)
+		return ret;
+
+	if (drvdata.rgb_effect >= ARRAY_SIZE(oxp_rgb_effect_text))
+		return -EINVAL;
+
+	return sysfs_emit(buf, "%s\n", oxp_rgb_effect_text[drvdata.rgb_effect]);
+}
+
+static DEVICE_ATTR_RW(effect);
+
+static ssize_t effect_index_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	size_t count = 0;
+	unsigned int i;
+
+	for (i = 1; i < ARRAY_SIZE(oxp_rgb_effect_text); i++)
+		count += sysfs_emit_at(buf, count, "%s ", oxp_rgb_effect_text[i]);
+
+	if (count)
+		buf[count - 1] = '\n';
+
+	return count;
+}
+static DEVICE_ATTR_RO(effect_index);
+
+static ssize_t speed_store(struct device *dev, struct device_attribute *attr,
+			   const char *buf, size_t count)
+{
+	int ret;
+	u8 val;
+
+	ret = kstrtou8(buf, 10, &val);
+	if (ret)
+		return ret;
+
+	if (val > 9)
+		return -EINVAL;
+
+	ret = oxp_rgb_status_store(drvdata.rgb_en, val, drvdata.rgb_brightness);
+	if (ret)
+		return ret;
+
+	drvdata.rgb_speed = val;
+	return count;
+}
+
+static ssize_t speed_show(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	int ret;
+
+	ret = oxp_rgb_status_show();
+	if (ret)
+		return ret;
+
+	if (drvdata.rgb_speed > 9)
+		return -EINVAL;
+
+	return sysfs_emit(buf, "%hhu\n", drvdata.rgb_speed);
+}
+static DEVICE_ATTR_RW(speed);
+
+static ssize_t speed_range_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "0-9\n");
+}
+static DEVICE_ATTR_RO(speed_range);
+
+static void oxp_rgb_queue_fn(struct work_struct *work)
+{
+	unsigned int max_brightness = drvdata.led_mc->led_cdev.max_brightness;
+	unsigned int brightness = drvdata.led_mc->led_cdev.brightness;
+	u8 val = 4 * brightness / max_brightness;
+	int ret;
+
+	if (drvdata.rgb_brightness != val) {
+		ret = oxp_rgb_status_store(drvdata.rgb_en, drvdata.rgb_speed, val);
+		if (ret)
+			dev_err(drvdata.led_mc->led_cdev.dev,
+				"Error: Failed to write RGB Status: %i\n", ret);
+
+		drvdata.rgb_brightness = val;
+	}
+
+	if (drvdata.rgb_effect != OXP_EFFECT_MONO_LIST)
+		return;
+
+	ret = oxp_rgb_effect_set(drvdata.rgb_effect);
+	if (ret)
+		dev_err(drvdata.led_mc->led_cdev.dev, "Error: Failed to write RGB color: %i\n",
+			ret);
+}
+
+static DECLARE_DELAYED_WORK(oxp_rgb_queue, oxp_rgb_queue_fn);
+
+static void oxp_rgb_brightness_set(struct led_classdev *led_cdev,
+				   enum led_brightness brightness)
+{
+	led_cdev->brightness = brightness;
+	mod_delayed_work(system_wq, &oxp_rgb_queue, msecs_to_jiffies(50));
+}
+
+static struct attribute *oxp_rgb_attrs[] = {
+	&dev_attr_effect.attr,
+	&dev_attr_effect_index.attr,
+	&dev_attr_enabled.attr,
+	&dev_attr_enabled_index.attr,
+	&dev_attr_speed.attr,
+	&dev_attr_speed_range.attr,
+	NULL,
+};
+
+static const struct attribute_group oxp_rgb_attr_group = {
+	.attrs = oxp_rgb_attrs,
+};
+
+static struct mc_subled oxp_rgb_subled_info[] = {
+	{
+		.color_index = LED_COLOR_ID_RED,
+		.intensity = 0x24,
+		.channel = 0x1,
+	},
+	{
+		.color_index = LED_COLOR_ID_GREEN,
+		.intensity = 0x22,
+		.channel = 0x2,
+	},
+	{
+		.color_index = LED_COLOR_ID_BLUE,
+		.intensity = 0x99,
+		.channel = 0x3,
+	},
+};
+
+static struct led_classdev_mc oxp_cdev_rgb = {
+	.led_cdev = {
+		.name = "oxp:rgb:joystick_rings",
+		.color = LED_COLOR_ID_RGB,
+		.brightness = 0x64,
+		.max_brightness = 0x64,
+		.brightness_set = oxp_rgb_brightness_set,
+	},
+	.num_colors = ARRAY_SIZE(oxp_rgb_subled_info),
+	.subled_info = oxp_rgb_subled_info,
+};
+
+static int oxp_cfg_probe(struct hid_device *hdev, u16 up)
+{
+	int ret;
+
+	hid_set_drvdata(hdev, &drvdata);
+	mutex_init(&drvdata.cfg_mutex);
+	drvdata.hdev = hdev;
+	drvdata.led_mc = &oxp_cdev_rgb;
+
+	ret = devm_led_classdev_multicolor_register(&hdev->dev, &oxp_cdev_rgb);
+	if (ret)
+		return dev_err_probe(&hdev->dev, ret,
+				     "Failed to create RGB device\n");
+
+	ret = devm_device_add_group(drvdata.led_mc->led_cdev.dev,
+				    &oxp_rgb_attr_group);
+	if (ret)
+		return dev_err_probe(drvdata.led_mc->led_cdev.dev, ret,
+				     "Failed to create RGB configuration attributes\n");
+
+	ret = oxp_rgb_status_show();
+	if (ret)
+		dev_warn(drvdata.led_mc->led_cdev.dev,
+			 "Failed to query RGB initial state: %i\n", ret);
+
+	return 0;
+}
+
+static int oxp_hid_probe(struct hid_device *hdev,
+			 const struct hid_device_id *id)
+{
+	int ret;
+	u16 up;
+
+	ret = hid_parse(hdev);
+	if (ret)
+		return dev_err_probe(&hdev->dev, ret, "Failed to parse HID device\n");
+
+	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
+	if (ret)
+		return dev_err_probe(&hdev->dev, ret, "Failed to start HID device\n");
+
+	ret = hid_hw_open(hdev);
+	if (ret) {
+		hid_hw_stop(hdev);
+		return dev_err_probe(&hdev->dev, ret, "Failed to open HID device\n");
+	}
+
+	up = get_usage_page(hdev);
+	dev_dbg(&hdev->dev, "Got usage page %04x\n", up);
+
+	switch (up) {
+	case GEN1_USAGE_PAGE:
+		ret = oxp_cfg_probe(hdev, up);
+		if (ret) {
+			hid_hw_close(hdev);
+			hid_hw_stop(hdev);
+		}
+
+		return ret;
+	default:
+		return 0;
+	}
+}
+
+static void oxp_hid_remove(struct hid_device *hdev)
+{
+	hid_hw_close(hdev);
+	hid_hw_stop(hdev);
+}
+
+static const struct hid_device_id oxp_devices[] = {
+	{ HID_USB_DEVICE(USB_VENDOR_ID_CRSC, USB_DEVICE_ID_ONEXPLAYER_GEN1) },
+	{}
+};
+
+MODULE_DEVICE_TABLE(hid, oxp_devices);
+static struct hid_driver hid_oxp = {
+	.name = "hid-oxp",
+	.id_table = oxp_devices,
+	.probe = oxp_hid_probe,
+	.remove = oxp_hid_remove,
+	.raw_event = oxp_hid_raw_event,
+};
+module_hid_driver(hid_oxp);
+
+MODULE_AUTHOR("Derek J. Clark <derekjohn.clark@gmail.com>");
+MODULE_DESCRIPTION("Driver for OneXPlayer HID Interfaces");
+MODULE_LICENSE("GPL");
-- 
2.53.0


^ permalink raw reply related

* [PATCH v3 2/5] HID: hid-oxp: Add Second Generation RGB Control
From: Derek J. Clark @ 2026-04-12 21:34 UTC (permalink / raw)
  To: Jiri Kosina, Benjamin Tissoires
  Cc: Pierre-Loup A . Griffais, Lambert Fan, Zhouwang Huang,
	Derek J . Clark, linux-input, linux-doc, linux-kernel
In-Reply-To: <20260412213444.2231505-1-derekjohn.clark@gmail.com>

Adds support for the second generation of RGB Control for OneXPlayer
devices. The interface mirrors the first generation, with some
differences in how messages are formatted.

Some devices have both a GEN1 MCU for RGB control and a GEN2 MCU for
button mapping. To avoid conflicts, quirk these devices to skip RGB
setup for the GEN2_USAGE_PAGE.

Reviewed-by: Zhouwang Huang <honjow311@gmail.com>
Tested-by: Zhouwang Huang <honjow311@gmail.com>
Signed-off-by: Derek J. Clark <derekjohn.clark@gmail.com>
---
v2:
  - Add DMI quirks table.
---
 drivers/hid/Kconfig   |   1 +
 drivers/hid/hid-ids.h |   3 +
 drivers/hid/hid-oxp.c | 151 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 155 insertions(+)

diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index 2deaec9f467d..b779088b80b6 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -924,6 +924,7 @@ config HID_OXP
 	depends on USB_HID
 	depends on LEDS_CLASS
 	depends on LEDS_CLASS_MULTICOLOR
+	depends on DMI
 	help
 	  Say Y here if you would like to enable support for OneXPlayer handheld
 	  devices that come with RGB LED rings around the joysticks and macro buttons.
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index dcc5a3a70eaf..0d1ff879e959 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -1134,6 +1134,9 @@
 #define USB_VENDOR_ID_CRSC			0x1a2c
 #define USB_DEVICE_ID_ONEXPLAYER_GEN1		0xb001
 
+#define USB_VENDOR_ID_WCH			0x1a86
+#define USB_DEVICE_ID_ONEXPLAYER_GEN2		0xfe00
+
 #define USB_VENDOR_ID_ONTRAK		0x0a07
 #define USB_DEVICE_ID_ONTRAK_ADU100	0x0064
 
diff --git a/drivers/hid/hid-oxp.c b/drivers/hid/hid-oxp.c
index c4219ecd8d71..25214356163e 100644
--- a/drivers/hid/hid-oxp.c
+++ b/drivers/hid/hid-oxp.c
@@ -10,6 +10,7 @@
 #include <linux/delay.h>
 #include <linux/dev_printk.h>
 #include <linux/device.h>
+#include <linux/dmi.h>
 #include <linux/hid.h>
 #include <linux/jiffies.h>
 #include <linux/kstrtox.h>
@@ -24,12 +25,15 @@
 #define OXP_PACKET_SIZE 64
 
 #define GEN1_MESSAGE_ID	0xff
+#define GEN2_MESSAGE_ID	0x3f
 
 #define GEN1_USAGE_PAGE	0xff01
+#define GEN2_USAGE_PAGE	0xff00
 
 enum oxp_function_index {
 	OXP_FID_GEN1_RGB_SET =		0x07,
 	OXP_FID_GEN1_RGB_REPLY =	0x0f,
+	OXP_FID_GEN2_STATUS_EVENT =	0xb8,
 };
 
 static struct oxp_hid_cfg {
@@ -121,6 +125,22 @@ struct oxp_gen_1_rgb_report {
 	u8 blue;
 } __packed;
 
+struct oxp_gen_2_rgb_report {
+	u8 report_id;
+	u8 header_id;
+	u8 padding_2;
+	u8 message_id;
+	u8 padding_4[2];
+	u8 enabled;
+	u8 speed;
+	u8 brightness;
+	u8 red;
+	u8 green;
+	u8 blue;
+	u8 padding_12[3];
+	u8 effect;
+} __packed;
+
 static u16 get_usage_page(struct hid_device *hdev)
 {
 	return hdev->collection[0].usage >> 16;
@@ -161,6 +181,44 @@ static int oxp_hid_raw_event_gen_1(struct hid_device *hdev,
 	return 0;
 }
 
+static int oxp_hid_raw_event_gen_2(struct hid_device *hdev,
+				   struct hid_report *report, u8 *data,
+				   int size)
+{
+	struct led_classdev_mc *led_mc = drvdata.led_mc;
+	struct oxp_gen_2_rgb_report *rgb_rep;
+
+	if (data[0] != OXP_FID_GEN2_STATUS_EVENT)
+		return 0;
+
+	if (data[3] != OXP_GET_PROPERTY)
+		return 0;
+
+	rgb_rep = (struct oxp_gen_2_rgb_report *)data;
+	/* Ensure we save monocolor as the list value */
+	drvdata.rgb_effect = rgb_rep->effect == OXP_EFFECT_MONO_TRUE ?
+			     OXP_EFFECT_MONO_LIST :
+			     rgb_rep->effect;
+	drvdata.rgb_speed = rgb_rep->speed;
+	drvdata.rgb_en = rgb_rep->enabled == 0 ? OXP_FEAT_DISABLED :
+						 OXP_FEAT_ENABLED;
+	drvdata.rgb_brightness = rgb_rep->brightness;
+	led_mc->led_cdev.brightness = rgb_rep->brightness / 4 *
+				      led_mc->led_cdev.max_brightness;
+	/* If monocolor had less than 100% brightness on the previous boot,
+	 * there will be no reliable way to determine the real intensity.
+	 * Since intensity scaling is used with a hardware brightness set at max,
+	 * our brightness will always look like 100%. Use the last set value to
+	 * prevent successive boots from lowering the brightness further.
+	 * Brightness will be "wrong" but the effect will remain the same visually.
+	 */
+	led_mc->subled_info[0].intensity = rgb_rep->red;
+	led_mc->subled_info[1].intensity = rgb_rep->green;
+	led_mc->subled_info[2].intensity = rgb_rep->blue;
+
+	return 0;
+}
+
 static int oxp_hid_raw_event(struct hid_device *hdev, struct hid_report *report,
 			     u8 *data, int size)
 {
@@ -171,6 +229,8 @@ static int oxp_hid_raw_event(struct hid_device *hdev, struct hid_report *report,
 	switch (up) {
 	case GEN1_USAGE_PAGE:
 		return oxp_hid_raw_event_gen_1(hdev, report, data, size);
+	case GEN2_USAGE_PAGE:
+		return oxp_hid_raw_event_gen_2(hdev, report, data, size);
 	default:
 		break;
 	}
@@ -216,6 +276,18 @@ static int oxp_gen_1_property_out(enum oxp_function_index fid, u8 *data,
 	return mcu_property_out(header, header_size, data, data_size, NULL, 0);
 }
 
+static int oxp_gen_2_property_out(enum oxp_function_index fid, u8 *data,
+				  u8 data_size)
+{
+	u8 header[] = { fid, GEN2_MESSAGE_ID, 0x01 };
+	u8 footer[] = { GEN2_MESSAGE_ID, fid };
+	size_t header_size = ARRAY_SIZE(header);
+	size_t footer_size = ARRAY_SIZE(footer);
+
+	return mcu_property_out(header, header_size, data, data_size, footer,
+				footer_size);
+}
+
 static int oxp_rgb_status_store(u8 enabled, u8 speed, u8 brightness)
 {
 	u16 up = get_usage_page(drvdata.hdev);
@@ -230,6 +302,11 @@ static int oxp_rgb_status_store(u8 enabled, u8 speed, u8 brightness)
 		if (drvdata.rgb_effect == OXP_EFFECT_MONO_LIST)
 			data[3] = 0x04;
 		return oxp_gen_1_property_out(OXP_FID_GEN1_RGB_SET, data, 4);
+	case GEN2_USAGE_PAGE:
+		data = (u8[6]) { OXP_SET_PROPERTY, 0x00, 0x02, enabled, speed, brightness };
+		if (drvdata.rgb_effect == OXP_EFFECT_MONO_LIST)
+			data[5] = 0x04;
+		return oxp_gen_2_property_out(OXP_FID_GEN2_STATUS_EVENT, data, 6);
 	default:
 		return -ENODEV;
 	}
@@ -244,6 +321,9 @@ static ssize_t oxp_rgb_status_show(void)
 	case GEN1_USAGE_PAGE:
 		data = (u8[1]) { OXP_GET_PROPERTY };
 		return oxp_gen_1_property_out(OXP_FID_GEN1_RGB_SET, data, 1);
+	case GEN2_USAGE_PAGE:
+		data = (u8[3]) { OXP_GET_PROPERTY, 0x00, 0x02 };
+		return oxp_gen_2_property_out(OXP_FID_GEN2_STATUS_EVENT, data, 3);
 	default:
 		return -ENODEV;
 	}
@@ -274,6 +354,16 @@ static int oxp_rgb_color_set(void)
 			data[3 * i + 3] = blue;
 		}
 		return oxp_gen_1_property_out(OXP_FID_GEN1_RGB_SET, data, size);
+	case GEN2_USAGE_PAGE:
+		size = 57;
+		data = (u8[57]) { OXP_EFFECT_MONO_TRUE, 0x00, 0x02 };
+
+		for (i = 1; i < size / 3; i++) {
+			data[3 * i] = red;
+			data[3 * i + 1] = green;
+			data[3 * i + 2] = blue;
+		}
+		return oxp_gen_2_property_out(OXP_FID_GEN2_STATUS_EVENT, data, size);
 	default:
 		return -ENODEV;
 	}
@@ -310,6 +400,10 @@ static int oxp_rgb_effect_set(u8 effect)
 			data = (u8[1]) { effect };
 			ret = oxp_gen_1_property_out(OXP_FID_GEN1_RGB_SET, data, 1);
 			break;
+		case GEN2_USAGE_PAGE:
+			data = (u8[3]) { effect, 0x00, 0x02 };
+			ret = oxp_gen_2_property_out(OXP_FID_GEN2_STATUS_EVENT, data, 3);
+			break;
 		default:
 			ret = -ENODEV;
 		}
@@ -560,6 +654,56 @@ static struct led_classdev_mc oxp_cdev_rgb = {
 	.subled_info = oxp_rgb_subled_info,
 };
 
+struct quirk_entry {
+	bool hybrid_mcu;
+};
+
+static struct quirk_entry quirk_hybrid_mcu = {
+	.hybrid_mcu = true,
+};
+
+static const struct dmi_system_id oxp_hybrid_mcu_list[] = {
+	{
+		.ident = "OneXPlayer Apex",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "ONE-NETBOOK"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "ONEXPLAYER APEX"),
+		},
+		.driver_data = &quirk_hybrid_mcu,
+	},
+	{
+		.ident = "OneXPlayer G1 AMD",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "ONE-NETBOOK"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "ONEXPLAYER G1 A"),
+		},
+		.driver_data = &quirk_hybrid_mcu,
+	},
+	{
+		.ident = "OneXPlayer G1 Intel",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "ONE-NETBOOK"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "ONEXPLAYER G1 i"),
+		},
+		.driver_data = &quirk_hybrid_mcu,
+	},
+	{},
+};
+
+static bool oxp_hybrid_mcu_device(void)
+{
+	const struct dmi_system_id *dmi_id;
+	struct quirk_entry *quirks;
+
+	dmi_id = dmi_first_match(oxp_hybrid_mcu_list);
+	if (!dmi_id)
+		return false;
+
+	quirks = dmi_id->driver_data;
+
+	return quirks->hybrid_mcu;
+}
+
 static int oxp_cfg_probe(struct hid_device *hdev, u16 up)
 {
 	int ret;
@@ -567,6 +711,10 @@ static int oxp_cfg_probe(struct hid_device *hdev, u16 up)
 	hid_set_drvdata(hdev, &drvdata);
 	mutex_init(&drvdata.cfg_mutex);
 	drvdata.hdev = hdev;
+
+	if (up == GEN2_USAGE_PAGE && oxp_hybrid_mcu_device())
+		goto skip_rgb;
+
 	drvdata.led_mc = &oxp_cdev_rgb;
 
 	ret = devm_led_classdev_multicolor_register(&hdev->dev, &oxp_cdev_rgb);
@@ -585,6 +733,7 @@ static int oxp_cfg_probe(struct hid_device *hdev, u16 up)
 		dev_warn(drvdata.led_mc->led_cdev.dev,
 			 "Failed to query RGB initial state: %i\n", ret);
 
+skip_rgb:
 	return 0;
 }
 
@@ -613,6 +762,7 @@ static int oxp_hid_probe(struct hid_device *hdev,
 
 	switch (up) {
 	case GEN1_USAGE_PAGE:
+	case GEN2_USAGE_PAGE:
 		ret = oxp_cfg_probe(hdev, up);
 		if (ret) {
 			hid_hw_close(hdev);
@@ -633,6 +783,7 @@ static void oxp_hid_remove(struct hid_device *hdev)
 
 static const struct hid_device_id oxp_devices[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CRSC, USB_DEVICE_ID_ONEXPLAYER_GEN1) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_WCH, USB_DEVICE_ID_ONEXPLAYER_GEN2) },
 	{}
 };
 
-- 
2.53.0


^ permalink raw reply related

* [PATCH v3 3/5] HID: hid-oxp: Add Second Generation Gamepad Mode Switch
From: Derek J. Clark @ 2026-04-12 21:34 UTC (permalink / raw)
  To: Jiri Kosina, Benjamin Tissoires
  Cc: Pierre-Loup A . Griffais, Lambert Fan, Zhouwang Huang,
	Derek J . Clark, linux-input, linux-doc, linux-kernel
In-Reply-To: <20260412213444.2231505-1-derekjohn.clark@gmail.com>

Add a "gamepad_mode" attribute to second-generation OneXPlayer
configuration HID devices. This attribute initiates a mode shift in the
device MCU that puts it into a state where all events are routed to a
hidraw interface instead of the xpad evdev interface. This allows for
debugging the hardware input mapping added in the next patch.

Reviewed-by: Zhouwang Huang <honjow311@gmail.com>
Tested-by: Zhouwang Huang <honjow311@gmail.com>
Signed-off-by: Derek J. Clark <derekjohn.clark@gmail.com>
---
v2:
  - Rename to gamepad_mode & show relevant gamepad modes instead of
    using a debug enable/disable paradigm, to match other drivers.
---
 drivers/hid/hid-oxp.c | 130 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 130 insertions(+)

diff --git a/drivers/hid/hid-oxp.c b/drivers/hid/hid-oxp.c
index 25214356163e..c62952537d98 100644
--- a/drivers/hid/hid-oxp.c
+++ b/drivers/hid/hid-oxp.c
@@ -33,6 +33,7 @@
 enum oxp_function_index {
 	OXP_FID_GEN1_RGB_SET =		0x07,
 	OXP_FID_GEN1_RGB_REPLY =	0x0f,
+	OXP_FID_GEN2_TOGGLE_MODE =	0xb2,
 	OXP_FID_GEN2_STATUS_EVENT =	0xb8,
 };
 
@@ -41,11 +42,22 @@ static struct oxp_hid_cfg {
 	struct hid_device *hdev;
 	struct mutex cfg_mutex; /*ensure single synchronous output report*/
 	u8 rgb_brightness;
+	u8 gamepad_mode;
 	u8 rgb_effect;
 	u8 rgb_speed;
 	u8 rgb_en;
 } drvdata;
 
+enum oxp_gamepad_mode_index {
+	OXP_GP_MODE_XINPUT = 0x00,
+	OXP_GP_MODE_DEBUG = 0x03,
+};
+
+static const char *const oxp_gamepad_mode_text[] = {
+	[OXP_GP_MODE_XINPUT] = "xinput",
+	[OXP_GP_MODE_DEBUG] = "debug",
+};
+
 enum oxp_feature_en_index {
 	OXP_FEAT_DISABLED,
 	OXP_FEAT_ENABLED,
@@ -181,6 +193,32 @@ static int oxp_hid_raw_event_gen_1(struct hid_device *hdev,
 	return 0;
 }
 
+static int oxp_gen_2_property_out(enum oxp_function_index fid, u8 *data, u8 data_size);
+
+static void oxp_mcu_init_fn(struct work_struct *work)
+{
+	u8 gp_mode_data[3] = { OXP_GP_MODE_DEBUG, 0x01, 0x02 };
+	int ret;
+
+	/* Cycle the gamepad mode */
+	ret = oxp_gen_2_property_out(OXP_FID_GEN2_TOGGLE_MODE, gp_mode_data, 3);
+	if (ret)
+		dev_err(&drvdata.hdev->dev,
+			"Error: Failed to set gamepad mode: %i\n", ret);
+
+	/* Remainder only applies for xinput mode */
+	if (drvdata.gamepad_mode == OXP_GP_MODE_DEBUG)
+		return;
+
+	gp_mode_data[0] = OXP_GP_MODE_XINPUT;
+	ret = oxp_gen_2_property_out(OXP_FID_GEN2_TOGGLE_MODE, gp_mode_data, 3);
+	if (ret)
+		dev_err(&drvdata.hdev->dev,
+			"Error: Failed to set gamepad mode: %i\n", ret);
+}
+
+static DECLARE_DELAYED_WORK(oxp_mcu_init, oxp_mcu_init_fn);
+
 static int oxp_hid_raw_event_gen_2(struct hid_device *hdev,
 				   struct hid_report *report, u8 *data,
 				   int size)
@@ -191,6 +229,14 @@ static int oxp_hid_raw_event_gen_2(struct hid_device *hdev,
 	if (data[0] != OXP_FID_GEN2_STATUS_EVENT)
 		return 0;
 
+	/* Sent ~6s after resume event, indicating the MCU has fully reset.
+	 * Re-apply our settings after this has been received.
+	 */
+	if (data[3] == OXP_EFFECT_MONO_TRUE) {
+		mod_delayed_work(system_wq, &oxp_mcu_init, msecs_to_jiffies(50));
+		return 0;
+	}
+
 	if (data[3] != OXP_GET_PROPERTY)
 		return 0;
 
@@ -288,6 +334,77 @@ static int oxp_gen_2_property_out(enum oxp_function_index fid, u8 *data,
 				footer_size);
 }
 
+static ssize_t gamepad_mode_store(struct device *dev,
+				  struct device_attribute *attr, const char *buf,
+				  size_t count)
+{
+	u16 up = get_usage_page(drvdata.hdev);
+	u8 data[3] = { 0x00, 0x01, 0x02 };
+	int ret = -EINVAL;
+	int i;
+
+	if (up != GEN2_USAGE_PAGE)
+		return ret;
+
+	for (i = 0; i < ARRAY_SIZE(oxp_gamepad_mode_text); i++) {
+		if (oxp_gamepad_mode_text[i] && sysfs_streq(buf, oxp_gamepad_mode_text[i])) {
+			ret = i;
+			break;
+		}
+	}
+	if (ret < 0)
+		return ret;
+
+	data[0] = ret;
+
+	ret = oxp_gen_2_property_out(OXP_FID_GEN2_TOGGLE_MODE, data, 3);
+	if (ret)
+		return ret;
+
+	drvdata.gamepad_mode = data[0];
+
+	return count;
+}
+
+static ssize_t gamepad_mode_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%s\n", oxp_gamepad_mode_text[drvdata.gamepad_mode]);
+}
+static DEVICE_ATTR_RW(gamepad_mode);
+
+static ssize_t gamepad_mode_index_show(struct device *dev,
+				       struct device_attribute *attr,
+				       char *buf)
+{
+	ssize_t count = 0;
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(oxp_gamepad_mode_text); i++) {
+		if (!oxp_gamepad_mode_text[i] ||
+		    oxp_gamepad_mode_text[i][0] == '\0')
+			continue;
+
+		count += sysfs_emit_at(buf, count, "%s ", oxp_gamepad_mode_text[i]);
+	}
+
+	if (count)
+		buf[count - 1] = '\n';
+
+	return count;
+}
+static DEVICE_ATTR_RO(gamepad_mode_index);
+
+static struct attribute *oxp_cfg_attrs[] = {
+	&dev_attr_gamepad_mode.attr,
+	&dev_attr_gamepad_mode_index.attr,
+	NULL,
+};
+
+static const struct attribute_group oxp_cfg_attrs_group = {
+	.attrs = oxp_cfg_attrs,
+};
+
 static int oxp_rgb_status_store(u8 enabled, u8 speed, u8 brightness)
 {
 	u16 up = get_usage_page(drvdata.hdev);
@@ -733,7 +850,20 @@ static int oxp_cfg_probe(struct hid_device *hdev, u16 up)
 		dev_warn(drvdata.led_mc->led_cdev.dev,
 			 "Failed to query RGB initial state: %i\n", ret);
 
+	/* Below features are only implemented in gen 2 */
+	if (up != GEN2_USAGE_PAGE)
+		return 0;
+
 skip_rgb:
+	mod_delayed_work(system_wq, &oxp_mcu_init, msecs_to_jiffies(50));
+
+	drvdata.gamepad_mode = OXP_GP_MODE_XINPUT;
+
+	ret = devm_device_add_group(&hdev->dev, &oxp_cfg_attrs_group);
+	if (ret)
+		return dev_err_probe(&hdev->dev, ret,
+				     "Failed to attach configuration attributes\n");
+
 	return 0;
 }
 
-- 
2.53.0


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox