Linux Documentation
 help / color / mirror / Atom feed
* [PATCH v2] checkpatch: add --json output mode
From: Sasha Levin @ 2026-04-08 17:24 UTC (permalink / raw)
  To: dwaipayanray1, lukas.bulwahn
  Cc: joe, mricon, corbet, skhan, apw, workflows, linux-doc,
	linux-kernel, Sasha Levin
In-Reply-To: <20260406170039.4034716-1-sashal@kernel.org>

Add a --json flag to checkpatch.pl that emits structured JSON output,
making results machine-parseable for CI systems, IDE integrations, and
AI-assisted code review tools.

The JSON output includes per-file totals (errors, warnings, checks,
lines) and an array of individual issues with structured fields for
level, type, message, file path, and line number.

The --json flag is mutually exclusive with --terse and --emacs.
Normal text output behavior is completely unchanged when --json is
not specified.

Assisted-by: Claude:claude-opus-4-6
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
Changes since v1:
- Replace hand-rolled json_escape()/json_encode_issue() with JSON::PP
  module (core since perl 5.14), as suggested by Konstantin and Joe
- Factor duplicated empty-result JSON blocks into json_print_result()
  helper
- Include used_types and ignored_types arrays in JSON output instead of
  suppressing hash_show_words, per Joe's suggestion
---
 Documentation/dev-tools/checkpatch.rst |  7 +++
 scripts/checkpatch.pl                  | 64 +++++++++++++++++++++++---
 2 files changed, 65 insertions(+), 6 deletions(-)

diff --git a/Documentation/dev-tools/checkpatch.rst b/Documentation/dev-tools/checkpatch.rst
index dccede68698ca..17e5744d3dee6 100644
--- a/Documentation/dev-tools/checkpatch.rst
+++ b/Documentation/dev-tools/checkpatch.rst
@@ -64,6 +64,13 @@ Available options:
 
    Output only one line per report.
 
+ - --json
+
+   Output results as a JSON object.  The object includes total error, warning,
+   and check counts, plus an array of individual issues with structured fields
+   for level, type, message, file, and line number.  Cannot be used with
+   --terse or --emacs.
+
  - --showfile
 
    Show the diffed file position instead of the input file position.
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index e56374662ff79..38d1a4a13ee8e 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -14,6 +14,7 @@ use File::Basename;
 use Cwd 'abs_path';
 use Term::ANSIColor qw(:constants);
 use Encode qw(decode encode);
+use JSON::PP;
 
 my $P = $0;
 my $D = dirname(abs_path($P));
@@ -33,6 +34,7 @@ my $chk_patch = 1;
 my $tst_only;
 my $emacs = 0;
 my $terse = 0;
+my $json = 0;
 my $showfile = 0;
 my $file = 0;
 my $git = 0;
@@ -93,6 +95,7 @@ Options:
   --patch                    treat FILE as patchfile (default)
   --emacs                    emacs compile window format
   --terse                    one line per report
+  --json                     output results as JSON
   --showfile                 emit diffed file position, not input file position
   -g, --git                  treat FILE as a single commit or git revision range
                              single git commit with:
@@ -320,6 +323,7 @@ GetOptions(
 	'patch!'	=> \$chk_patch,
 	'emacs!'	=> \$emacs,
 	'terse!'	=> \$terse,
+	'json!'		=> \$json,
 	'showfile!'	=> \$showfile,
 	'f|file!'	=> \$file,
 	'g|git!'	=> \$git,
@@ -379,6 +383,7 @@ help($help - 1) if ($help);
 
 die "$P: --git cannot be used with --file or --fix\n" if ($git && ($file || $fix));
 die "$P: --verbose cannot be used with --terse\n" if ($verbose && $terse);
+die "$P: --json cannot be used with --terse or --emacs\n" if ($json && ($terse || $emacs));
 
 if ($color =~ /^[01]$/) {
 	$color = !$color;
@@ -1351,7 +1356,7 @@ for my $filename (@ARGV) {
 	}
 	close($FILE);
 
-	if ($#ARGV > 0 && $quiet == 0) {
+	if (!$json && $#ARGV > 0 && $quiet == 0) {
 		print '-' x length($vname) . "\n";
 		print "$vname\n";
 		print '-' x length($vname) . "\n";
@@ -1372,7 +1377,7 @@ for my $filename (@ARGV) {
 	$file = $oldfile if ($is_git_file);
 }
 
-if (!$quiet) {
+if (!$quiet && !$json) {
 	hash_show_words(\%use_type, "Used");
 	hash_show_words(\%ignore_type, "Ignored");
 
@@ -2395,6 +2400,18 @@ sub report {
 
 	push(our @report, $output);
 
+	if ($json) {
+		our ($realfile, $realline);
+		my %issue = (
+			level => $level,
+			type => $type,
+			message => $msg,
+		);
+		$issue{file} = $realfile if (defined $realfile && $realfile ne '');
+		$issue{line} = $realline + 0 if (defined $realline && $realline);
+		push(our @json_issues, \%issue);
+	}
+
 	return 1;
 }
 
@@ -2402,6 +2419,23 @@ sub report_dump {
 	our @report;
 }
 
+sub json_print_result {
+	my ($filename, $total_errors, $total_warnings, $total_checks,
+	    $total_lines, $issues, $used_types, $ignored_types) = @_;
+	my %result = (
+		filename       => $filename,
+		total_errors   => $total_errors + 0,
+		total_warnings => $total_warnings + 0,
+		total_checks   => $total_checks + 0,
+		total_lines    => $total_lines + 0,
+		issues         => $issues,
+	);
+	$result{used_types} = $used_types if (defined $used_types);
+	$result{ignored_types} = $ignored_types if (defined $ignored_types);
+	my $json_encoder = JSON::PP->new->canonical->utf8;
+	print $json_encoder->encode(\%result) . "\n";
+}
+
 sub fixup_current_range {
 	my ($lineRef, $offset, $length) = @_;
 
@@ -2690,14 +2724,15 @@ sub process {
 	my $last_coalesced_string_linenr = -1;
 
 	our @report = ();
+	our @json_issues = ();
 	our $cnt_lines = 0;
 	our $cnt_error = 0;
 	our $cnt_warn = 0;
 	our $cnt_chk = 0;
 
 	# Trace the real file/line as we go.
-	my $realfile = '';
-	my $realline = 0;
+	our $realfile = '';
+	our $realline = 0;
 	my $realcnt = 0;
 	my $here = '';
 	my $context_function;		#undef'd unless there's a known function
@@ -7791,18 +7826,27 @@ sub process {
 	# If we have no input at all, then there is nothing to report on
 	# so just keep quiet.
 	if ($#rawlines == -1) {
+		if ($json) {
+			json_print_result($filename, 0, 0, 0, 0, []);
+		}
 		exit(0);
 	}
 
 	# In mailback mode only produce a report in the negative, for
 	# things that appear to be patches.
 	if ($mailback && ($clean == 1 || !$is_patch)) {
+		if ($json) {
+			json_print_result($filename, 0, 0, 0, 0, []);
+		}
 		exit(0);
 	}
 
 	# This is not a patch, and we are in 'no-patch' mode so
 	# just keep quiet.
 	if (!$chk_patch && !$is_patch) {
+		if ($json) {
+			json_print_result($filename, 0, 0, 0, 0, []);
+		}
 		exit(0);
 	}
 
@@ -7850,6 +7894,13 @@ sub process {
 		}
 	}
 
+	if ($json) {
+		my @used = sort keys %use_type;
+		my @ignored = sort keys %ignore_type;
+		json_print_result($filename, $cnt_error, $cnt_warn,
+				  $cnt_chk, $cnt_lines, \@json_issues,
+				  \@used, \@ignored);
+	} else {
 	print report_dump();
 	if ($summary && !($clean == 1 && $quiet == 1)) {
 		print "$filename " if ($summary_file);
@@ -7878,8 +7929,9 @@ NOTE: Whitespace errors detected.
 EOM
 		}
 	}
+	} # end !$json
 
-	if ($clean == 0 && $fix &&
+	if (!$json && $clean == 0 && $fix &&
 	    ("@rawlines" ne "@fixed" ||
 	     $#fixed_inserted >= 0 || $#fixed_deleted >= 0)) {
 		my $newfile = $filename;
@@ -7918,7 +7970,7 @@ EOM
 		}
 	}
 
-	if ($quiet == 0) {
+	if (!$json && $quiet == 0) {
 		print "\n";
 		if ($clean == 1) {
 			print "$vname has no obvious style problems and is ready for submission.\n";
-- 
2.53.0


^ permalink raw reply related

* Re: [PATCH v10 12/21] gpu: nova-core: mm: Add unified page table entry wrapper enums
From: Joel Fernandes @ 2026-04-08 16:58 UTC (permalink / raw)
  To: Eliot Courtney, linux-kernel
  Cc: Miguel Ojeda, Boqun Feng, Gary Guo, Bjorn Roy Baron, Benno Lossin,
	Andreas Hindborg, Alice Ryhl, Trevor Gross, Danilo Krummrich,
	Dave Airlie, Daniel Almeida, Koen Koning, dri-devel,
	rust-for-linux, Nikola Djukic, Maarten Lankhorst, Maxime Ripard,
	Thomas Zimmermann, David Airlie, Simona Vetter, Jonathan Corbet,
	Alex Deucher, Christian Koenig, Jani Nikula, Joonas Lahtinen,
	Rodrigo Vivi, Tvrtko Ursulin, Huang Rui, Matthew Auld,
	Matthew Brost, Lucas De Marchi, Thomas Hellstrom, Helge Deller,
	Alex Gaynor, Boqun Feng, John Hubbard, Alistair Popple,
	Timur Tabi, Edwin Peer, Alexandre Courbot, Andrea Righi,
	Andy Ritger, Zhi Wang, Balbir Singh, Philipp Stanner,
	Elle Rhumsaa, alexeyi, joel, linux-doc, amd-gfx, intel-gfx,
	intel-xe, linux-fbdev
In-Reply-To: <DHNT32C2Q5HN.LLME0RV17Z8V@nvidia.com>

Hi Eliot,

On 4/8/2026 9:26 AM, Eliot Courtney wrote:
> On Tue Apr 7, 2026 at 10:59 PM JST, Joel Fernandes wrote:
>> Hi Eliot,
>>
>> On 4/7/2026 9:42 AM, Eliot Courtney wrote:
>>> On Tue Apr 7, 2026 at 6:55 AM JST, Joel Fernandes wrote:
>>>>>> +    /// Compute upper bound on page table pages needed for `num_virt_pages`.
>>>>>> +    ///
>>>>>> +    /// Walks from PTE level up through PDE levels, accumulating the tree.
>>>>>> +    pub(crate) fn pt_pages_upper_bound(&self, num_virt_pages: usize) -> usize {
>>>>>> +        let mut total = 0;
>>>>>> +
>>>>>> +        // PTE pages at the leaf level.
>>>>>> +        let pte_epp = self.entries_per_page(self.pte_level());
>>>>>> +        let mut pages_at_level = num_virt_pages.div_ceil(pte_epp);
>>>>>> +        total += pages_at_level;
>>>>>> +
>>>>>> +        // Walk PDE levels bottom-up (reverse of pde_levels()).
>>>>>> +        for &level in self.pde_levels().iter().rev() {
>>>>>> +            let epp = self.entries_per_page(level);
>>>>>> +
>>>>>> +            // How many pages at this level do we need to point to
>>>>>> +            // the previous pages_at_level?
>>>>>> +            pages_at_level = pages_at_level.div_ceil(epp);
>>>>>> +            total += pages_at_level;
>>>>>> +        }
>>>>>> +
>>>>>> +        total
>>>>>> +    }
>>>>>> +}
>>>>>> +
>>>>>
>>>>> We have a lot of matches on the MMU version here (and below in Pte, Pde,
>>>>> DualPde). What about making MmuVersion into a trait (e.g. Mmu) with
>>>>> associated types for Pte, Pde, DualPde which can implement traits
>>>>> defining their common operations too?
>>>>
>>>> I coded this up and it did not look pretty, there's not much LOC savings and the
>>>> code becomes harder to read because of parametrization of several functions. Also:
>>>
>>> Thanks for looking into it. Sorry to be a bother, but would you have a
>>> branch around with the code? I'm curious what didn't look good about it.
>>
>> Sorry but I already mentioned that above, the parameterizing of dozens of
>> function call sites, 3-4 new traits (because each struct like
>> Pte/Pde/DualPde etc each need their own trait which different MMU versions
>> implement) etc. The code becomes hard to read and readability is the top
>> critical criterion for me - I am personally strictly against "Let's use shiny
>> features in language at the cost of making code unreadable". Because that
>> translates into bugs and nightmare for maintainability.
>>
>> I don't have the code at the moment, but if you still want to spend time
>> on this direction, feel free to share a tree. I am happy to take a look.
> 
> I had a go at this, you can see the branch here [1] - it might not be
> perfect, but I think the shape is directionally good. It's structured so
> the HEAD commit has the diff from the current approach to the
> parametrised approach. The main decision is where to do the type
> erasure, I chose in `Vmm` since it looks like the main top level API for
> this code, but could do `BarUser` instead. I think it's overall better.
> I also think Alex's point about associated types making it easier to use
> the appropriate Bounded type is a good one.
> 
> [1]: https://github.com/Edgeworth/linux/commits/review/nova-mm-v10/
First, thanks for the effort. I looked through this, it's pretty much what I
had before when I used traits. I don't think it is better to be honest. In
fact your version is worse, it adds many new types and things like the
following which I did not need before.

To put it mildly, the following suggestion should not be anywhere near my code:

/// Type-erased MMU-specific [`Vmm`] implementations.
enum VmmInner {
    /// `Vmm` implementation for MMU v2.
    V2(VmmImpl<MmuV2>),
    /// `Vmm` implementation for MMU v3.
    V3(VmmImpl<MmuV3>),
}

/// MMU-specific [`Vmm`] implementation.
struct VmmImpl<M: Mmu> {

Seriously, I have to pass on this. :-)

And, you unfortunately seem to have ignored my point about requiring 4 NEW
traits (Mmu, PteOps, PdeOps, DualPdeOps etc), which I did not need before.
So you're making the code much much worse than before actually. We don't
add new traits and types pointlessly.

The only positive thing I could take away from your diff is the following
(I thought I had already done that, but I'll double check).

-    fn level_index(&self, level: u64) -> u64 {
+    fn level_index(&self, level: PageTableLevel) -> u64 {

Also you're parametrizing VirtualAddress as well which I did not have before:

-     let va = VirtualAddress::from(vfn);
+     let va = M::va(VirtualAddress::from(vfn));

This is another step back.

> I also think Alex's point about associated types making it easier to use
> the appropriate Bounded type is a good one.

I will reply to Alex thread, separately. I have some good data that should
hopefully convince you and Alex that my approach in this patch is better
(Version struct based dispatch than monomorphization). I would emphasize,
as we all know, that we should make optimizations and changes based on real
data and proper technical arguments so in the spirit of that, I have
collected data with both approaches and I will reply to Alex's email with
all that in there.

Also, the bounded types usage is orthogonal to version-parameterization.
That can be done regardless, we already use bitfield macro in this code and
can use bounded types within that if needed to restrict type creation. So I
don't think we should mix the 2 concepts "bounded types" and
"parameterization".

thanks,

--
Joel Fernandes



^ permalink raw reply

* Re: [PATCH v8 0/2] PCI: s390: Expose the UID as an arch specific PCI slot attribute
From: Bjorn Helgaas @ 2026-04-08 16:57 UTC (permalink / raw)
  To: Vasily Gorbik
  Cc: Bjorn Helgaas, Niklas Schnelle, Jonathan Corbet, Lukas Wunner,
	Shuah Khan, Farhan Ali, Alexander Gordeev, Christian Borntraeger,
	Gerald Schaefer, Gerd Bayer, Heiko Carstens, Julian Ruess,
	Matthew Rosato, Peter Oberparleiter, Ramesh Errabolu,
	Sven Schnelle, linux-doc, linux-kernel, linux-pci, linux-s390,
	Randy Dunlap
In-Reply-To: <ttd6cui@ub.hpns>

On Wed, Apr 08, 2026 at 02:18:18PM +0200, Vasily Gorbik wrote:
> On Tue, Apr 07, 2026 at 03:24:44PM +0200, Niklas Schnelle wrote:
> > Add a mechanism for architecture specific attributes on
> > PCI slots in order to add the user-defined ID (UID) as an s390 specific
> > PCI slot attribute. First though improve some issues with the s390 specific
> > documentation of PCI sysfs attributes noticed during development.
> 
> > Niklas Schnelle (2):
> >       docs: s390/pci: Improve and update PCI documentation
> >       PCI: s390: Expose the UID as an arch specific PCI slot attribute
> > 
> >  Documentation/arch/s390/pci.rst | 151 +++++++++++++++++++++++++++-------------
> >  arch/s390/include/asm/pci.h     |   4 ++
> >  arch/s390/pci/pci_sysfs.c       |  20 ++++++
> >  drivers/pci/slot.c              |  13 +++-
> >  4 files changed, 140 insertions(+), 48 deletions(-)
> 
> Bjorn, would you like to take this through the PCI tree? I think Niklas
> phrased the subject with that in mind.
> 
> Otherwise, I can take it through the s390 tree. If so, could you give
> me your Acked-by?

I did ack it, but I guess it was a previous version:

  https://lore.kernel.org/all/20260407193205.GA247806@bhelgaas

It'd be great if you merged it via s390.  The interesting parts are
really in arch/s390.

^ permalink raw reply

* Re: [PATCH v8 2/2] PCI: s390: Expose the UID as an arch specific PCI slot attribute
From: Bjorn Helgaas @ 2026-04-08 16:57 UTC (permalink / raw)
  To: Niklas Schnelle
  Cc: Bjorn Helgaas, Jonathan Corbet, Lukas Wunner, Shuah Khan,
	Farhan Ali, Alexander Gordeev, Christian Borntraeger,
	Gerald Schaefer, Gerd Bayer, Heiko Carstens, Julian Ruess,
	Matthew Rosato, Peter Oberparleiter, Ramesh Errabolu,
	Sven Schnelle, Vasily Gorbik, linux-doc, linux-kernel, linux-pci,
	linux-s390
In-Reply-To: <20260407-uid_slot-v8-2-15ae4409d2ce@linux.ibm.com>

On Tue, Apr 07, 2026 at 03:24:46PM +0200, Niklas Schnelle wrote:
> On s390, an individual PCI function can generally be identified by two
> identifiers, the FID and the UID. Which identifier is used depends on
> the scope and the platform configuration.
> 
> The first identifier, the FID, is always available and identifies a PCI
> device uniquely within a machine. The FID may be virtualized by
> hypervisors, but on the LPAR level, the machine scope makes it
> impossible to create the same configuration based on FIDs on two
> different LPARs of the same machine, and difficult to reuse across
> machines.
> 
> Such matching LPAR configurations are useful, though, allowing
> standardized setups and booting a Linux installation on different LPARs.
> To this end the UID, or user-defined identifier, was introduced. While
> it is only guaranteed to be unique within an LPAR and only if indicated
> by firmware, it allows users to replicate PCI device setups.
> 
> On s390, which uses a machine hypervisor, a per PCI function hotplug
> model is used. The shortcoming with the UID then is, that it is not
> visible to the user without first attaching the PCI function and
> accessing the "uid" device attribute. The FID, on the other hand, is
> used as the slot name and is thus known even with the PCI function in
> standby.
> 
> Remedy this shortcoming by providing the UID as an attribute on the slot
> allowing the user to identify a PCI function based on the UID without
> having to first attach it. Do this via a macro mechanism analogous to
> what was introduced by commit 265baca69a07 ("s390/pci: Stop usurping
> pdev->dev.groups") for the PCI device attributes.
> 
> Reviewed-by: Gerd Bayer <gbayer@linux.ibm.com>
> Reviewed-by: Julian Ruess <julianr@linux.ibm.com>
> Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>

Acked-by: Bjorn Helgaas <bhelgaas@google.com> # drivers/pci/slot.c

> ---
>  Documentation/arch/s390/pci.rst |  7 +++++++
>  arch/s390/include/asm/pci.h     |  4 ++++
>  arch/s390/pci/pci_sysfs.c       | 20 ++++++++++++++++++++
>  drivers/pci/slot.c              | 13 ++++++++++++-
>  4 files changed, 43 insertions(+), 1 deletion(-)
> 
> diff --git a/Documentation/arch/s390/pci.rst b/Documentation/arch/s390/pci.rst
> index c3476de4f03278d07099aa32cbea0f868b6e9c9c..80f4ba19315994da056a10b4d216d61ff22ea5aa 100644
> --- a/Documentation/arch/s390/pci.rst
> +++ b/Documentation/arch/s390/pci.rst
> @@ -58,6 +58,13 @@ Entries specific to zPCI functions and entries that hold zPCI information.
>  
>    - /sys/bus/pci/slots/XXXXXXXX/power
>  
> +  In addition to using the FID as the name of the slot, the slot directory
> +  also contains the following s390-specific slot attributes.
> +
> +  - uid:
> +    The User-defined identifier (UID) of the function which may be configured
> +    by this slot. See also the corresponding attribute of the device.
> +
>    A physical function that currently supports a virtual function cannot be
>    powered off until all virtual functions are removed with:
>    echo 0 > /sys/bus/pci/devices/DDDD:BB:dd.f/sriov_numvf
> diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
> index c0ff19dab5807c7e1aabb48a0e9436aac45ec97d..5dcf35f0f325f5f44b28109a1c8d9aef18401035 100644
> --- a/arch/s390/include/asm/pci.h
> +++ b/arch/s390/include/asm/pci.h
> @@ -208,6 +208,10 @@ extern const struct attribute_group zpci_ident_attr_group;
>  			    &pfip_attr_group,		 \
>  			    &zpci_ident_attr_group,
>  
> +extern const struct attribute_group zpci_slot_attr_group;
> +
> +#define ARCH_PCI_SLOT_GROUPS (&zpci_slot_attr_group)
> +
>  extern unsigned int s390_pci_force_floating __initdata;
>  extern unsigned int s390_pci_no_rid;
>  
> diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
> index c2444a23e26c4218832bb91930b5f0ffd498d28f..d98d97df792adb3c7e415a8d374cc2f3a65fbb52 100644
> --- a/arch/s390/pci/pci_sysfs.c
> +++ b/arch/s390/pci/pci_sysfs.c
> @@ -187,6 +187,17 @@ static ssize_t index_show(struct device *dev,
>  }
>  static DEVICE_ATTR_RO(index);
>  
> +static ssize_t zpci_uid_slot_show(struct pci_slot *slot, char *buf)
> +{
> +	struct zpci_dev *zdev = container_of(slot->hotplug, struct zpci_dev,
> +					     hotplug_slot);
> +
> +	return sysfs_emit(buf, "0x%x\n", zdev->uid);
> +}
> +
> +static struct pci_slot_attribute zpci_slot_attr_uid =
> +	__ATTR(uid, 0444, zpci_uid_slot_show, NULL);
> +
>  static umode_t zpci_index_is_visible(struct kobject *kobj,
>  				     struct attribute *attr, int n)
>  {
> @@ -243,6 +254,15 @@ const struct attribute_group pfip_attr_group = {
>  	.attrs = pfip_attrs,
>  };
>  
> +static struct attribute *zpci_slot_attrs[] = {
> +	&zpci_slot_attr_uid.attr,
> +	NULL,
> +};
> +
> +const struct attribute_group zpci_slot_attr_group = {
> +	.attrs = zpci_slot_attrs,
> +};
> +
>  static struct attribute *clp_fw_attrs[] = {
>  	&uid_checking_attr.attr,
>  	NULL,
> diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c
> index 787311614e5b6ebb39e7284f9b9f205a0a684d6d..2f8fcfbbec24e73d0bb6e40fd04c05a94f518045 100644
> --- a/drivers/pci/slot.c
> +++ b/drivers/pci/slot.c
> @@ -96,7 +96,18 @@ static struct attribute *pci_slot_default_attrs[] = {
>  	&pci_slot_attr_cur_speed.attr,
>  	NULL,
>  };
> -ATTRIBUTE_GROUPS(pci_slot_default);
> +
> +static const struct attribute_group pci_slot_default_group = {
> +	.attrs = pci_slot_default_attrs,
> +};
> +
> +static const struct attribute_group *pci_slot_default_groups[] = {
> +	&pci_slot_default_group,
> +#ifdef ARCH_PCI_SLOT_GROUPS
> +	ARCH_PCI_SLOT_GROUPS,
> +#endif
> +	NULL,
> +};
>  
>  static const struct kobj_type pci_slot_ktype = {
>  	.sysfs_ops = &pci_slot_sysfs_ops,
> 
> -- 
> 2.51.0
> 

^ permalink raw reply

* Re: [PATCH RFC v4 10/44] KVM: guest_memfd: Add support for KVM_SET_MEMORY_ATTRIBUTES2
From: Ackerley Tng @ 2026-04-08 16:54 UTC (permalink / raw)
  To: Sean Christopherson, Michael Roth
  Cc: Vishal Annapurve, aik, andrew.jones, binbin.wu, brauner,
	chao.p.peng, david, ira.weiny, jmattson, jthoughton, oupton,
	pankaj.gupta, qperret, rick.p.edgecombe, rientjes, shivankg,
	steven.price, tabba, willy, wyihan, yan.y.zhao, forkloop,
	pratyush, suzuki.poulose, aneesh.kumar, Paolo Bonzini,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
	H. Peter Anvin, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, Shuah Khan,
	Andrew Morton, Chris Li, Kairui Song, Kemeng Shi, Nhat Pham,
	Baoquan He, Barry Song, Axel Rasmussen, Yuanchu Xie, Wei Xu,
	Jason Gunthorpe, Vlastimil Babka, kvm, linux-kernel,
	linux-trace-kernel, linux-doc, linux-kselftest, linux-mm
In-Reply-To: <adWidf8UgZeYctr1@google.com>

Sean Christopherson <seanjc@google.com> writes:

> On Tue, Apr 07, 2026, Michael Roth wrote:
>> On Tue, Apr 07, 2026 at 02:50:58PM -0700, Vishal Annapurve wrote:
>> > On Tue, Apr 7, 2026 at 2:09 PM Michael Roth <michael.roth@amd.com> wrote:
>> > >
>> > > > TLDR:
>> > > >
>> > > > + Think of populate ioctls not as KVM touching memory, but platform
>> > > >   handling population.
>> > > > + KVM code (kvm_gmem_populate) still doesn't touch memory contents
>> > > > + post_populate is platform-specific code that handles loading into
>> > > >   private destination memory just to support legacy non-in-place
>> > > >   conversion.
>> > > > + Don't complicate populate ioctls by doing conversion just to support
>> > > >   legacy use-cases where platform-specific code has to do copying on
>> > > >   the host.
>> > >
>> > > That's a good point: these are only considerations in the context of
>> > > actually copying from src->dst, but with in-place conversion the
>> > > primary/more-performant approach will be for userspace to initialize
>> > > directly. I.e. if we enforced that, then gmem could rightly ascertain that
>> > > it isn't even writing to private pages via these hooks and any
>> > > manipulation of that memory is purely on the part of the trusted entity
>> > > handling initial encryption/etc.
>> > >
>> > > I understand that we decided to keep the option of allowing separate
>> > > src/dst even with in-place conversion, but it doesn't seem worthwhile if
>> > > that necessarily means we need to glue population+conversion together in
>> > > 1 clumsy interface that needs to handle partial return/error responses to
>> > > userspace (or potentially get stuck forever in the conversion path).
>> >
>> > I think ARM needs userspace to specify separate source and destination
>> > memory ranges for initial population as ARM doesn't support in-place
>> > memory encryption. [1]
>> >
>> > [1] https://lore.kernel.org/kvm/20260318155413.793430-25-steven.price@arm.com/
>> >
>> > >
>> > > So I agree with Ackerley's proposal (which I guess is the same as what's
>> > > in this series).
>> > >
>> > > However, 1 other alternative would be to do what was suggested on the
>> > > call, but require userspace to subsequently handle the shared->private
>> > > conversion. I think that would be workable too.
>> >
>> > IIUC, Converting memory ranges to private after it essentially is
>> > treated as private by the KVM CC backend will expose the
>> > implementation to the same risk of userspace being able to access
>> > private memory and compromise host safety which guest_memfd was
>> > invented to address.
>>
>> Doh, fair point. Doing conversion as part of the populate call would allow
>> us to use the filemap write-lock to avoid userspace being able to fault
>> in private (as tracked by trusted entity) pages before they are
>> transitioned to private (as tracked by KVM), so it's safer than having
>> userspace drive it.
>>
>> But obviously I still think Ackerley's original proposal has more
>> upsides than the alternatives mentioned so far.
>
> I'm a bit lost.  What exactly is/was Ackerley's original proposal?  If the answer
> is "convert pages from shared=>private when populating via in-place conversion",
> then I agree, because AFAICT, that's the only sane option.

Discussed this at PUCK today 2026-04-08.

The update is that the KVM_SET_MEMORY_ATTRIBUTES2 guest_memfd ioctl will
now support the PRESERVE flag for TDX and SNP only if the setup for the
VM in question hasn't yet been completed (KVM_TDX_FINALIZE_VM or
KVM_SEV_SNP_LAUNCH_FINISH hasn't completed yet).

The populate flow will be

1a. Get contents to be loaded in guest_memfd (src_addr: NULL) as shared
OR
1b. Provide contents from some other userspace address (src_addr:
    userspace address)

2.  KVM_SET_MEMORY_ATTRIBUTES2(attribute: PRIVATE and flags: PRESERVE)
3.  KVM_SEV_SNP_LAUNCH_UPDATE() or KVM_TDX_INIT_MEM_REGION()
...
4.  KVM_SEV_SNP_LAUNCH_FINISH() or KVM_TDX_FINALIZE_VM()

This applies whether src_addr is some userspace address that is shared
or NULL, so the non-in-place loading flow is not considered legacy. ARM
CCA can still use that flow :)

Other than supporting PRESERVE only if the setup for the VM in question
hasn't yet been completed, KVM's fault path will also not permit faults
if the setup hasn't been completed. (Some exception setup will be used
for TDX to be able to perform the required fault.)

^ permalink raw reply

* Re: [PATCH 0/4] docs/zh_CN: update rust/ subsystem translations
From: Ben Guo @ 2026-04-08 16:54 UTC (permalink / raw)
  To: Dongliang Mu, Alex Shi, Yanteng Si, Jonathan Corbet
  Cc: linux-doc, linux-kernel, rust-for-linux
In-Reply-To: <8dd6239f-eac6-4e81-a1b5-a4e6c45d07fd@hust.edu.cn>

On 4/8/26 7:44 PM, Dongliang Mu wrote:
> Hi Guo,
> 
> I found an issue in this patchset: please do not directly include my 
> review tag from the internal mailing list [1].
> 
> After you submit it to the linux‑doc mailing list, I will add my review 
> tag at that time. Including it now would look inappropriate.
> 
> Our internal review is only intended to maintain patch quality for our 
> open‑source club.

Hi Dongliang,

Thanks for pointing this out.

I will remove your Reviewed-by from all patches and resend as v2.

Thanks,
Ben

^ permalink raw reply

* Re: [PATCH 3/4] docs/zh_CN: update rust/quick-start.rst translation
From: Ben Guo @ 2026-04-08 16:51 UTC (permalink / raw)
  To: Gary Guo, Alex Shi, Yanteng Si, Dongliang Mu, Jonathan Corbet
  Cc: linux-doc, linux-kernel, rust-for-linux
In-Reply-To: <DHNQOSMQJV1A.18UJB6VG0QK70@garyguo.net>

On 4/8/26 7:33 PM, Gary Guo wrote:
> Hi Ben,
> 
> Thanks for updating the doc translation. There have been new changes to
> quick-start.rst on rust-next, could you update the translation to base on that
> please?
> 
> Thanks,
> Gary

Hi Gary,

Thanks for the review. This series is based on the Chinese documentation
maintainer's tree (alexs/linux.git docs-next), which does not yet have
the latest quick-start.rst changes from the Rust-for-Linux rust-next
tree.

Would it be better to wait until those changes land in our base tree
and then resend with the updated translation? Or would you prefer a
different approach?

Thanks,
Ben

^ permalink raw reply

* Re: [RFC PATCH v3 00/10] mm/damon: introduce DAMOS failed region quota charge ratio
From: Bijan Tabatabai @ 2026-04-08 16:48 UTC (permalink / raw)
  To: SeongJae Park
  Cc: Bijan Tabatabai, Liam R. Howlett, Andrew Morton, Brendan Higgins,
	David Gow, David Hildenbrand, Jonathan Corbet, Lorenzo Stoakes,
	Michal Hocko, Mike Rapoport, Shuah Khan, Shuah Khan,
	Suren Baghdasaryan, Vlastimil Babka, damon, kunit-dev, linux-doc,
	linux-kernel, linux-kselftest, linux-mm
In-Reply-To: <20260407010536.83603-1-sj@kernel.org>

On Mon,  6 Apr 2026 18:05:22 -0700 SeongJae Park <sj@kernel.org> wrote:

Hi SJ,

> TL; DR: Let users set different DAMOS quota charge ratios for DAMOS
> action failed regions, for deterministic and consistent DAMOS action
> progress.
> 
> Common Reports: Unexpectedly Slow DAMOS
> =======================================
> 
> One common issue report that we get from DAMON users is that DAMOS
> action applying progress speed is sometimes much slower than expected.
> And one common root cause is that the DAMOS quota is exceeded by the
> action applying failed memory regions.
> 
> For example, a group of users tried to run DAMOS-based proactive memory
> reclamation (DAMON_RECLAIM) with 100 MiB per second DAMOS quota.  They
> ran it on a system having no active workload which means all memory of
> the system is cold.  The expectation was that the system will show 100
> MiB per second reclamation until (nearly) all memory is reclaimed. But
> what they found is that the speed is quite inconsistent and sometimes it
> becomes much slower than expected, sometimes even with no reclamation
> at all for tens of seconds.  The upper limit of the speed (100 MiB
> per second) was being kept as expected, though.
> 
> By monitoring the qt_exceeds (number of DAMOS quota exceed events) DAMOS
> stat, we found DAMOS quota is always exceeded when the speed is slow. By
> monitoring sz_tried and sz_applied (the total amount of DAMOS action
> tried memory and succeeded memory) DAMOS stats together, we found the
> reclamation attempts nearly always failed when the speed is slow.
> 
> DAMOS quota charges DAMOS action tried regions regardless of the
> successfulness of the try.  Hence in the example reported case, there
> was unreclaimable memory spread around the system memory.  Sometimes
> nearly 100 MiB of memory that DAMOS tried to reclaim in the given quota
> interval was reclaimable, and therefore showed nearly 100 MiB per second
> speed.  Sometimes nearly 99 MiB of memory that DAMOS was trying to
> reclaim in the given quota interval was unreclaimable, and therefore
> showing only about 1 MiB per second reclaim speed.
> 
> We explained it is an expected behavior of the feature rather than a
> bug, as DAMOS quota is there for only the upper-limit of the speed.  The
> users agreed and later reported a huge win from the adoption of
> DAMON_RECLAIM on their products.

Thanks for this series. This is a problem I have come across and am looking
forward to seeing this land.

> It is Not a Bug but a Feature; But...
> =====================================
> 
> So nothing is broken.  DAMOS quota is working as intended, as the upper
> limit of the speed.  It also provides its behavior observability via
> DAMOS stat.  In the real world production environment that runs long
> term active workloads and values stability, the speed sometimes being
> slow is not a real problem.
> 
> But, the non-deterministic behavior is sometimes annoying, especially in
> lab environments.  Even in a realistic production environment, when
> there is a huge amount of DAMOS action unapplicable memory, the speed
> could be problematically slow.  Let's suppose a virtual machine
> provider that sets up 99% of the host memory as hugetlb pages that cannot
> be reclaimed, to give it to virtual machines.  Also, when aim-oriented
> DAMOS auto-tuning is applied, this could also make the internal feedback
> loop confused.
> 
> The intention of the current behavior was that trying DAMOS action to
> regions would anyway impose some overhead, and therefore somehow be
> charged.  But in the real world, the overhead for failed action is much
> lighter than successful action.  Charging those at the same ratio may be
> unfair, or at least suboptimum in some environments.
> 
> DAMOS Action Failed Region Quota Charge Ratio
> =============================================
> 
> Let users set the charge ratio for the action-failed memory, for more
> optimal and deterministic use of DAMOS.  It allows users to specify the
> numerator and the denominator of the ratio for flexible setup.  For
> example, let's suppose the numerator and the denominator are set to 1
> and 4,096, respectively.  The ratio is 1 / 4,096.  A DAMOS scheme action
> is applied to 5 GiB memory.  For 1 GiB of the memory, the action is
> succeeded.  For the rest (4 GiB), the action is failed.  Then, only 1
> GiB and 1 MiB quota is charged.
> 
> The optimal charge ratio will depend on the use case and
> system/workload.  I'd recommend starting from setting the numerator as 1
> and the denominator as PAGE_SIZE and tune based on the results, because
> many DAMOS actions are applied at page level.

This makes sense, but the quota is also considered when setting the minimum
allowable score in damos_adjust_quota(), which, to my understanding, assumes
that all of a region's data will be applied. If an action fails for
a significant amount of the memory, a lower score than what was calculated in
damos_adjust_quota() could be valid. If that's the case, the scheme would be
applied to fewer regions than strictly necessary.

As you mention above, this is not a correctness issue because the quota only
guarantees an upper limit on the amount of data the scheme is applied to.
Additionally, it may very well be true that what I listed above would not be
very noticeable in practice. I just thought this was worth pointing out as
something to think about.

Thanks,
Bijan

<snip>

Sent using hkml (https://github.com/sjp38/hackermail)

^ permalink raw reply

* [PATCH v3 1/2] platform/x86/intel-uncore-freq: Rename instance_id
From: Maciej Wieczor-Retman @ 2026-04-08 16:27 UTC (permalink / raw)
  To: skhan, ilpo.jarvinen, hansg, corbet, srinivas.pandruvada
  Cc: linux-kernel, platform-driver-x86, linux-doc, m.wieczorretman,
	Maciej Wieczor-Retman
In-Reply-To: <cover.1775665057.git.m.wieczorretman@pm.me>

From: Maciej Wieczor-Retman <maciej.wieczor-retman@intel.com>

The "instance" word has a specific meaning in TPMI. It is a physical
index related to compute dies and IO dies present on a single TPMI
partition (which is also a single TPMI device). It's used for mapping
MMIO blocks for direct TPMI register access.

The currently used "instance_id" uncore_data struct field is a
sequentially generated value that's used for appending to uncore
directories inside the /sys/devices/system/cpu/intel_uncore_frequency
directory. It has no relation to the physical TPMI elements.

Signed-off-by: Maciej Wieczor-Retman <maciej.wieczor-retman@intel.com>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
---
Changelog v3:
- Add Srinivas' Acked-by.

Changelog v2:
- Redid the first paragraph to better describe what "instance" is.
- Rename seqname_id to seqnum_id to emphasize it's a sequential number
  not sequential name.

 .../x86/intel/uncore-frequency/uncore-frequency-common.c    | 6 +++---
 .../x86/intel/uncore-frequency/uncore-frequency-common.h    | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c
index 7070c94324e0..25ab511ed8d2 100644
--- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c
+++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c
@@ -268,7 +268,7 @@ int uncore_freq_add_entry(struct uncore_data *data, int cpu)
 		if (ret < 0)
 			goto uncore_unlock;
 
-		data->instance_id = ret;
+		data->seqnum_id = ret;
 		scnprintf(data->name, sizeof(data->name), "uncore%02d", ret);
 	} else {
 		scnprintf(data->name, sizeof(data->name), "package_%02d_die_%02d",
@@ -281,7 +281,7 @@ int uncore_freq_add_entry(struct uncore_data *data, int cpu)
 	ret = create_attr_group(data, data->name);
 	if (ret) {
 		if (data->domain_id != UNCORE_DOMAIN_ID_INVALID)
-			ida_free(&intel_uncore_ida, data->instance_id);
+			ida_free(&intel_uncore_ida, data->seqnum_id);
 	} else {
 		data->control_cpu = cpu;
 		data->valid = true;
@@ -301,7 +301,7 @@ void uncore_freq_remove_die_entry(struct uncore_data *data)
 	data->control_cpu = -1;
 	data->valid = false;
 	if (data->domain_id != UNCORE_DOMAIN_ID_INVALID)
-		ida_free(&intel_uncore_ida, data->instance_id);
+		ida_free(&intel_uncore_ida, data->seqnum_id);
 
 	mutex_unlock(&uncore_lock);
 }
diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h
index 0abe850ef54e..0d5fd91ee0aa 100644
--- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h
+++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h
@@ -35,7 +35,7 @@
  * @die_id:		Die id for this instance
  * @domain_id:		Power domain id for this instance
  * @cluster_id:		cluster id in a domain
- * @instance_id:	Unique instance id to append to directory name
+ * @seqnum_id:		Unique sequential id to append to directory name
  * @name:		Sysfs entry name for this instance
  * @agent_type_mask:	Bit mask of all hardware agents for this domain
  * @uncore_attr_group:	Attribute group storage
@@ -71,7 +71,7 @@ struct uncore_data {
 	int die_id;
 	int domain_id;
 	int cluster_id;
-	int instance_id;
+	int seqnum_id;
 	char name[32];
 	u16  agent_type_mask;
 
-- 
2.53.0



^ permalink raw reply related

* [PATCH v3 2/2] platform/x86/intel-uncore-freq: Expose instance ID in the sysfs
From: Maciej Wieczor-Retman @ 2026-04-08 16:27 UTC (permalink / raw)
  To: skhan, ilpo.jarvinen, hansg, corbet, srinivas.pandruvada
  Cc: linux-kernel, platform-driver-x86, linux-doc, m.wieczorretman,
	Maciej Wieczor-Retman
In-Reply-To: <cover.1775665057.git.m.wieczorretman@pm.me>

From: Maciej Wieczor-Retman <maciej.wieczor-retman@intel.com>

Insufficient data is exported to allow direct access to TPMI registers
through MMIO. On non-partitioned systems domain_id can be used both for
mapping CPUs to their compute die IDs and for mapping die indices to
their MMIO memory blocks presented to userspace via TPMI debugfs.
However on partitioned systems the debugfs association doesn't work
anymore. This is due to how TPMI partitioning influences domain_id
calculation. The previous association is lost on partitioned systems in
order to keep using domain_id for mapping CPUs to compute dies.

Expose the instance ID in sysfs that's unique in the scope of one TPMI
partition (and hence one TPMI device). It's a physical index into mapped
MMIO blocks and can be used by userspace to figure out how to directly
access TPMI registers.

Signed-off-by: Maciej Wieczor-Retman <maciej.wieczor-retman@intel.com>
---
Changelog v3:
- Change sprintf -> sysfs_emit in show_instance_id().
- Change part of patch message 'MMIO memory blocks mapped' -> 'MMIO
  memory blocks presented to userspace...'
- Change assigning function to static inline.

Changelog v2:
- Redo the patch message.
- Redo the function comment that assigns instance_id.
- Modify the documentation.

 .../pm/intel_uncore_frequency_scaling.rst         |  7 +++++++
 .../uncore-frequency/uncore-frequency-common.c    | 10 ++++++++++
 .../uncore-frequency/uncore-frequency-common.h    |  6 +++++-
 .../uncore-frequency/uncore-frequency-tpmi.c      | 15 ++++++++++++++-
 4 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/Documentation/admin-guide/pm/intel_uncore_frequency_scaling.rst b/Documentation/admin-guide/pm/intel_uncore_frequency_scaling.rst
index d367ba4d744a..b43ad4d5e333 100644
--- a/Documentation/admin-guide/pm/intel_uncore_frequency_scaling.rst
+++ b/Documentation/admin-guide/pm/intel_uncore_frequency_scaling.rst
@@ -88,8 +88,15 @@ and "fabric_cluster_id" in the directory.
 
 Attributes in each directory:
 
+``instance_id``
+	This attribute is used to get die indices in userspace mapped MMIO
+	blocks. Indices are local to a single TPMI partition. Needed for direct
+	TPMI register access.
+
 ``domain_id``
 	This attribute is used to get the power domain id of this instance.
+	Indices are unique in all TPMI partitions on a given CPU package. Can be
+	used to map compute dies to corresponding CPUs.
 
 ``die_id``
 	This attribute is used to get the Linux die id of this instance.
diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c
index 25ab511ed8d2..3b554418a7a3 100644
--- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c
+++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c
@@ -29,6 +29,13 @@ static ssize_t show_domain_id(struct kobject *kobj, struct kobj_attribute *attr,
 	return sysfs_emit(buf, "%u\n", data->domain_id);
 }
 
+static ssize_t show_instance_id(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct uncore_data *data = container_of(attr, struct uncore_data, instance_id_kobj_attr);
+
+	return sysfs_emit(buf, "%u\n", data->instance_id);
+}
+
 static ssize_t show_fabric_cluster_id(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
 {
 	struct uncore_data *data = container_of(attr, struct uncore_data, fabric_cluster_id_kobj_attr);
@@ -200,6 +207,9 @@ static int create_attr_group(struct uncore_data *data, char *name)
 	if (data->domain_id != UNCORE_DOMAIN_ID_INVALID) {
 		init_attribute_root_ro(domain_id);
 		data->uncore_attrs[index++] = &data->domain_id_kobj_attr.attr;
+		init_attribute_root_ro(instance_id);
+		data->uncore_attrs[index++] = &data->instance_id_kobj_attr.attr;
+
 		init_attribute_root_ro(fabric_cluster_id);
 		data->uncore_attrs[index++] = &data->fabric_cluster_id_kobj_attr.attr;
 		init_attribute_root_ro(package_id);
diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h
index 0d5fd91ee0aa..e319448dc1a4 100644
--- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h
+++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h
@@ -36,6 +36,7 @@
  * @domain_id:		Power domain id for this instance
  * @cluster_id:		cluster id in a domain
  * @seqnum_id:		Unique sequential id to append to directory name
+ * @instance_id:	Die indices or feature instances for a single TPMI device
  * @name:		Sysfs entry name for this instance
  * @agent_type_mask:	Bit mask of all hardware agents for this domain
  * @uncore_attr_group:	Attribute group storage
@@ -56,6 +57,7 @@
  * @elc_floor_freq_khz_kobj_attr: Storage for kobject attribute elc_floor_freq_khz
  * @agent_types_kobj_attr: Storage for kobject attribute agent_type
  * @die_id_kobj_attr:	Attribute storage for die_id information
+ * @instance_id_kobj_attr: Attribute storage for instance_id value
  * @uncore_attrs:	Attribute storage for group creation
  *
  * This structure is used to encapsulate all data related to uncore sysfs
@@ -72,6 +74,7 @@ struct uncore_data {
 	int domain_id;
 	int cluster_id;
 	int seqnum_id;
+	int instance_id;
 	char name[32];
 	u16  agent_type_mask;
 
@@ -90,7 +93,8 @@ struct uncore_data {
 	struct kobj_attribute elc_floor_freq_khz_kobj_attr;
 	struct kobj_attribute agent_types_kobj_attr;
 	struct kobj_attribute die_id_kobj_attr;
-	struct attribute *uncore_attrs[15];
+	struct kobj_attribute instance_id_kobj_attr;
+	struct attribute *uncore_attrs[16];
 };
 
 #define UNCORE_DOMAIN_ID_INVALID	-1
diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c
index 1237d9570886..32d03bee09a0 100644
--- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c
+++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c
@@ -385,7 +385,19 @@ static u8 io_die_index_next;
 /* Lock to protect io_die_start, io_die_index_next */
 static DEFINE_MUTEX(domain_lock);
 
-static void set_domain_id(int id,  int num_resources,
+static inline void set_instance_id(int id, struct tpmi_uncore_cluster_info *cluster_info)
+{
+	/*
+	 * On non-partitioned systems domain_id can be used for mapping both
+	 * CPUs to compute die IDs and physical die indexes to MMIO mapped
+	 * memory. However on partitioned systems domain_id loses the second
+	 * association. Therefore instance_id should be used for that instead,
+	 * while domain_id should still be used to match CPUs to compute dies.
+	 */
+	cluster_info->uncore_data.instance_id = id;
+}
+
+static void set_domain_id(int id, int num_resources,
 			  struct oobmsm_plat_info *plat_info,
 			  struct tpmi_uncore_cluster_info *cluster_info)
 {
@@ -686,6 +698,7 @@ static int uncore_probe(struct auxiliary_device *auxdev, const struct auxiliary_
 			set_cdie_id(i, cluster_info, plat_info);
 
 			set_domain_id(i, num_resources, plat_info, cluster_info);
+			set_instance_id(i, cluster_info);
 
 			cluster_info->uncore_root = tpmi_uncore;
 
-- 
2.53.0



^ permalink raw reply related

* [PATCH v3 0/2] platform/x86/intel-uncore-freq: Expose instance ID in the sysfs
From: Maciej Wieczor-Retman @ 2026-04-08 16:27 UTC (permalink / raw)
  To: skhan, ilpo.jarvinen, hansg, corbet, srinivas.pandruvada
  Cc: linux-kernel, platform-driver-x86, linux-doc, m.wieczorretman

--- Motivation

This patchset is about exporting instance ID, a value used to uniquely
identify MMIO blocks in TPMI devices. Userspace tools like "pepc" [1]
can use it for direct MMIO reads or writes.

Currently exported information allows doing this on non-partitioned
systems, but partitioned systems require additional steps to map MMIO
blocks.

[1] https://github.com/intel/pepc

--- Background

* TPMI MMIO organization
For each TPMI device a direct register access is possible through MMIO
mapped blocks, where:
- Each block belongs to a different power domain.
- Each power domain is exposed in sysfs via a domain_id attribute.
- Power domain scope is per-die (either IO dies or compute dies).
- Compute die blocks are ordered first, before IO die blocks in
  MMIO space.

* Domain ID mapping
For compute dies, the mapping is architectural through a CPUID leaf or
via MSR 0x54:
- Compute die IDs directly correspond to CPU die IDs
- CPU die ID can be obtained from MSR 0x54 or recent CPUID leaves
- Example: domain_id equal to 1 applies to all CPUs with die ID 1

* IO die mapping
For IO dies, the relationship is generation/platform specific. It's
generally not recommended to assume any specific IO organization but
uncore sysfs provides an attribute to differentiate die types.

* Partitioning
In partitioned systems multiple TPMI devices exist per package. However
CPUs are still enumerated package-wide and so die IDs (domain_id) are
unique per-package. For example a single partition (single TPMI device)
Granite Rapids might order its dies in the following way:

+---------------------+-----------+
| Die type and number | Domain ID |
+---------------------+-----------+
| Compute die 0       |         0 |
| Compute die 1       |         1 |
| IO die 0            |         2 |
| IO die 1            |         3 |
+---------------------+-----------+

While a two partition system may be numbered in this way:

+---------------------+-------------+-------------+
| Die type and number |         Domain ID         |
| local in single     +-------------+-------------+
| partition scope     | Partition 0 | Partition 1 |
+---------------------+-------------+-------------+
| Compute die 0       |           0 |           2 |
| Compute die 1       |           1 |           3 |
| IO die 0            |           4 |           6 |
| IO die 1            |           5 |           7 |
+---------------------+-------------+-------------+

The cd_mask value from the TPMI bus info register is a bitmap that shows
which compute dies belong to which partition.

* Instance ID
Partition ID is not an architectural value, meaning there is no CPUID or
MSR to map a CPU to a partition number. Therefore to allow mapping CPUs
to compute dies as well as mapping TPMI registers in MMIO mapped space
two numbers need to be exported:
- domain_id
	- Whether the system is partitioned or not it still allows
	  mapping CPUs to compute die IDs.
- instance_id
	- A per-partition (and hence per-device) physical index to still
	  allow mapping MMIO blocks to both compute and IO dies. On
	  partitioned systems mapping IO dies would be very difficult
	  since they are only indexed after all the compute dies are
	  numbered.

As one can see, on non-partitioned systems the instance ID and domain ID
have the same value. It's only on partitioned systems that both values
are needed to keep all mapping functionality. To better show the
relationship this is how values on a partitioned system can look:

+---------------------+-------------+-------------+-------------+-------------+
| Die type and number |         Domain ID         |        Instance ID        |
| local in single     +-------------+-------------+-------------+-------------+
| partition scope     | Partition 0 | Partition 1 | Partition 0 | Partition 1 |
+---------------------+-------------+-------------+-------------+-------------+
| Compute die 0       |           0 |           2 |           0 |           0 |
| Compute die 1       |           1 |           3 |           1 |           1 |
| IO die 0            |           4 |           6 |           2 |           2 |
| IO die 1            |           5 |           7 |           3 |           3 |
+---------------------+-------------+-------------+-------------+-------------+

Changes in v3:
- Remove sentence from the cover letter claiming that the motivation was
  to replace doing the same thing through MSRs - that was deprecated and
  it's not available.
- sprintf() -> sysfs_emit() in show_instance_id().
- static -> static inline in set_instance_id().
- Small correction to 2/2 patch message.

Maciej Wieczor-Retman (2):
  platform/x86/intel-uncore-freq: Rename instance_id
  platform/x86/intel-uncore-freq: Expose instance ID in the sysfs

 .../pm/intel_uncore_frequency_scaling.rst        |  7 +++++++
 .../uncore-frequency/uncore-frequency-common.c   | 16 +++++++++++++---
 .../uncore-frequency/uncore-frequency-common.h   |  8 ++++++--
 .../uncore-frequency/uncore-frequency-tpmi.c     | 15 ++++++++++++++-
 4 files changed, 40 insertions(+), 6 deletions(-)

-- 
2.53.0



^ permalink raw reply

* htmldocs: Warning: sound/soc/codecs/tas67524.c references a file that doesn't exist: Documentation/sound/codecs/tas675x.rst
From: kernel test robot @ 2026-04-08 16:13 UTC (permalink / raw)
  To: Sen Wang; +Cc: oe-kbuild-all, 0day robot, linux-doc

tree:   https://github.com/intel-lab-lkp/linux/commits/Sen-Wang/ASoC-dt-bindings-Add-ti-tas67524/20260408-141601
head:   6d18e62ff6aa71d56585dca8035437bc9218eb19
commit: 6e3145ebbb92b213c028232cad30d7d99d2ecdbd ASoC: codecs: Add TAS67524 quad-channel audio amplifier driver
date:   10 hours ago
compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261)
docutils: docutils (Docutils 0.21.2, Python 3.13.5, on linux)
reproduce: (https://download.01.org/0day-ci/archive/20260408/202604081804.ImZjoifC-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202604081804.ImZjoifC-lkp@intel.com/

All warnings (new ones prefixed by >>):

   Warning: Documentation/translations/zh_CN/scsi/scsi_mid_low_api.rst references a file that doesn't exist: Documentation/Configure.help
   Warning: MAINTAINERS references a file that doesn't exist: Documentation/ABI/testing/sysfs-platform-ayaneo
   Warning: MAINTAINERS references a file that doesn't exist: Documentation/devicetree/bindings/display/bridge/megachips-stdpxxxx-ge-b850v3-fw.txt
   Warning: arch/powerpc/sysdev/mpic.c references a file that doesn't exist: Documentation/devicetree/bindings/powerpc/fsl/mpic.txt
   Warning: rust/kernel/sync/atomic/ordering.rs references a file that doesn't exist: srctree/tools/memory-model/Documentation/explanation.txt
>> Warning: sound/soc/codecs/tas67524.c references a file that doesn't exist: Documentation/sound/codecs/tas675x.rst
   Warning: tools/docs/documentation-file-ref-check references a file that doesn't exist: Documentation/virtual/lguest/lguest.c
   Warning: tools/docs/documentation-file-ref-check references a file that doesn't exist: m,\b(\S*)(Documentation/[A-Za-z0-9
   Warning: tools/docs/documentation-file-ref-check references a file that doesn't exist: Documentation/devicetree/dt-object-internal.txt
   Warning: tools/docs/documentation-file-ref-check references a file that doesn't exist: m,^Documentation/scheduler/sched-pelt
   Warning: tools/docs/documentation-file-ref-check references a file that doesn't exist: m,(Documentation/translations/[

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply

* Re: [PATCH 01/24] filelock: add support for ignoring deleg breaks for dir change events
From: Jeff Layton @ 2026-04-08 14:29 UTC (permalink / raw)
  To: Jan Kara
  Cc: Alexander Viro, Christian Brauner, Chuck Lever, Alexander Aring,
	Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
	Jonathan Corbet, Shuah Khan, NeilBrown, Olga Kornievskaia,
	Dai Ngo, Tom Talpey, Trond Myklebust, Anna Schumaker,
	Amir Goldstein, Calum Mackay, linux-fsdevel, linux-kernel,
	linux-trace-kernel, linux-doc, linux-nfs
In-Reply-To: <snnggefctfffpb3rsyhjdwmxozqdklqmweiojmxy7owettksgz@6vud2iacgeqc>

On Wed, 2026-04-08 at 15:45 +0200, Jan Kara wrote:
> On Tue 07-04-26 09:21:14, Jeff Layton wrote:
> > If a NFS client requests a directory delegation with a notification
> > bitmask covering directory change events, the server shouldn't recall
> > the delegation. Instead the client will be notified of the change after
> > the fact.
> > 
> > Add support for ignoring lease breaks on directory changes. Add a new
> > flags parameter to try_break_deleg() and teach __break_lease how to
> > ignore certain types of delegation break events.
> > 
> > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> 
> Looks good. Feel free to add:
> 
> Reviewed-by: Jan Kara <jack@suse.cz>
> 
> > @@ -222,6 +225,10 @@ struct file_lease *locks_alloc_lease(void);
> >  #define LEASE_BREAK_LAYOUT		BIT(2)	// break layouts only
> >  #define LEASE_BREAK_NONBLOCK		BIT(3)	// non-blocking break
> >  #define LEASE_BREAK_OPEN_RDONLY		BIT(4)	// readonly open event
> > +#define LEASE_BREAK_DIR_CREATE		BIT(6)  // dir deleg create event
> > +#define LEASE_BREAK_DIR_DELETE		BIT(7)  // dir deleg delete event
> > +#define LEASE_BREAK_DIR_RENAME		BIT(8)  // dir deleg rename event
> 
> Just curious why you've left out bit 5 here... :)
> 
> 								Honza

No reason. I've had this series for a couple of years now, and I think
bit 5 got removed at some point after I originally did this patch, and
I didn't notice when I fixed up the conflict. I'll plan to renumber
this for neatness' sake.

Thanks for the review!
-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply

* Re: [PATCH v2] Documentation: gpio: update the preferred method for using software node lookup
From: Bartosz Golaszewski @ 2026-04-08 13:55 UTC (permalink / raw)
  To: Linus Walleij, Bartosz Golaszewski, Jonathan Corbet, Shuah Khan,
	Dmitry Torokhov, Bartosz Golaszewski
  Cc: linux-gpio, linux-doc, linux-kernel
In-Reply-To: <20260403-doc-gpio-swnodes-v2-1-c705f5897b80@oss.qualcomm.com>


On Fri, 03 Apr 2026 15:04:55 +0200, Bartosz Golaszewski wrote:
> In its current version, the manual for converting board files from
> using GPIO lookup tables to software nodes recommends leaving the
> software nodes representing GPIO controllers as "free-floating", not
> attached objects and relying on the matching of their names against the
> GPIO controller's name. This is an abuse of the software node API and
> makes it impossible to create fw_devlinks between GPIO suppliers and
> consumers in this case. We want to remove this behavior from GPIOLIB and
> to this end, work on converting all existing drivers to using "attached"
> software nodes.
> 
> [...]

Applied, thanks!

[1/1] Documentation: gpio: update the preferred method for using software node lookup
      https://git.kernel.org/brgl/c/d129779da5e3f8878e105fb3ca8519d9ff759a91

Best regards,
-- 
Bartosz Golaszewski <bartosz.golaszewski@oss.qualcomm.com>

^ permalink raw reply

* Re: [PATCH 00/24] vfs/nfsd: add support for CB_NOTIFY callbacks in directory delegations
From: Jan Kara @ 2026-04-08 13:55 UTC (permalink / raw)
  To: Jeff Layton
  Cc: Alexander Viro, Christian Brauner, Jan Kara, Chuck Lever,
	Alexander Aring, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, NeilBrown,
	Olga Kornievskaia, Dai Ngo, Tom Talpey, Trond Myklebust,
	Anna Schumaker, Amir Goldstein, Calum Mackay, linux-fsdevel,
	linux-kernel, linux-trace-kernel, linux-doc, linux-nfs
In-Reply-To: <20260407-dir-deleg-v1-0-aaf68c478abd@kernel.org>

On Tue 07-04-26 09:21:13, Jeff Layton wrote:
> This patchset builds on the directory delegation work we did a few
> months ago, to add support for CB_NOTIFY callbacks for some events. In
> particular, creates, unlinks and renames. The server also sends updated
> directory attributes in the notifications. With this support, the client
> can register interest in a directory and get notifications about changes
> within it without losing its lease.
> 
> The series starts with patches to allow the vfs to ignore certain types
> of events on directories. nfsd can then request these sorts of
> delegations on directories, and then set up inotify watches on the
> directory to trigger sending CB_NOTIFY events.
> 
> This has mainly been tested with pynfs, with some new testcases that
> I'll be posting soon. They seem to work fine with those tests, but I
> don't think we'll want to merge these until we have a complete
> client-side implementation to test against.
> 
> Signed-off-by: Jeff Layton <jlayton@kernel.org>

The fsnotify changes and generic file locking changes look OK to me. I
don't feel confident enough with NFSD stuff to really review that :)

								Honza

> ---
> Jeff Layton (24):
>       filelock: add support for ignoring deleg breaks for dir change events
>       filelock: add a tracepoint to start of break_lease()
>       filelock: add an inode_lease_ignore_mask helper
>       nfsd: add protocol support for CB_NOTIFY
>       nfs_common: add new NOTIFY4_* flags proposed in RFC8881bis
>       nfsd: allow nfsd to get a dir lease with an ignore mask
>       vfs: add fsnotify_modify_mark_mask()
>       nfsd: update the fsnotify mark when setting or removing a dir delegation
>       nfsd: make nfsd4_callback_ops->prepare operation bool return
>       nfsd: add callback encoding and decoding linkages for CB_NOTIFY
>       nfsd: use RCU to protect fi_deleg_file
>       nfsd: add data structures for handling CB_NOTIFY
>       nfsd: add notification handlers for dir events
>       nfsd: add tracepoint to dir_event handler
>       nfsd: apply the notify mask to the delegation when requested
>       nfsd: add helper to marshal a fattr4 from completed args
>       nfsd: allow nfsd4_encode_fattr4_change() to work with no export
>       nfsd: send basic file attributes in CB_NOTIFY
>       nfsd: allow encoding a filehandle into fattr4 without a svc_fh
>       nfsd: add a fi_connectable flag to struct nfs4_file
>       nfsd: add the filehandle to returned attributes in CB_NOTIFY
>       nfsd: properly track requested child attributes
>       nfsd: track requested dir attributes
>       nfsd: add support to CB_NOTIFY for dir attribute changes
> 
>  Documentation/sunrpc/xdr/nfs4_1.x    | 264 ++++++++++++++-
>  fs/attr.c                            |   2 +-
>  fs/locks.c                           |  89 +++++-
>  fs/namei.c                           |  31 +-
>  fs/nfsd/filecache.c                  |  57 +++-
>  fs/nfsd/nfs4callback.c               |  60 +++-
>  fs/nfsd/nfs4layouts.c                |   5 +-
>  fs/nfsd/nfs4proc.c                   |  15 +
>  fs/nfsd/nfs4state.c                  | 524 ++++++++++++++++++++++++++----
>  fs/nfsd/nfs4xdr.c                    | 300 ++++++++++++++---
>  fs/nfsd/nfs4xdr_gen.c                | 601 ++++++++++++++++++++++++++++++++++-
>  fs/nfsd/nfs4xdr_gen.h                |  20 +-
>  fs/nfsd/state.h                      |  70 +++-
>  fs/nfsd/trace.h                      |  21 ++
>  fs/nfsd/xdr4.h                       |   5 +
>  fs/nfsd/xdr4cb.h                     |  12 +
>  fs/notify/mark.c                     |  29 ++
>  fs/posix_acl.c                       |   4 +-
>  fs/xattr.c                           |   4 +-
>  include/linux/filelock.h             |  54 +++-
>  include/linux/fsnotify_backend.h     |   1 +
>  include/linux/nfs4.h                 | 127 --------
>  include/linux/sunrpc/xdrgen/nfs4_1.h | 291 ++++++++++++++++-
>  include/trace/events/filelock.h      |  38 ++-
>  include/uapi/linux/nfs4.h            |   2 -
>  25 files changed, 2321 insertions(+), 305 deletions(-)
> ---
> base-commit: bd5b9fd5e3d55bc412cec4bebe5a11da2151de4a
> change-id: 20260325-dir-deleg-339066dd1017
> 
> Best regards,
> -- 
> Jeff Layton <jlayton@kernel.org>
> 
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply

* Re: (sashiko review) [PATCH v6 1/1] mm/damon: add node_eligible_mem_bp and node_ineligible_mem_bp goal metrics
From: SeongJae Park @ 2026-04-08 13:54 UTC (permalink / raw)
  To: Ravi Jonnalagadda
  Cc: SeongJae Park, damon, linux-mm, linux-kernel, linux-doc, akpm,
	corbet, bijan311, ajayjoshi, honggyu.kim, yunjeong.mun
In-Reply-To: <CALa+Y17YnrOe=UXWBMKJ1U6seKJuauDqAdTDYo1cCYnrP_vSFg@mail.gmail.com>

On Tue, 7 Apr 2026 19:33:43 -0700 Ravi Jonnalagadda <ravis.opensrc@gmail.com> wrote:

> On Tue, Apr 7, 2026 at 9:05 AM SeongJae Park <sj@kernel.org> wrote:
[...]
> Yes SJ. I think we can make it work with single goal now that the
> below commit is part of mainline. will give it a try and post an
> update.

Sounds good, please don't hesitate asking any questions.


Thanks,
SJ

[...]

^ permalink raw reply

* Re: [PATCH 08/24] nfsd: update the fsnotify mark when setting or removing a dir delegation
From: Jan Kara @ 2026-04-08 13:53 UTC (permalink / raw)
  To: Jeff Layton
  Cc: Alexander Viro, Christian Brauner, Jan Kara, Chuck Lever,
	Alexander Aring, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, NeilBrown,
	Olga Kornievskaia, Dai Ngo, Tom Talpey, Trond Myklebust,
	Anna Schumaker, Amir Goldstein, Calum Mackay, linux-fsdevel,
	linux-kernel, linux-trace-kernel, linux-doc, linux-nfs
In-Reply-To: <20260407-dir-deleg-v1-8-aaf68c478abd@kernel.org>

On Tue 07-04-26 09:21:21, Jeff Layton wrote:
> Add a new helper function that will update the mask on the nfsd_file's
> fsnotify_mark to be a union of all current directory delegations on an
> inode. Call that when directory delegations are added or removed.
> 
> Signed-off-by: Jeff Layton <jlayton@kernel.org>

Looks good. Feel free to add:

Reviewed-by: Jan Kara <jack@suse.cz>

								Honza

> ---
>  fs/nfsd/nfs4state.c | 33 +++++++++++++++++++++++++++++++++
>  1 file changed, 33 insertions(+)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index c8fb84c38637..9a4cff08c67d 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -1258,6 +1258,37 @@ static void nfsd4_finalize_deleg_timestamps(struct nfs4_delegation *dp, struct f
>  	}
>  }
>  
> +static void nfsd_fsnotify_recalc_mask(struct nfsd_file *nf)
> +{
> +	struct fsnotify_mark *mark = &nf->nf_mark->nfm_mark;
> +	struct inode *inode = file_inode(nf->nf_file);
> +	u32 lease_mask, set = 0, clear = 0;
> +
> +	/* This is only needed when adding or removing dir delegs */
> +	if (!S_ISDIR(inode->i_mode))
> +		return;
> +
> +	/* Set up notifications for any ignored delegation events */
> +	lease_mask = inode_lease_ignore_mask(inode);
> +
> +	if (lease_mask & FL_IGN_DIR_CREATE)
> +		set |= FS_CREATE;
> +	else
> +		clear |= FS_CREATE;
> +
> +	if (lease_mask & FL_IGN_DIR_DELETE)
> +		set |= FS_DELETE;
> +	else
> +		clear |= FS_DELETE;
> +
> +	if (lease_mask & FL_IGN_DIR_RENAME)
> +		set |= FS_RENAME;
> +	else
> +		clear |= FS_RENAME;
> +
> +	fsnotify_modify_mark_mask(mark, set, clear);
> +}
> +
>  static void nfs4_unlock_deleg_lease(struct nfs4_delegation *dp)
>  {
>  	struct nfs4_file *fp = dp->dl_stid.sc_file;
> @@ -1266,6 +1297,7 @@ static void nfs4_unlock_deleg_lease(struct nfs4_delegation *dp)
>  	WARN_ON_ONCE(!fp->fi_delegees);
>  
>  	nfsd4_finalize_deleg_timestamps(dp, nf->nf_file);
> +	nfsd_fsnotify_recalc_mask(nf);
>  	kernel_setlease(nf->nf_file, F_UNLCK, NULL, (void **)&dp);
>  	put_deleg_file(fp);
>  }
> @@ -9652,6 +9684,7 @@ nfsd_get_dir_deleg(struct nfsd4_compound_state *cstate,
>  
>  	if (!status) {
>  		put_nfs4_file(fp);
> +		nfsd_fsnotify_recalc_mask(nf);
>  		return dp;
>  	}
>  
> 
> -- 
> 2.53.0
> 
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply

* Re: [PATCH 03/24] filelock: add an inode_lease_ignore_mask helper
From: Jan Kara @ 2026-04-08 13:53 UTC (permalink / raw)
  To: Jeff Layton
  Cc: Alexander Viro, Christian Brauner, Jan Kara, Chuck Lever,
	Alexander Aring, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, NeilBrown,
	Olga Kornievskaia, Dai Ngo, Tom Talpey, Trond Myklebust,
	Anna Schumaker, Amir Goldstein, Calum Mackay, linux-fsdevel,
	linux-kernel, linux-trace-kernel, linux-doc, linux-nfs
In-Reply-To: <20260407-dir-deleg-v1-3-aaf68c478abd@kernel.org>

On Tue 07-04-26 09:21:16, Jeff Layton wrote:
> Add a new routine that returns a mask of all dir change events that are
> currently ignored by any leases. nfsd will use this to determine how to
> configure the fsnotify_mark mask.
> 
> Signed-off-by: Jeff Layton <jlayton@kernel.org>

Looks good. Feel free to add:

Reviewed-by: Jan Kara <jack@suse.cz>

								Honza

> ---
>  fs/locks.c               | 32 ++++++++++++++++++++++++++++++++
>  include/linux/filelock.h |  1 +
>  2 files changed, 33 insertions(+)
> 
> diff --git a/fs/locks.c b/fs/locks.c
> index 5af6dca2d46c..04980b065734 100644
> --- a/fs/locks.c
> +++ b/fs/locks.c
> @@ -1597,6 +1597,38 @@ any_leases_conflict(struct inode *inode, struct file_lease *breaker)
>  	return false;
>  }
>  
> +#define IGNORE_MASK	(FL_IGN_DIR_CREATE | FL_IGN_DIR_DELETE | FL_IGN_DIR_RENAME)
> +
> +/**
> + * inode_lease_ignore_mask - return union of all ignored inode events for this inode
> + * @inode: inode of which to get ignore mask
> + *
> + * Walk the list of leases, and return the result of all of
> + * their FL_IGN_DIR_* bits or'ed together.
> + */
> +u32
> +inode_lease_ignore_mask(struct inode *inode)
> +{
> +	struct file_lock_context *ctx;
> +	struct file_lock_core *flc;
> +	u32 mask = 0;
> +
> +	ctx = locks_inode_context(inode);
> +	if (!ctx)
> +		return 0;
> +
> +	spin_lock(&ctx->flc_lock);
> +	list_for_each_entry(flc, &ctx->flc_lease, flc_list) {
> +		mask |= flc->flc_flags & IGNORE_MASK;
> +		/* If we already have everything, we can stop */
> +		if (mask == IGNORE_MASK)
> +			break;
> +	}
> +	spin_unlock(&ctx->flc_lock);
> +	return mask;
> +}
> +EXPORT_SYMBOL_GPL(inode_lease_ignore_mask);
> +
>  static bool
>  ignore_dir_deleg_break(struct file_lease *fl, unsigned int flags)
>  {
> diff --git a/include/linux/filelock.h b/include/linux/filelock.h
> index 5a19cdb047da..416483b136f1 100644
> --- a/include/linux/filelock.h
> +++ b/include/linux/filelock.h
> @@ -236,6 +236,7 @@ int generic_setlease(struct file *, int, struct file_lease **, void **priv);
>  int kernel_setlease(struct file *, int, struct file_lease **, void **);
>  int vfs_setlease(struct file *, int, struct file_lease **, void **);
>  int lease_modify(struct file_lease *, int, struct list_head *);
> +u32 inode_lease_ignore_mask(struct inode *inode);
>  
>  struct notifier_block;
>  int lease_register_notifier(struct notifier_block *);
> 
> -- 
> 2.53.0
> 
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply

* Re: [PATCH 07/24] vfs: add fsnotify_modify_mark_mask()
From: Jan Kara @ 2026-04-08 13:51 UTC (permalink / raw)
  To: Jeff Layton
  Cc: Alexander Viro, Christian Brauner, Jan Kara, Chuck Lever,
	Alexander Aring, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, NeilBrown,
	Olga Kornievskaia, Dai Ngo, Tom Talpey, Trond Myklebust,
	Anna Schumaker, Amir Goldstein, Calum Mackay, linux-fsdevel,
	linux-kernel, linux-trace-kernel, linux-doc, linux-nfs
In-Reply-To: <20260407-dir-deleg-v1-7-aaf68c478abd@kernel.org>

On Tue 07-04-26 09:21:20, Jeff Layton wrote:
> nfsd needs to be able to modify the mask on an existing mark when new
> directory delegations are set or unset. Add an exported function that
> allows the caller to set and clear bits in the mark->mask, and does
> the recalculation if something changed.
> 
> Suggested-by: Jan Kara <jack@suse.cz>
> Signed-off-by: Jeff Layton <jlayton@kernel.org>

Looks good. Feel free to add:

Reviewed-by: Jan Kara <jack@suse.cz>

								Honza


> ---
>  fs/notify/mark.c                 | 29 +++++++++++++++++++++++++++++
>  include/linux/fsnotify_backend.h |  1 +
>  2 files changed, 30 insertions(+)
> 
> diff --git a/fs/notify/mark.c b/fs/notify/mark.c
> index c2ed5b11b0fe..b1e73c6fd382 100644
> --- a/fs/notify/mark.c
> +++ b/fs/notify/mark.c
> @@ -310,6 +310,35 @@ void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
>  		fsnotify_conn_set_children_dentry_flags(conn);
>  }
>  
> +/**
> + * fsnotify_modify_mark_mask - set and/or clear flags in a mark's mask
> + * @mark: mark to be modified
> + * @set: bits to be set in mask
> + * @clear: bits to be cleared in mask
> + *
> + * Modify a fsnotify_mark mask as directed, and update its associated conn.
> + * The caller is expected to hold a reference to the mark.
> + */
> +void fsnotify_modify_mark_mask(struct fsnotify_mark *mark, u32 set, u32 clear)
> +{
> +	bool recalc = false;
> +	u32 mask;
> +
> +	WARN_ON_ONCE(clear & set);
> +
> +	spin_lock(&mark->lock);
> +	mask = mark->mask;
> +	mark->mask |= set;
> +	mark->mask &= ~clear;
> +	if (mark->mask != mask)
> +		recalc = true;
> +	spin_unlock(&mark->lock);
> +
> +	if (recalc)
> +		fsnotify_recalc_mask(mark->connector);
> +}
> +EXPORT_SYMBOL_GPL(fsnotify_modify_mark_mask);
> +
>  /* Free all connectors queued for freeing once SRCU period ends */
>  static void fsnotify_connector_destroy_workfn(struct work_struct *work)
>  {
> diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
> index 95985400d3d8..66e185bd1b1b 100644
> --- a/include/linux/fsnotify_backend.h
> +++ b/include/linux/fsnotify_backend.h
> @@ -917,6 +917,7 @@ extern void fsnotify_get_mark(struct fsnotify_mark *mark);
>  extern void fsnotify_put_mark(struct fsnotify_mark *mark);
>  extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info);
>  extern bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info);
> +extern void fsnotify_modify_mark_mask(struct fsnotify_mark *mark, u32 set, u32 clear);
>  
>  static inline void fsnotify_init_event(struct fsnotify_event *event)
>  {
> 
> -- 
> 2.53.0
> 
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply

* Re: [PATCH 02/24] filelock: add a tracepoint to start of break_lease()
From: Jan Kara @ 2026-04-08 13:45 UTC (permalink / raw)
  To: Jeff Layton
  Cc: Alexander Viro, Christian Brauner, Jan Kara, Chuck Lever,
	Alexander Aring, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, NeilBrown,
	Olga Kornievskaia, Dai Ngo, Tom Talpey, Trond Myklebust,
	Anna Schumaker, Amir Goldstein, Calum Mackay, linux-fsdevel,
	linux-kernel, linux-trace-kernel, linux-doc, linux-nfs
In-Reply-To: <20260407-dir-deleg-v1-2-aaf68c478abd@kernel.org>

On Tue 07-04-26 09:21:15, Jeff Layton wrote:
> ...mostly to show the LEASE_BREAK_* flags.
> 
> Signed-off-by: Jeff Layton <jlayton@kernel.org>

OK. Feel free to add:

Reviewed-by: Jan Kara <jack@suse.cz>

								Honza

> ---
>  fs/locks.c                      |  2 ++
>  include/trace/events/filelock.h | 33 +++++++++++++++++++++++++++++++++
>  2 files changed, 35 insertions(+)
> 
> diff --git a/fs/locks.c b/fs/locks.c
> index dafa0752fdce..5af6dca2d46c 100644
> --- a/fs/locks.c
> +++ b/fs/locks.c
> @@ -1654,6 +1654,8 @@ int __break_lease(struct inode *inode, unsigned int flags)
>  	bool want_write = !(flags & LEASE_BREAK_OPEN_RDONLY);
>  	int error = 0;
>  
> +	trace_break_lease(inode, flags);
> +
>  	if (flags & LEASE_BREAK_LEASE)
>  		type = FL_LEASE;
>  	else if (flags & LEASE_BREAK_DELEG)
> diff --git a/include/trace/events/filelock.h b/include/trace/events/filelock.h
> index ef4bb0afb86a..fff0ee2d452d 100644
> --- a/include/trace/events/filelock.h
> +++ b/include/trace/events/filelock.h
> @@ -120,6 +120,39 @@ DEFINE_EVENT(filelock_lock, flock_lock_inode,
>  		TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
>  		TP_ARGS(inode, fl, ret));
>  
> +#define show_lease_break_flags(val)					\
> +	__print_flags(val, "|",						\
> +		{ LEASE_BREAK_LEASE,		"LEASE" },		\
> +		{ LEASE_BREAK_DELEG,		"DELEG" },		\
> +		{ LEASE_BREAK_LAYOUT,		"LAYOUT" },		\
> +		{ LEASE_BREAK_NONBLOCK,		"NONBLOCK" },		\
> +		{ LEASE_BREAK_OPEN_RDONLY,	"OPEN_RDONLY" },	\
> +		{ LEASE_BREAK_DIR_CREATE,	"DIR_CREATE" },		\
> +		{ LEASE_BREAK_DIR_DELETE,	"DIR_DELETE" },		\
> +		{ LEASE_BREAK_DIR_RENAME,	"DIR_RENAME" })
> +
> +TRACE_EVENT(break_lease,
> +	TP_PROTO(struct inode *inode, unsigned int flags),
> +
> +	TP_ARGS(inode, flags),
> +
> +	TP_STRUCT__entry(
> +		__field(unsigned long, i_ino)
> +		__field(dev_t, s_dev)
> +		__field(unsigned int, flags)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->s_dev = inode->i_sb->s_dev;
> +		__entry->i_ino = inode->i_ino;
> +		__entry->flags = flags;
> +	),
> +
> +	TP_printk("dev=0x%x:0x%x ino=0x%lx flags=%s",
> +		  MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
> +		  __entry->i_ino, show_lease_break_flags(__entry->flags))
> +);
> +
>  DECLARE_EVENT_CLASS(filelock_lease,
>  	TP_PROTO(struct inode *inode, struct file_lease *fl),
>  
> 
> -- 
> 2.53.0
> 
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply

* Re: [PATCH 01/24] filelock: add support for ignoring deleg breaks for dir change events
From: Jan Kara @ 2026-04-08 13:45 UTC (permalink / raw)
  To: Jeff Layton
  Cc: Alexander Viro, Christian Brauner, Jan Kara, Chuck Lever,
	Alexander Aring, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, NeilBrown,
	Olga Kornievskaia, Dai Ngo, Tom Talpey, Trond Myklebust,
	Anna Schumaker, Amir Goldstein, Calum Mackay, linux-fsdevel,
	linux-kernel, linux-trace-kernel, linux-doc, linux-nfs
In-Reply-To: <20260407-dir-deleg-v1-1-aaf68c478abd@kernel.org>

On Tue 07-04-26 09:21:14, Jeff Layton wrote:
> If a NFS client requests a directory delegation with a notification
> bitmask covering directory change events, the server shouldn't recall
> the delegation. Instead the client will be notified of the change after
> the fact.
> 
> Add support for ignoring lease breaks on directory changes. Add a new
> flags parameter to try_break_deleg() and teach __break_lease how to
> ignore certain types of delegation break events.
> 
> Signed-off-by: Jeff Layton <jlayton@kernel.org>

Looks good. Feel free to add:

Reviewed-by: Jan Kara <jack@suse.cz>

> @@ -222,6 +225,10 @@ struct file_lease *locks_alloc_lease(void);
>  #define LEASE_BREAK_LAYOUT		BIT(2)	// break layouts only
>  #define LEASE_BREAK_NONBLOCK		BIT(3)	// non-blocking break
>  #define LEASE_BREAK_OPEN_RDONLY		BIT(4)	// readonly open event
> +#define LEASE_BREAK_DIR_CREATE		BIT(6)  // dir deleg create event
> +#define LEASE_BREAK_DIR_DELETE		BIT(7)  // dir deleg delete event
> +#define LEASE_BREAK_DIR_RENAME		BIT(8)  // dir deleg rename event

Just curious why you've left out bit 5 here... :)

								Honza

-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply

* Re: [PATCH] crash: Support high memory reservation for range syntax
From: Baoquan He @ 2026-04-08 13:32 UTC (permalink / raw)
  To: Youling Tang, Sourabh Jain
  Cc: Andrew Morton, Jonathan Corbet, Vivek Goyal, Dave Young, kexec,
	linux-kernel, linux-doc, Youling Tang
In-Reply-To: <d584d383-1862-417d-9251-153d9bcf5626@linux.ibm.com>

On 04/08/26 at 10:01am, Sourabh Jain wrote:
> Hello Youling,
> 
> On 04/04/26 13:11, Youling Tang wrote:
> > From: Youling Tang <tangyouling@kylinos.cn>
> > 
> > The crashkernel range syntax (range1:size1[,range2:size2,...]) allows
> > automatic size selection based on system RAM, but it always reserves
> > from low memory. When a large crashkernel is selected, this can
> > consume most of the low memory, causing subsequent hardware
> > hotplug or drivers requiring low memory to fail due to allocation
> > failures.
> 
> 
> Support for high crashkernel reservation has been added to
> address the above problem.
> 
> However, high crashkernel reservation is not supported with
> range-based crashkernel kernel command-line arguments.
> For example: crashkernel=0M-1G:100M,1G-4G:160M,4G-8G:192M
> 
> Many users, including some distributions, use range-based
> crashkernel configuration. So, adding support for high crashkernel
> reservation with range-based configuration would be useful.

Sorry for the late response. And I have to say sorry because I have some
reservations about this change.

We use crashkernel=xM|G and crashkernel=range1:size1[,range2:size2,...]
as the default setting, so that people only need to set the suggested amount
of memory. crashkernel=,high|low, on the other hand, is for advanced users to
customize their crashkernel value. In that case, the user knows what high
memory and low memory are, and how much of each is needed to achieve their
goal, e.g. saving low memory, taking away more high memory.

To be honest, the above grammars sound simple, right? I believe both of you
know very well how complicated the current crashkernel code is. I would
suggest not letting it become more and more complicated by extending
the grammar further and further, unless you hit an unavoidable issue with
the existing grammar.

Here comes my question: did you hit an unavoidable issue with the existing
grammar when using crashkernel=range1:size1[,range2:size2,...] that made you
think it's not satisfactory, while at the same time crashkernel=,high|low
can't meet your demand either?

Thanks
Baoquan


^ permalink raw reply

* Re: [PATCH v10 12/21] gpu: nova-core: mm: Add unified page table entry wrapper enums
From: Eliot Courtney @ 2026-04-08 13:26 UTC (permalink / raw)
  To: Joel Fernandes, Eliot Courtney, linux-kernel
  Cc: Miguel Ojeda, Boqun Feng, Gary Guo, Bjorn Roy Baron, Benno Lossin,
	Andreas Hindborg, Alice Ryhl, Trevor Gross, Danilo Krummrich,
	Dave Airlie, Daniel Almeida, Koen Koning, dri-devel,
	rust-for-linux, Nikola Djukic, Maarten Lankhorst, Maxime Ripard,
	Thomas Zimmermann, David Airlie, Simona Vetter, Jonathan Corbet,
	Alex Deucher, Christian Koenig, Jani Nikula, Joonas Lahtinen,
	Rodrigo Vivi, Tvrtko Ursulin, Huang Rui, Matthew Auld,
	Matthew Brost, Lucas De Marchi, Thomas Hellstrom, Helge Deller,
	Alex Gaynor, Boqun Feng, John Hubbard, Alistair Popple,
	Timur Tabi, Edwin Peer, Alexandre Courbot, Andrea Righi,
	Andy Ritger, Zhi Wang, Balbir Singh, Philipp Stanner,
	Elle Rhumsaa, alexeyi, joel, linux-doc, amd-gfx, intel-gfx,
	intel-xe, linux-fbdev
In-Reply-To: <537a8c5a-3885-4c47-99f6-963b48ddf87d@nvidia.com>

On Tue Apr 7, 2026 at 10:59 PM JST, Joel Fernandes wrote:
> Hi Eliot,
>
> On 4/7/2026 9:42 AM, Eliot Courtney wrote:
>> On Tue Apr 7, 2026 at 6:55 AM JST, Joel Fernandes wrote:
>>>>> +    /// Compute upper bound on page table pages needed for `num_virt_pages`.
>>>>> +    ///
>>>>> +    /// Walks from PTE level up through PDE levels, accumulating the tree.
>>>>> +    pub(crate) fn pt_pages_upper_bound(&self, num_virt_pages: usize) -> usize {
>>>>> +        let mut total = 0;
>>>>> +
>>>>> +        // PTE pages at the leaf level.
>>>>> +        let pte_epp = self.entries_per_page(self.pte_level());
>>>>> +        let mut pages_at_level = num_virt_pages.div_ceil(pte_epp);
>>>>> +        total += pages_at_level;
>>>>> +
>>>>> +        // Walk PDE levels bottom-up (reverse of pde_levels()).
>>>>> +        for &level in self.pde_levels().iter().rev() {
>>>>> +            let epp = self.entries_per_page(level);
>>>>> +
>>>>> +            // How many pages at this level do we need to point to
>>>>> +            // the previous pages_at_level?
>>>>> +            pages_at_level = pages_at_level.div_ceil(epp);
>>>>> +            total += pages_at_level;
>>>>> +        }
>>>>> +
>>>>> +        total
>>>>> +    }
>>>>> +}
>>>>> +
>>>>
>>>> We have a lot of matches on the MMU version here (and below in Pte, Pde,
>>>> DualPde). What about making MmuVersion into a trait (e.g. Mmu) with
>>>> associated types for Pte, Pde, DualPde which can implement traits
>>>> defining their common operations too?
>>>
>>> I coded this up and it did not look pretty, there's not much LOC savings and the
>>> code becomes harder to read because of parametrization of several functions. Also:
>> 
>> Thanks for looking into it. Sorry to be a bother, but would you have a
>> branch around with the code? I'm curious what didn't look good about it.
>
> Sorry but I already mentioned that above, the parameterizing of dozens of
> function call sites, 3-4 new traits (because each struct like
> Pte/Pde/DualPde etc each need their own trait which different MMU versions
> implement) etc. The code becomes hard to read and readability is the top
> critical criterion for me - I am personally strictly against "Let's use shiny
> features in language at the cost of making code unreadable". Because that
> translates into bugs and nightmare for maintainability.
>
> I don't have the code at the moment, but if you still want to spend time
> on this direction, feel free to share a tree. I am happy to take a look.

I had a go at this, you can see the branch here [1] - it might not be
perfect, but I think the shape is directionally good. It's structured so
the HEAD commit has the diff from the current approach to the
parametrised approach. The main decision is where to do the type
erasure, I chose in `Vmm` since it looks like the main top level API for
this code, but could do `BarUser` instead. I think it's overall better.
I also think Alex's point about associated types making it easier to use
the appropriate Bounded type is a good one.

[1]: https://github.com/Edgeworth/linux/commits/review/nova-mm-v10/

>>>> Then you can parameterise Vmm/PtWalk on this type.
>>>
>>> The match still has to be done somewhere, so you end up matching on chipset to call
>>> the correct parametrized functions versus just passing in the parameter or
>>> chipset down, in some cases.
>>>
>>> For now I am inclined to leave it as is. Also there's a Rust pitfall we all
>>> learnt during the turing and other patch reviews, sometimes doing a bunch of
>>> matches is good especially if the number of variants are expected to be fixed
>>> (in the mm case, version 2 and version 3). Traits have some disadvantages too,
>>> for example dyn traits have to be heap-allocated, parametrizing can increase code size
>>> (due to monomorphization) etc.
>> 
>> Yeah, it's just this is a lot of matches in a lot of places. And we have
>> ver2 / ver3 specific code leaking into the general pagetable.rs file. So
>
> That's not a leak, that's by design. pagetable.rs is where the matches are
> centralized, most of the code changes here on out should happen outside of
> this file.
>
> 31 out of 42 matches in the mm code are in pagetable.rs, so it is already
> centralized.
>
>> it would be really nice if we could find a way to improve this specific
>> aspect. We can reduce the match to happening in just one file. 
>
> Assuming we know what we're improving. ;-)
>
>> You can avoid heap allocation if you would like by making Vmm an enum,
>> for example, and doing the match based dispatch there at the top of the
>> API tree, rather than at the bottom where it fans out into a lot more
>> locations.
>
> Heap allocation is not always free; this code is sensitive to dynamic
> allocations in the kernel, due to MM reclaim and locking. I would like to
> keep it simple.

If you do the type erasure via enum in Vmm, you won't need to allocate
it on the heap. The branch I posted above has an example on how to do
this, although there might be a better way.

>
> thanks,
>
> --
> Joel Fernandes


^ permalink raw reply

* Re: [PATCH v8 0/2] PCI: s390: Expose the UID as an arch specific PCI slot attribute
From: Vasily Gorbik @ 2026-04-08 12:18 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Niklas Schnelle, Jonathan Corbet, Lukas Wunner, Shuah Khan,
	Farhan Ali, Alexander Gordeev, Christian Borntraeger,
	Gerald Schaefer, Gerd Bayer, Heiko Carstens, Julian Ruess,
	Matthew Rosato, Peter Oberparleiter, Ramesh Errabolu,
	Sven Schnelle, linux-doc, linux-kernel, linux-pci, linux-s390,
	Randy Dunlap
In-Reply-To: <20260407-uid_slot-v8-0-15ae4409d2ce@linux.ibm.com>

On Tue, Apr 07, 2026 at 03:24:44PM +0200, Niklas Schnelle wrote:
> Add a mechanism for architecture specific attributes on
> PCI slots in order to add the user-defined ID (UID) as an s390 specific
> PCI slot attribute. First though improve some issues with the s390 specific
> documentation of PCI sysfs attributes noticed during development.

> Niklas Schnelle (2):
>       docs: s390/pci: Improve and update PCI documentation
>       PCI: s390: Expose the UID as an arch specific PCI slot attribute
> 
>  Documentation/arch/s390/pci.rst | 151 +++++++++++++++++++++++++++-------------
>  arch/s390/include/asm/pci.h     |   4 ++
>  arch/s390/pci/pci_sysfs.c       |  20 ++++++
>  drivers/pci/slot.c              |  13 +++-
>  4 files changed, 140 insertions(+), 48 deletions(-)

Bjorn, would you like to take this through the PCI tree? I think Niklas
phrased the subject with that in mind.

Otherwise, I can take it through the s390 tree. If so, could you give
me your Acked-by?

^ permalink raw reply

* Re: [PATCH 0/9] Kernel API Specification Framework
From: Geert Uytterhoeven @ 2026-04-08 12:05 UTC (permalink / raw)
  To: Sasha Levin
  Cc: Jakub Kicinski, linux-api, linux-kernel, linux-doc, linux-fsdevel,
	linux-kbuild, linux-kselftest, workflows, tools, x86,
	Thomas Gleixner, Paul E. McKenney, Greg Kroah-Hartman,
	Jonathan Corbet, Dmitry Vyukov, Randy Dunlap, Cyril Hrubis,
	Kees Cook, Jake Edge, David Laight, Askar Safin, Gabriele Paoloni,
	Mauro Carvalho Chehab, Christian Brauner, Alexander Viro,
	Andrew Morton, Masahiro Yamada, Shuah Khan, Ingo Molnar,
	Arnd Bergmann
In-Reply-To: <abZTg9ZwnE5J4qXa@laps>

Hi Sasha,

On Sun, 15 Mar 2026 at 07:36, Sasha Levin <sashal@kernel.org> wrote:
> On Sat, Mar 14, 2026 at 11:18:22AM -0700, Jakub Kicinski wrote:
> >On Fri, 13 Mar 2026 11:09:10 -0400 Sasha Levin wrote:
> >> This enables static analysis tools to verify userspace API usage at compile
> >> time, test generation based on formal specifications, consistent error handling
> >> validation, automated documentation generation, and formal verification of
> >> kernel interfaces.
> >
> >Could you give some examples? We have machine readable descriptions for
> >Netlink interfaces, we approached syzbot folks and they did not really
> >seem to care for those.
>
> Once the API is in a machine-readable format, we can write formatters to
> output whatever downstream tools need. The kapi tool in the series
> already ships with plain text, JSON, and RST formatters, and adding new
> output formats is straightforward. We don't need to convince the
> syzkaller folks to consume our specs, we can just output them in a
> format that syzkaller already understands.
>
> For example, I have a syzlang formatter that produces the following
> from the sys_read spec in this series:
>
>    # --- read ---
>    # Read data from a file descriptor
>    #
>    # @context process, sleepable
>    #
>    # @capability CAP_DAC_OVERRIDE: Bypass discretionary access control on read permission
>    # @capability CAP_DAC_READ_SEARCH: Bypass read permission checks on regular files
>    #
>    # @error EPERM (-1): Returned by fanotify permission events...
>    # @error EINTR (-4): The call was interrupted by a signal before any data was read.
>    # @error EIO (-5): A low-level I/O error occurred.
>    # @error EBADF (-9): fd is not a valid file descriptor, or fd was not opened for reading.
>    # @error EAGAIN (-11): O_NONBLOCK set and read would block.
>    # @error EACCES (-13): LSM denied the read operation via security_file_permission().
>    # @error EFAULT (-14): buf points outside the accessible address space.
>    # @error EISDIR (-21): fd refers to a directory.
>    # @error EINVAL (-22): fd not suitable for reading, O_DIRECT misaligned, count negative...
>    # @error ENODATA (-61): Data not available in cache...
>    # @error EOVERFLOW (-75): File position plus count would exceed LLONG_MAX.
>    # @error EOPNOTSUPP (-95): Read not supported for this file type...
>    # @error ENOBUFS (-105): Buffer too small for complete notification...

The actual E-values are positive, so I guess you want e.g. -EPERM?

Note that the actual errno values are architecture-specific.
E.g. EOPNOTSUPP can be 45, 95, 122, or 223.

Gr{oetje,eeting}s,

                        Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox