* [PATCH v2 2/2] docs: kdoc_diff: add a helper tool to help checking kdoc regressions
2026-03-26 19:09 [PATCH v2 0/2] Add a script to check for kernel-doc regressions Mauro Carvalho Chehab
2026-03-26 19:09 ` [PATCH v2 1/2] tools: unittest_helper: add a quiet mode Mauro Carvalho Chehab
@ 2026-03-26 19:09 ` Mauro Carvalho Chehab
1 sibling, 0 replies; 3+ messages in thread
From: Mauro Carvalho Chehab @ 2026-03-26 19:09 UTC (permalink / raw)
To: Linux Doc Mailing List, Mauro Carvalho Chehab
Cc: Mauro Carvalho Chehab, linux-kernel, Jonathan Corbet, Shuah Khan
Checking for regressions in kernel-doc output can be hard. Add a helper
tool to make this task easier.
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
tools/docs/kdoc_diff | 508 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 508 insertions(+)
create mode 100755 tools/docs/kdoc_diff
diff --git a/tools/docs/kdoc_diff b/tools/docs/kdoc_diff
new file mode 100755
index 000000000000..1aa16bdccaa3
--- /dev/null
+++ b/tools/docs/kdoc_diff
@@ -0,0 +1,508 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2026: Mauro Carvalho Chehab <mchehab@kernel.org>.
+#
+# pylint: disable=R0903,R0912,R0913,R0914,R0915,R0917
+
+"""
+kdoc_diff - Check differences in kernel-doc output between two different
+commits.
+
+Examples
+--------
+
+Compare the kernel-doc output between the v6.18 and v6.19 releases::
+
+ $ kdoc_diff v6.18..v6.19
+
+Both outputs are cached.
+
+Force a complete documentation scan from v6.19 to the current HEAD,
+cleaning any previous cache::
+
+ $ kdoc_diff v6.19.. --full --clean
+
+Check differences only on a single driver since origin/main::
+
+ $ kdoc_diff origin/main drivers/media
+
+Generate a YAML file and use it to check for regressions::
+
+ $ kdoc_diff HEAD~ drivers/media --regression
+
+
+"""
+
+import os
+import sys
+import argparse
+import subprocess
+import shutil
+import re
+import signal
+
+from glob import iglob
+
+
+# Directory holding this script, and the directory two levels above it.
+SRC_DIR = os.path.dirname(os.path.realpath(__file__))
+WORK_DIR = os.path.abspath(os.path.join(SRC_DIR, "../.."))
+
+# Programs driven by this script.
+KDOC_BINARY = os.path.join(SRC_DIR, "kernel-doc")
+KDOC_PARSER_TEST = os.path.join(WORK_DIR, "tools/unittests/test_kdoc_parser.py")
+
+# Name of the cache directory and of the YAML file used on regression mode.
+CACHE_DIR = ".doc_diff_cache"
+YAML_NAME = "out.yaml"
+
+# One cache sub-directory per scan type, plus a scratch directory.
+DIR_NAME = {
+    scan_type: os.path.join(CACHE_DIR, subdir)
+    for scan_type, subdir in (
+        ("full", "full"),
+        ("partial", "partial"),
+        ("no-cache", "no_cache"),
+        ("tmp", "__tmp__"),
+    )
+}
+
+class GitHelper:
+    """
+    Thin wrapper around the git command line.
+
+    Every command runs with ``work_dir`` as its working directory
+    (``None`` means the current directory of the process).
+    """
+
+    def __init__(self, work_dir=None):
+        self.work_dir = work_dir
+
+    def _git(self, *args):
+        """
+        Run ``git <args>`` and return its standard output, stripped.
+
+        stderr is captured separately, so git warnings never get mixed
+        with the returned value.
+
+        Raises subprocess.CalledProcessError when git exits with a
+        non-zero status.
+        """
+        return subprocess.check_output(["git", *args], cwd=self.work_dir,
+                                       stderr=subprocess.PIPE,
+                                       text=True).strip()
+
+    def is_inside_repository(self):
+        """Return True if work_dir is inside a Git work tree."""
+        try:
+            return self._git("rev-parse", "--is-inside-work-tree") == "true"
+        except subprocess.CalledProcessError:
+            return False
+
+    def is_valid_commit(self, commit_hash):
+        """
+        Validate that a ref (branch, tag, commit hash, etc.) can be
+        resolved to a commit.
+
+        The ref is peeled with ``^{commit}``, so objects that are not
+        commits (blobs, trees) are rejected, while annotated tags are
+        accepted.
+        """
+        if not commit_hash:
+            return False
+
+        try:
+            self._git("rev-parse", "--verify", "--quiet",
+                      f"{commit_hash}^{{commit}}")
+            return True
+        except subprocess.CalledProcessError:
+            return False
+
+    def get_short_hash(self, commit_hash):
+        """
+        Return the abbreviated hash of the commit a ref points to.
+
+        The ref is peeled with ``^{commit}``: for an annotated tag, the
+        result is the hash of the tagged commit, not the one of the tag
+        object, so it can be compared with the hash of HEAD after a
+        checkout.
+
+        Returns an empty string if the ref can't be resolved.
+        """
+        if not commit_hash:
+            return ""
+
+        try:
+            return self._git("rev-parse", "--verify", "--short",
+                             f"{commit_hash}^{{commit}}")
+        except subprocess.CalledProcessError:
+            return ""
+
+    def has_uncommitted_changes(self):
+        """
+        Return True if ``git diff-index --quiet HEAD`` exits with a
+        non-zero status.
+        """
+        try:
+            self._git("diff-index", "--quiet", "HEAD", "--")
+            return False
+        except subprocess.CalledProcessError:
+            return True
+
+    def get_current_branch(self):
+        """
+        Return something that can be checked out later to get back to
+        the current state: the branch name or, when
+        ``git branch --show-current`` prints nothing (detached HEAD),
+        the full hash of HEAD.
+
+        Raises subprocess.CalledProcessError if git fails.
+        """
+        branch = self._git("branch", "--show-current")
+        if branch:
+            return branch
+
+        return self._git("rev-parse", "HEAD")
+
+    def checkout_commit(self, commit_hash, quiet=True):
+        """
+        Force a checkout of a ref, discarding local changes.
+
+        Returns True on success and False, after printing an error, if
+        git refuses the checkout.
+
+        Raises RuntimeError if, after a successful checkout, HEAD is not
+        at the requested commit.
+        """
+        args = ["checkout", "-f"]
+        if quiet:
+            args.append("-q")
+        args.append(commit_hash)
+
+        try:
+            self._git(*args)
+        except subprocess.CalledProcessError as e:
+            print(f"ERROR: Failed to checkout {commit_hash}: {e}",
+                  file=sys.stderr)
+            if e.stderr:
+                print(e.stderr.strip(), file=sys.stderr)
+            return False
+
+        # Double-check if HEAD actually moved. Both sides are resolved to
+        # an abbreviated commit hash, so the check also works when the
+        # caller passed a branch name, a tag or a full hash.
+        expected = self.get_short_hash(commit_hash)
+        current = self.get_short_hash("HEAD")
+        if expected != current:
+            raise RuntimeError(f"Branch changed to '{current}' instead of '{commit_hash}'")
+
+        return True
+
+
+class CacheManager:
+    """Takes care of the on-disk cache directories."""
+
+    def __init__(self, work_dir):
+        self.work_dir = work_dir
+
+    def initialize(self):
+        """Create, below work_dir, each cache directory that is missing."""
+        for rel_dir in DIR_NAME.values():
+            target = os.path.join(self.work_dir, rel_dir)
+            if os.path.exists(target):
+                continue
+
+            os.makedirs(target, mode=0o755, exist_ok=True)
+
+    def get_commit_cache(self, commit_hash, path):
+        """
+        Return the cache directory of a commit inside ``path``.
+
+        The directory is named after the abbreviated commit hash. If it
+        can't be obtained, ``commit_hash`` is used as-is.
+        """
+        git_helper = GitHelper(self.work_dir)
+        name = git_helper.get_short_hash(commit_hash) or commit_hash
+
+        return os.path.join(path, name)
+
+class KernelDocRunner:
+    """
+    Runs the kernel-doc documentation generator.
+
+    kernel-doc is always executed with ``work_dir`` as its working
+    directory.
+    """
+
+    def __init__(self, work_dir, kdoc_binary):
+        self.work_dir = work_dir
+        self.kdoc_binary = kdoc_binary
+        self.kdoc_files = None
+
+    def find_kdoc_references(self):
+        """
+        Find all files referenced by ``.. kernel-doc::`` directives inside
+        the ``*.rst`` files under ``work_dir``/Documentation.
+
+        Returns a sorted list with the first argument of each directive.
+        The list is kept at self.kdoc_files and reused on later calls.
+        """
+        if self.kdoc_files:
+            print("Using cached Kdoc refs")
+            return self.kdoc_files
+
+        print("Finding kernel-doc entries in Documentation...")
+
+        files = os.path.join(self.work_dir, 'Documentation/**/*.rst')
+        pattern = re.compile(r"^\.\.\s+kernel-doc::\s*(\S+)")
+        kdoc_files = set()
+
+        for file_path in iglob(files, recursive=True):
+            try:
+                # Only the directive lines matter here: don't let a stray
+                # non-UTF-8 byte elsewhere in the file abort the scan.
+                with open(file_path, 'r', encoding='utf-8',
+                          errors='replace') as fp:
+                    for line in fp:
+                        match = pattern.match(line.strip())
+                        if match:
+                            kdoc_files.add(match.group(1))
+
+            except OSError:
+                continue
+
+        # Sets have no stable order. Sort the result, as the order of the
+        # files given to kernel-doc defines the order of its output, which
+        # is cached and later compared with the output of other runs.
+        self.kdoc_files = sorted(kdoc_files)
+
+        return self.kdoc_files
+
+    def gen_yaml(self, yaml_file, kdoc_files):
+        """
+        Runs kernel-doc to generate a yaml file with man and rst.
+
+        Returns True on success, False if kernel-doc exits with an error.
+        """
+        cmd = [self.kdoc_binary, "--man", "--rst", "--yaml", yaml_file]
+        cmd += kdoc_files
+
+        print(f"YAML regression test file will be stored at: {yaml_file}")
+
+        # kernel-doc runs at work_dir, so a relative yaml_file is relative
+        # to it. Make sure that its directory exists.
+        yaml_dir = os.path.dirname(os.path.join(self.work_dir, yaml_file))
+        os.makedirs(yaml_dir, exist_ok=True)
+
+        try:
+            subprocess.check_call(cmd, cwd=self.work_dir,
+                                  stdout=subprocess.DEVNULL,
+                                  stderr=subprocess.DEVNULL)
+        except subprocess.CalledProcessError:
+            return False
+
+        return True
+
+    def run_unittest(self, yaml_file):
+        """
+        Run unit tests with the generated yaml file.
+
+        Always returns True: a failing test run is only reported on
+        stdout, together with hints about how to get more details.
+        """
+        cmd = [KDOC_PARSER_TEST, "-q", "--yaml", yaml_file]
+        result = subprocess.run(cmd, cwd=self.work_dir, check=False)
+
+        if result.returncode:
+            print("To check for problems, try to run it again with -v\n")
+            print("Use -k <regex> to filter results\n\n\t$", end="")
+            print(" ".join(cmd) + "\n")
+
+        return True
+
+    def normal_run(self, tmp_dir, output_dir, kdoc_files):
+        """
+        Generate man, rst and errors, storing them at tmp_dir.
+
+        Three files are produced: man.log (--man output), rst.log (--rst
+        output) and err.log (stderr of the --rst run). If output_dir is
+        set, tmp_dir is renamed to it once both runs succeed.
+
+        Returns True on success, False if kernel-doc exits with an error.
+        """
+        # Start from an empty directory: anything left there by a previous
+        # run (like the YAML file from regression mode) would otherwise be
+        # moved into the cache and show up as a difference.
+        shutil.rmtree(tmp_dir, ignore_errors=True)
+        os.makedirs(tmp_dir, exist_ok=True)
+
+        man_log = os.path.join(tmp_dir, "man.log")
+        rst_log = os.path.join(tmp_dir, "rst.log")
+        err_log = os.path.join(tmp_dir, "err.log")
+
+        try:
+            with open(man_log, "w", encoding="utf-8") as out:
+                subprocess.check_call([self.kdoc_binary, "--man"] + kdoc_files,
+                                      cwd=self.work_dir,
+                                      stdout=out, stderr=subprocess.DEVNULL)
+
+            with open(rst_log, "w", encoding="utf-8") as out, \
+                 open(err_log, "w", encoding="utf-8") as err:
+                subprocess.check_call([self.kdoc_binary, "--rst"] + kdoc_files,
+                                      cwd=self.work_dir,
+                                      stdout=out, stderr=err)
+        except subprocess.CalledProcessError:
+            return False
+
+        if output_dir:
+            parent = os.path.dirname(output_dir)
+            if parent:
+                os.makedirs(parent, exist_ok=True)
+
+            os.replace(tmp_dir, output_dir)
+
+        return True
+
+    def run(self, commit_hash, tmp_dir, output_dir, kdoc_files, is_regression,
+            is_end):
+        """
+        Checkout a commit and run kernel-doc on it.
+
+        Without is_regression, the man/rst/error logs are generated and
+        stored at output_dir. With it, a YAML file is generated at tmp_dir
+        for the first commit (is_end false) and used to run the unit tests
+        on the last one (is_end true).
+
+        Returns what the selected step returned.
+
+        Raises RuntimeError if kdoc_files is empty or if the commit can't
+        be checked out.
+        """
+        if not kdoc_files:
+            raise RuntimeError("No kernel-doc references found")
+
+        git_helper = GitHelper(self.work_dir)
+        if not git_helper.checkout_commit(commit_hash, quiet=True):
+            raise RuntimeError(f"ERROR: can't checkout commit {commit_hash}")
+
+        print(f"Processing {commit_hash}...")
+
+        if not is_regression:
+            return self.normal_run(tmp_dir, output_dir, kdoc_files)
+
+        yaml_file = os.path.join(tmp_dir, YAML_NAME)
+
+        if not is_end:
+            return self.gen_yaml(yaml_file, kdoc_files)
+
+        return self.run_unittest(yaml_file)
+
+class DiffManager:
+    """Uses an external diff tool to compare two output directories."""
+
+    def __init__(self, diff_tool="diff", diff_args=None):
+        self.diff_tool = diff_tool
+        # default: unified, no context, ignore whitespace changes
+        self.diff_args = diff_args if diff_args else ["-u0", "-w"]
+
+    def diff_directories(self, dir1, dir2):
+        """
+        Print the differences between two directories.
+
+        Files present on both sides are compared with the external diff
+        tool. Files present on a single side are just listed. Exits with
+        status 1 if the diff tool can't be found.
+        """
+        print(f"\nDiffing {dir1} and {dir2}:")
+
+        def relative_files(top):
+            """Paths, relative to top, of all files found below it."""
+            return {
+                os.path.relpath(os.path.join(parent, name), top)
+                for parent, _, names in os.walk(top)
+                for name in names
+            }
+
+        left = relative_files(dir1)
+        right = relative_files(dir2)
+        found_difference = False
+
+        for rel_path in sorted(left & right):
+            old_file = os.path.join(dir1, rel_path)
+            new_file = os.path.join(dir2, rel_path)
+            command = [self.diff_tool] + self.diff_args + [old_file, new_file]
+
+            try:
+                outcome = subprocess.run(command, capture_output=True,
+                                         text=True, check=False)
+            except FileNotFoundError:
+                print(f"ERROR: {self.diff_tool} not found")
+                sys.exit(1)
+
+            if not outcome.stdout:
+                continue
+
+            found_difference = True
+            print(f"\n{rel_path}")
+            print(outcome.stdout, end="")
+
+        # Show files that exist only in one directory
+        removed = left - right
+        added = right - left
+        if removed or added:
+            found_difference = True
+            print("\nDifferential files:")
+            for rel_path in sorted(removed):
+                print(f" - {rel_path} (only in {dir1})")
+            for rel_path in sorted(added):
+                print(f" + {rel_path} (only in {dir2})")
+
+        if found_difference:
+            return
+
+        print("\nNo differences between those two commits")
+
+
+class SignalHandler:
+    """
+    Context manager that checks out the original head again when the
+    ``with`` block ends or when SIGINT/SIGTERM is received.
+    """
+
+    def __init__(self, git_helper, original_head):
+        self.git_helper = git_helper
+        self.original_head = original_head
+        self.old_handler = {}
+        self.restored = False
+
+    def __enter__(self):
+        """Save the current SIGINT/SIGTERM handlers and install ours."""
+        for signum in [signal.SIGINT, signal.SIGTERM]:
+            self.old_handler[signum] = signal.getsignal(signum)
+            signal.signal(signum, self.signal_handler)
+
+        return self
+
+    def __exit__(self, *args):
+        """Restore the original state at the end of the with block."""
+        self.restore()
+
+    def signal_handler(self, sig, _):
+        """Restore the original state and exit when a signal arrives."""
+        print(f"\nSignal {sig} received. Restoring original state...")
+
+        self.restore(force_exit=True)
+
+    def restore(self, force_exit=False):
+        """
+        Checkout the original head and reinstall the saved signal
+        handlers. Does nothing if it was already called once.
+
+        With force_exit, the program exits with status 1 afterwards.
+        """
+        if self.restored:
+            return
+
+        print(f"Restoring original branch: {self.original_head}")
+
+        checkout = ["git", "checkout", "-f", self.original_head]
+        try:
+            subprocess.check_call(checkout, cwd=self.git_helper.work_dir,
+                                  stderr=subprocess.STDOUT)
+        except subprocess.CalledProcessError as e:
+            print(f"Failed to restore: {e}", file=sys.stderr)
+
+        # Handlers are put back even if the checkout failed
+        for signum, previous in self.old_handler.items():
+            signal.signal(signum, previous)
+
+        self.restored = True
+
+        if force_exit:
+            sys.exit(1)
+
+def parse_commit_range(value):
+    """
+    Split a commit range into its two ends.
+
+    Accepts ``begin..end``, ``begin..`` and ``begin``. On the last two
+    forms, the end of the range is HEAD.
+
+    Returns a (begin, end) tuple.
+
+    Raises argparse.ArgumentTypeError if the range has no beginning or if
+    it uses the three-dot notation.
+    """
+    begin, _, end = value.partition("..")
+
+    if not begin:
+        raise argparse.ArgumentTypeError("Need a commit beginning")
+
+    # "a...b" would otherwise be silently parsed as ("a", ".b")
+    if end.startswith("."):
+        raise argparse.ArgumentTypeError("Three-dot ranges are not supported, use old..new")
+
+    if not end:
+        end = "HEAD"
+
+    print(f"Range: {begin} to {end}")
+
+    return begin, end
+
+
+def main():
+    """
+    Command line entry point.
+
+    Parses the arguments, validates the repository state and runs
+    kernel-doc for both ends of the commit range, reusing cached output
+    when present. Then it either diffs the two outputs or, with
+    --regression, runs the kernel-doc parser unit test using the YAML
+    generated for the first commit.
+
+    The original head is checked out again at the end, including when
+    the run is interrupted by SIGINT/SIGTERM.
+
+    Raises RuntimeError if the repository state doesn't allow running or
+    if kernel-doc fails.
+    """
+    parser = argparse.ArgumentParser(description="Compare kernel documentation between commits")
+    parser.add_argument("commits", type=parse_commit_range,
+                        help="commit range like old..new")
+    parser.add_argument("files", nargs="*",
+                        help="files to process (can't be combined with --full)")
+
+    parser.add_argument("--full", "-f", action="store_true",
+                        help="Force a full scan of Documentation/*")
+
+    parser.add_argument("--regression", "-r", action="store_true",
+                        help="Use YAML format to check for regressions")
+
+    parser.add_argument("--work-dir", "-w", default=WORK_DIR,
+                        help="work dir (default: %(default)s)")
+
+    parser.add_argument("--clean", "-c", action="store_true",
+                        help="Clean caches")
+
+    args = parser.parse_args()
+
+    # argparse.ArgumentError needs an argparse Action as its first
+    # argument, so it can't be raised from here: use parser.error()
+    if args.files and args.full:
+        parser.error("cannot combine '--full' with an explicit file list")
+
+    work_dir = os.path.abspath(args.work_dir)
+
+    # Initialize cache
+    cache = CacheManager(work_dir)
+    cache.initialize()
+
+    # Validate git repository
+    git_helper = GitHelper(work_dir)
+    if not git_helper.is_inside_repository():
+        raise RuntimeError("Must run inside Git repository")
+
+    # Validate commits before converting them, to report what the user
+    # actually typed. "^{commit}" makes tags resolve to the commit they
+    # point to, which is what HEAD will be after checking them out.
+    commits = []
+    for ref in args.commits:
+        commit = f"{ref}^{{commit}}"
+        if not git_helper.is_valid_commit(commit):
+            raise RuntimeError(f"Commit '{ref}' does not exist")
+
+        commits.append(git_helper.get_short_hash(commit))
+
+    old_commit, new_commit = commits
+
+    # Check for uncommitted changes
+    if git_helper.has_uncommitted_changes():
+        raise RuntimeError("Uncommitted changes present. Commit or stash first.")
+
+    runner = KernelDocRunner(git_helper.work_dir, KDOC_BINARY)
+
+    # Get files to be parsed
+    cache_msg = " (results will be cached)"
+    if args.full:
+        kdoc_files = ["."]
+        diff_type = "full"
+        print(f"Parsing all files at {work_dir}")
+    elif not args.files:
+        diff_type = "partial"
+        kdoc_files = runner.find_kdoc_references()
+        print(f"Parsing files with kernel-doc markups at {work_dir}/Documentation")
+    else:
+        diff_type = "no-cache"
+        cache_msg = ""
+        kdoc_files = args.files
+
+    # Cache directories are created below work_dir, and kernel-doc runs
+    # there as well: use absolute paths, to not depend on the directory
+    # where the script was called.
+    tmp_dir = os.path.join(work_dir, DIR_NAME["tmp"])
+    out_path = os.path.join(work_dir, DIR_NAME[diff_type])
+
+    if not args.regression:
+        print(f"Output will be stored at: {out_path}{cache_msg}")
+
+    # Just in case - should never happen in practice
+    if not kdoc_files:
+        raise RuntimeError("No kernel-doc references found")
+
+    # On a detached HEAD there is no branch name to return to
+    original_head = git_helper.get_current_branch()
+    if not original_head:
+        original_head = git_helper.get_short_hash("HEAD")
+
+    old_cache = cache.get_commit_cache(old_commit, out_path)
+    new_cache = cache.get_commit_cache(new_commit, out_path)
+
+    with SignalHandler(git_helper, original_head):
+        if args.clean or diff_type == "no-cache":
+            for cache_dir in [old_cache, new_cache]:
+                if cache_dir and os.path.exists(cache_dir):
+                    shutil.rmtree(cache_dir)
+
+        if args.regression or not os.path.exists(old_cache):
+            old_success = runner.run(old_commit, tmp_dir, old_cache, kdoc_files,
+                                     args.regression, False)
+        else:
+            old_success = True
+
+        if args.regression or not os.path.exists(new_cache):
+            new_success = runner.run(new_commit, tmp_dir, new_cache, kdoc_files,
+                                     args.regression, True)
+        else:
+            new_success = True
+
+    if not (old_success and new_success):
+        raise RuntimeError("Failed to generate documentation")
+
+    if not args.regression:
+        diff_manager = DiffManager()
+        diff_manager.diff_directories(old_cache, new_cache)
+
+if __name__ == "__main__":
+    main()
--
2.53.0
^ permalink raw reply related [flat|nested] 3+ messages in thread