[RFC] devtools: replace get-maintainer shell wrapper with Python script

public inbox for dev@dpdk.org
 help / color / mirror / Atom feed

* [RFC] devtools: replace get-maintainer shell wrapper with Python script
@ 2026-01-31 20:48 Stephen Hemminger
  2026-02-01 13:51 ` Thomas Monjalon
                   ` (5 more replies)
  0 siblings, 6 replies; 13+ messages in thread
From: Stephen Hemminger @ 2026-01-31 20:48 UTC (permalink / raw)
  To: dev
  Cc: Stephen Hemminger, Thomas Monjalon, Bruce Richardson,
	Aaron Conole, David Marchand

DPDK has been reusing the Linux kernel get_maintainer perl script
but that creates an unwanted dependency on kernel source.

This new script replaces that with a standalone Python implementation
created in a few minutes with AI. The command line arguments are
a subset of the features that make sense in DPDK.

- Parse MAINTAINERS file with all standard entry types
- Extract modified files from unified diff patches
- Pattern matching for file paths with glob and regex support
- Git history analysis for commit signers and authors
- Email deduplication and .mailmap support
- Compatible command-line interface

A simple get-maintainer.sh wrapper is retained for backward compatibility.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 MAINTAINERS                         |   2 +-
 devtools/get-maintainer.py          | 997 ++++++++++++++++++++++++++++
 devtools/get-maintainer.sh          |  33 +-
 doc/guides/contributing/patches.rst |   4 +-
 4 files changed, 1004 insertions(+), 32 deletions(-)
 create mode 100755 devtools/get-maintainer.py

diff --git a/MAINTAINERS b/MAINTAINERS
index 5683b87e4a..fd90f7da23 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -96,7 +96,7 @@ F: devtools/check-git-log.sh
 F: devtools/check-spdx-tag.sh
 F: devtools/check-symbol-change.py
 F: devtools/checkpatches.sh
-F: devtools/get-maintainer.sh
+F: devtools/get-maintainer.*
 F: devtools/git-log-fixes.sh
 F: devtools/load-devel-config
 F: devtools/mailmap-ctl.py
diff --git a/devtools/get-maintainer.py b/devtools/get-maintainer.py
new file mode 100755
index 0000000000..9357206cf5
--- /dev/null
+++ b/devtools/get-maintainer.py
@@ -0,0 +1,997 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2017 Intel Corporation
+# Copyright(c) 2025 - Python rewrite
+#
+# get_maintainer.py - Find maintainers and mailing lists for patches/files
+#
+# Based on the Linux kernel's get_maintainer.pl by Joe Perches
+# and DPDK's get-maintainer.sh wrapper script.
+#
+# Usage: get_maintainer.py [OPTIONS] <patch>
+#        get_maintainer.py [OPTIONS] -f <file>
+
+import argparse
+import os
+import re
+import subprocess
+import sys
+from collections import defaultdict
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Optional
+
+VERSION = "1.0"
+
+# Default configuration
+DEFAULT_CONFIG = {
+    "email": True,
+    "email_usename": True,
+    "email_maintainer": True,
+    "email_reviewer": True,
+    "email_fixes": True,
+    "email_list": True,
+    "email_moderated_list": True,
+    "email_subscriber_list": False,
+    "email_git": False,
+    "email_git_all_signature_types": False,
+    "email_git_blame": False,
+    "email_git_blame_signatures": True,
+    "email_git_fallback": True,
+    "email_git_min_signatures": 1,
+    "email_git_max_maintainers": 5,
+    "email_git_min_percent": 5,
+    "email_git_since": "1-year-ago",
+    "email_remove_duplicates": True,
+    "email_use_mailmap": True,
+    "output_multiline": True,
+    "output_separator": ", ",
+    "output_roles": False,
+    "output_rolestats": True,
+    "output_section_maxlen": 50,
+    "scm": False,
+    "web": False,
+    "bug": False,
+    "subsystem": False,
+    "status": False,
+    "keywords": True,
+    "keywords_in_file": False,
+    "sections": False,
+    "email_file_emails": False,
+    "from_filename": False,
+    "pattern_depth": 0,
+}
+
+# Signature tags for git commit analysis
+SIGNATURE_TAGS = [
+    "Signed-off-by:",
+    "Reviewed-by:",
+    "Acked-by:",
+]
+
+
+@dataclass
+class MaintainerEntry:
+    """Represents a maintainer/list entry with role information."""
+    email: str
+    role: str = ""
+
+    def __hash__(self):
+        return hash(self.email.lower())
+
+    def __eq__(self, other):
+        if isinstance(other, MaintainerEntry):
+            return self.email.lower() == other.email.lower()
+        return False
+
+
+@dataclass
+class Section:
+    """Represents a MAINTAINERS file section."""
+    name: str
+    maintainers: list = field(default_factory=list)
+    reviewers: list = field(default_factory=list)
+    mailing_lists: list = field(default_factory=list)
+    status: str = ""
+    files: list = field(default_factory=list)
+    excludes: list = field(default_factory=list)
+    scm: list = field(default_factory=list)
+    web: list = field(default_factory=list)
+    bug: list = field(default_factory=list)
+    keywords: list = field(default_factory=list)
+    regex_patterns: list = field(default_factory=list)
+
+
+class GetMaintainer:
+    """Main class for finding maintainers."""
+
+    def __init__(self, config: dict):
+        self.config = config
+        self.sections: list[Section] = []
+        self.mailmap: dict = {"names": {}, "addresses": {}}
+        self.ignore_emails: list[str] = []
+        self.vcs_type: Optional[str] = None
+        self.root_path = self._find_root_path()
+
+        # Results
+        self.email_to: list[MaintainerEntry] = []
+        self.list_to: list[MaintainerEntry] = []
+        self.scm_list: list[str] = []
+        self.web_list: list[str] = []
+        self.bug_list: list[str] = []
+        self.subsystem_list: list[str] = []
+        self.status_list: list[str] = []
+
+        # Deduplication tracking
+        self.email_hash_name: dict = {}
+        self.email_hash_address: dict = {}
+        self.deduplicate_name_hash: dict = {}
+        self.deduplicate_address_hash: dict = {}
+
+    def _find_root_path(self) -> Path:
+        """Find the root path of the project."""
+        cwd = Path.cwd()
+
+        # Check for MAINTAINERS file in current directory or parents
+        for parent in [cwd] + list(cwd.parents):
+            if (parent / "MAINTAINERS").exists():
+                return parent
+            # Also check for common project indicators
+            if (parent / ".git").exists() or (parent / ".hg").exists():
+                if (parent / "MAINTAINERS").exists():
+                    return parent
+
+        return cwd
+
+    def _detect_vcs(self) -> Optional[str]:
+        """Detect if git is available."""
+        if self.vcs_type is not None:
+            return self.vcs_type
+
+        # Check for git
+        if (self.root_path / ".git").exists():
+            try:
+                subprocess.run(
+                    ["git", "--version"],
+                    capture_output=True,
+                    check=True
+                )
+                self.vcs_type = "git"
+                return "git"
+            except (subprocess.CalledProcessError, FileNotFoundError):
+                pass
+
+        self.vcs_type = None
+        return None
+
+    def load_maintainers_file(self, path: Optional[Path] = None) -> None:
+        """Load and parse the MAINTAINERS file."""
+        if path is None:
+            path = self.root_path / "MAINTAINERS"
+
+        if not path.exists():
+            print(f"Error: MAINTAINERS file not found: {path}", file=sys.stderr)
+            sys.exit(1)
+
+        current_section: Optional[Section] = None
+
+        with open(path, "r", encoding="utf-8", errors="replace") as f:
+            for line in f:
+                line = line.rstrip("\n\r")
+
+                # Skip empty lines and comments at the start
+                if not line or line.startswith("#"):
+                    continue
+
+                # Check for section header (line not starting with a type letter)
+                match = re.match(r"^([A-Z]):\s*(.*)$", line)
+                if match:
+                    type_char = match.group(1)
+                    value = match.group(2)
+
+                    if current_section is None:
+                        # Create a default section for entries before any header
+                        current_section = Section(name="THE REST")
+                        self.sections.append(current_section)
+
+                    self._process_section_entry(current_section, type_char, value)
+                elif line and not line[0].isspace():
+                    # New section header
+                    current_section = Section(name=line.strip())
+                    self.sections.append(current_section)
+
+    def _process_section_entry(self, section: Section, type_char: str, value: str) -> None:
+        """Process a single entry in a MAINTAINERS section."""
+        if type_char == "M":
+            section.maintainers.append(value)
+        elif type_char == "R":
+            section.reviewers.append(value)
+        elif type_char == "L":
+            section.mailing_lists.append(value)
+        elif type_char == "S":
+            section.status = value
+        elif type_char == "F":
+            # Convert glob pattern to regex
+            pattern = self._glob_to_regex(value)
+            section.files.append((value, pattern))
+        elif type_char == "X":
+            pattern = self._glob_to_regex(value)
+            section.excludes.append((value, pattern))
+        elif type_char == "N":
+            # Regex pattern for filename matching
+            section.regex_patterns.append(value)
+        elif type_char == "K":
+            section.keywords.append(value)
+        elif type_char == "T":
+            section.scm.append(value)
+        elif type_char == "W":
+            section.web.append(value)
+        elif type_char == "B":
+            section.bug.append(value)
+
+    def _glob_to_regex(self, pattern: str) -> str:
+        """Convert a glob pattern to a regex pattern."""
+        # Escape special regex characters except * and ?
+        result = re.escape(pattern)
+        # Convert glob wildcards to regex
+        result = result.replace(r"\*", ".*")
+        result = result.replace(r"\?", ".")
+        # Handle directory patterns
+        if pattern.endswith("/") or os.path.isdir(pattern):
+            if not result.endswith("/"):
+                result += "/"
+            result += ".*"
+        return f"^{result}"
+
+    def load_mailmap(self) -> None:
+        """Load the .mailmap file for email address mapping."""
+        mailmap_path = self.root_path / ".mailmap"
+        if not mailmap_path.exists():
+            return
+
+        try:
+            with open(mailmap_path, "r", encoding="utf-8", errors="replace") as f:
+                for line in f:
+                    line = re.sub(r"#.*$", "", line).strip()
+                    if not line:
+                        continue
+
+                    # Parse different mailmap formats
+                    # name1 <mail1>
+                    match = re.match(r"^([^<]+)<([^>]+)>$", line)
+                    if match:
+                        name = match.group(1).strip()
+                        address = match.group(2).strip()
+                        self.mailmap["names"][address.lower()] = name
+                        continue
+
+                    # <mail1> <mail2>
+                    match = re.match(r"^<([^>]+)>\s*<([^>]+)>$", line)
+                    if match:
+                        real_addr = match.group(1).strip()
+                        wrong_addr = match.group(2).strip()
+                        self.mailmap["addresses"][wrong_addr.lower()] = real_addr
+                        continue
+
+                    # name1 <mail1> <mail2>
+                    match = re.match(r"^(.+)<([^>]+)>\s*<([^>]+)>$", line)
+                    if match:
+                        name = match.group(1).strip()
+                        real_addr = match.group(2).strip()
+                        wrong_addr = match.group(3).strip()
+                        self.mailmap["names"][wrong_addr.lower()] = name
+                        self.mailmap["addresses"][wrong_addr.lower()] = real_addr
+                        continue
+
+                    # name1 <mail1> name2 <mail2>
+                    match = re.match(r"^(.+)<([^>]+)>\s*(.+)\s*<([^>]+)>$", line)
+                    if match:
+                        real_name = match.group(1).strip()
+                        real_addr = match.group(2).strip()
+                        wrong_addr = match.group(4).strip()
+                        wrong_email = f"{match.group(3).strip()} <{wrong_addr}>"
+                        self.mailmap["names"][wrong_email.lower()] = real_name
+                        self.mailmap["addresses"][wrong_email.lower()] = real_addr
+
+        except IOError as e:
+            print(f"Warning: Could not read .mailmap: {e}", file=sys.stderr)
+
+    def load_ignore_file(self) -> None:
+        """Load the .get_maintainer.ignore file."""
+        for search_path in [".", os.environ.get("HOME", ""), ".scripts"]:
+            ignore_path = Path(search_path) / ".get_maintainer.ignore"
+            if ignore_path.exists():
+                try:
+                    with open(ignore_path, "r", encoding="utf-8") as f:
+                        for line in f:
+                            line = re.sub(r"#.*$", "", line).strip()
+                            if line and self._is_valid_email(line):
+                                self.ignore_emails.append(line.lower())
+                except IOError:
+                    pass
+                break
+
+    def load_config_file(self) -> dict:
+        """Load configuration from .get_maintainer.conf file."""
+        config_args = []
+        for search_path in [".", os.environ.get("HOME", ""), ".scripts"]:
+            conf_path = Path(search_path) / ".get_maintainer.conf"
+            if conf_path.exists():
+                try:
+                    with open(conf_path, "r", encoding="utf-8") as f:
+                        for line in f:
+                            line = re.sub(r"#.*$", "", line).strip()
+                            if line:
+                                config_args.extend(line.split())
+                except IOError:
+                    pass
+                break
+        return config_args
+
+    def _is_valid_email(self, email: str) -> bool:
+        """Basic email validation."""
+        return bool(re.match(r"^[^@]+@[^@]+\.[^@]+$", email))
+
+    def parse_email(self, formatted_email: str) -> tuple[str, str]:
+        """Parse an email address into name and address components."""
+        name = ""
+        address = ""
+
+        # Name <email@domain.com>
+        match = re.match(r"^([^<]+)<(.+@.*)>.*$", formatted_email)
+        if match:
+            name = match.group(1).strip().strip('"')
+            address = match.group(2).strip()
+            return name, address
+
+        # <email@domain.com>
+        match = re.match(r"^\s*<(.+@\S*)>.*$", formatted_email)
+        if match:
+            address = match.group(1).strip()
+            return name, address
+
+        # email@domain.com
+        match = re.match(r"^(.+@\S*).*$", formatted_email)
+        if match:
+            address = match.group(1).strip()
+
+        return name, address
+
+    def format_email(self, name: str, address: str, use_name: bool = True) -> str:
+        """Format name and address into a proper email string."""
+        name = name.strip().strip('"')
+        address = address.strip()
+
+        # Escape special characters in name
+        if name and re.search(r'[^\w\s\-]', name):
+            name = f'"{name}"'
+
+        if use_name and name:
+            return f"{name} <{address}>"
+        return address
+
+    def mailmap_email(self, email: str) -> str:
+        """Apply mailmap transformations to an email address."""
+        name, address = self.parse_email(email)
+        formatted = self.format_email(name, address, True)
+
+        real_name = name
+        real_address = address
+
+        # Check by full email first
+        if formatted.lower() in self.mailmap["names"]:
+            real_name = self.mailmap["names"][formatted.lower()]
+        elif address.lower() in self.mailmap["names"]:
+            real_name = self.mailmap["names"][address.lower()]
+
+        if formatted.lower() in self.mailmap["addresses"]:
+            real_address = self.mailmap["addresses"][formatted.lower()]
+        elif address.lower() in self.mailmap["addresses"]:
+            real_address = self.mailmap["addresses"][address.lower()]
+
+        return self.format_email(real_name, real_address, True)
+
+    def deduplicate_email(self, email: str) -> str:
+        """Deduplicate and normalize an email address."""
+        name, address = self.parse_email(email)
+        email = self.format_email(name, address, True)
+        email = self.mailmap_email(email)
+
+        if not self.config["email_remove_duplicates"]:
+            return email
+
+        name, address = self.parse_email(email)
+
+        if name and name.lower() in self.deduplicate_name_hash:
+            stored = self.deduplicate_name_hash[name.lower()]
+            name, address = stored
+        elif address.lower() in self.deduplicate_address_hash:
+            stored = self.deduplicate_address_hash[address.lower()]
+            name, address = stored
+        else:
+            self.deduplicate_name_hash[name.lower()] = (name, address)
+            self.deduplicate_address_hash[address.lower()] = (name, address)
+
+        return self.format_email(name, address, True)
+
+    def file_matches_pattern(self, filepath: str, pattern: str, regex: str) -> bool:
+        """Check if a file matches a pattern."""
+        try:
+            return bool(re.match(regex, filepath))
+        except re.error:
+            return False
+
+    def find_matching_sections(self, filepath: str) -> list[Section]:
+        """Find all sections that match a given file path."""
+        matching = []
+
+        for section in self.sections:
+            excluded = False
+
+            # Check exclude patterns first
+            for pattern, regex in section.excludes:
+                if self.file_matches_pattern(filepath, pattern, regex):
+                    excluded = True
+                    break
+
+            if excluded:
+                continue
+
+            # Check file patterns
+            for pattern, regex in section.files:
+                if self.file_matches_pattern(filepath, pattern, regex):
+                    matching.append(section)
+                    break
+            else:
+                # Check regex patterns (N: entries)
+                for regex in section.regex_patterns:
+                    try:
+                        if re.search(regex, filepath):
+                            matching.append(section)
+                            break
+                    except re.error:
+                        pass
+
+        return matching
+
+    def get_files_from_patch(self, patch_path: str) -> list[str]:
+        """Extract file paths from a patch file."""
+        files = []
+        fixes = []
+
+        try:
+            with open(patch_path, "r", encoding="utf-8", errors="replace") as f:
+                for line in f:
+                    # diff --git a/file1 b/file2
+                    match = re.match(r"^diff --git a/(\S+) b/(\S+)\s*$", line)
+                    if match:
+                        files.append(match.group(1))
+                        files.append(match.group(2))
+                        continue
+
+                    # +++ b/file or --- a/file
+                    match = re.match(r"^(?:\+\+\+|---)\s+[ab]/(.+)$", line)
+                    if match:
+                        files.append(match.group(1))
+                        continue
+
+                    # mode change
+                    match = re.match(r"^ mode change [0-7]+ => [0-7]+ (\S+)\s*$", line)
+                    if match:
+                        files.append(match.group(1))
+                        continue
+
+                    # rename from/to
+                    match = re.match(r"^rename (?:from|to) (\S+)\s*$", line)
+                    if match:
+                        files.append(match.group(1))
+                        continue
+
+                    # Fixes: tag
+                    if self.config["email_fixes"]:
+                        match = re.match(r"^Fixes:\s+([0-9a-fA-F]{6,40})", line)
+                        if match:
+                            fixes.append(match.group(1))
+
+        except IOError as e:
+            print(f"Error reading patch file: {e}", file=sys.stderr)
+            return []
+
+        # Remove duplicates while preserving order
+        seen = set()
+        unique_files = []
+        for f in files:
+            if f not in seen:
+                seen.add(f)
+                unique_files.append(f)
+
+        return unique_files
+
+    def add_email(self, email: str, role: str) -> None:
+        """Add an email address to the results."""
+        name, address = self.parse_email(email)
+
+        if not address:
+            return
+
+        if address.lower() in [e.lower() for e in self.ignore_emails]:
+            return
+
+        formatted = self.format_email(name, address, self.config["email_usename"])
+
+        # Check for duplicates
+        if self.config["email_remove_duplicates"]:
+            if name and name.lower() in self.email_hash_name:
+                # Update role if needed
+                for entry in self.email_to:
+                    entry_name, _ = self.parse_email(entry.email)
+                    if entry_name.lower() == name.lower():
+                        if role and role not in entry.role:
+                            if entry.role:
+                                entry.role += f",{role}"
+                            else:
+                                entry.role = role
+                        return
+            if address.lower() in self.email_hash_address:
+                for entry in self.email_to:
+                    _, entry_addr = self.parse_email(entry.email)
+                    if entry_addr.lower() == address.lower():
+                        if role and role not in entry.role:
+                            if entry.role:
+                                entry.role += f",{role}"
+                            else:
+                                entry.role = role
+                        return
+
+        entry = MaintainerEntry(email=formatted, role=role)
+        self.email_to.append(entry)
+
+        if name:
+            self.email_hash_name[name.lower()] = True
+        self.email_hash_address[address.lower()] = True
+
+    def add_list(self, list_addr: str, role: str) -> None:
+        """Add a mailing list to the results."""
+        # Parse list address and any additional info
+        parts = list_addr.split(None, 1)
+        address = parts[0]
+        additional = parts[1] if len(parts) > 1 else ""
+
+        # Check for subscribers-only or moderated lists
+        if "subscribers-only" in additional:
+            if not self.config["email_subscriber_list"]:
+                return
+            role = f"subscriber list:{role}" if role else "subscriber list"
+        elif "moderated" in additional:
+            if not self.config["email_moderated_list"]:
+                return
+            role = f"moderated list:{role}" if role else "moderated list"
+        else:
+            role = f"open list:{role}" if role else "open list"
+
+        # Check for duplicates
+        for entry in self.list_to:
+            if entry.email.lower() == address.lower():
+                return
+
+        self.list_to.append(MaintainerEntry(email=address, role=role))
+
+    def process_section(self, section: Section, suffix: str = "") -> None:
+        """Process a matching section and add its entries."""
+        subsystem_name = section.name
+        if (self.config["output_section_maxlen"] and
+                len(subsystem_name) > self.config["output_section_maxlen"]):
+            subsystem_name = subsystem_name[:self.config["output_section_maxlen"] - 3] + "..."
+
+        # Add maintainers
+        if self.config["email_maintainer"]:
+            for maintainer in section.maintainers:
+                role = f"maintainer:{subsystem_name}{suffix}"
+                self.add_email(maintainer, role)
+
+        # Add reviewers
+        if self.config["email_reviewer"]:
+            for reviewer in section.reviewers:
+                role = f"reviewer:{subsystem_name}{suffix}"
+                self.add_email(reviewer, role)
+
+        # Add mailing lists
+        if self.config["email_list"]:
+            for mailing_list in section.mailing_lists:
+                role = subsystem_name if subsystem_name != "THE REST" else ""
+                self.add_list(mailing_list, role + suffix)
+
+        # Add SCM info
+        if self.config["scm"]:
+            for scm in section.scm:
+                self.scm_list.append(scm + suffix)
+
+        # Add web info
+        if self.config["web"]:
+            for web in section.web:
+                self.web_list.append(web + suffix)
+
+        # Add bug info
+        if self.config["bug"]:
+            for bug in section.bug:
+                self.bug_list.append(bug + suffix)
+
+        # Add subsystem
+        if self.config["subsystem"]:
+            self.subsystem_list.append(section.name + suffix)
+
+        # Add status
+        if self.config["status"] and section.status:
+            self.status_list.append(section.status + suffix)
+
+    def get_git_signers(self, filepath: str) -> list[tuple[str, int]]:
+        """Get commit signers from git history for a file."""
+        if self._detect_vcs() != "git":
+            return []
+
+        cmd = [
+            "git", "log",
+            "--no-color", "--follow",
+            f"--since={self.config['email_git_since']}",
+            "--numstat", "--no-merges",
+            '--format=GitCommit: %H%nGitAuthor: %an <%ae>%nGitDate: %aD%nGitSubject: %s%n%b',
+            "--", filepath
+        ]
+
+        try:
+            result = subprocess.run(
+                cmd,
+                capture_output=True,
+                text=True,
+                cwd=self.root_path
+            )
+            if result.returncode != 0:
+                return []
+
+            signers = defaultdict(int)
+            signature_pattern = "|".join(re.escape(tag) for tag in SIGNATURE_TAGS)
+            if self.config["email_git_all_signature_types"]:
+                signature_pattern = r".+[Bb][Yy]:"
+
+            for line in result.stdout.split("\n"):
+                # Match author lines
+                match = re.match(r"^GitAuthor:\s*(.+)$", line)
+                if match:
+                    email = self.deduplicate_email(match.group(1))
+                    signers[email] += 1
+                    continue
+
+                # Match signature lines
+                match = re.match(rf"^\s*({signature_pattern})\s*(.+@.+)$", line)
+                if match:
+                    email = self.deduplicate_email(match.group(2))
+                    signers[email] += 1
+
+            return sorted(signers.items(), key=lambda x: -x[1])
+
+        except (subprocess.CalledProcessError, FileNotFoundError):
+            return []
+
+    def add_vcs_signers(self, filepath: str, exact_match: bool) -> None:
+        """Add signers from git history."""
+        if not self.config["email_git"]:
+            if not (self.config["email_git_fallback"] and not exact_match):
+                return
+
+        if self._detect_vcs() != "git":
+            return
+
+        signers = self.get_git_signers(filepath)
+
+        total_commits = sum(count for _, count in signers)
+        if total_commits == 0:
+            return
+
+        added = 0
+        for email, count in signers:
+            if added >= self.config["email_git_max_maintainers"]:
+                break
+            if count < self.config["email_git_min_signatures"]:
+                break
+
+            percent = (count * 100) // total_commits
+            if percent < self.config["email_git_min_percent"]:
+                break
+
+            if self.config["output_rolestats"]:
+                role = f"commit_signer:{count}/{total_commits}={percent}%"
+            else:
+                role = "commit_signer"
+
+            self.add_email(email, role)
+            added += 1
+
+    def find_maintainers(self, files: list[str]) -> None:
+        """Find maintainers for the given files."""
+        exact_matches = set()
+
+        for filepath in files:
+            matching_sections = self.find_matching_sections(filepath)
+
+            # Track if we found an exact match
+            for section in matching_sections:
+                if section.status and "maintain" in section.status.lower():
+                    if section.maintainers:
+                        exact_matches.add(filepath)
+
+            for section in matching_sections:
+                self.process_section(section)
+
+        # Add VCS signers
+        if self.config["email"]:
+            for filepath in files:
+                exact_match = filepath in exact_matches
+                self.add_vcs_signers(filepath, exact_match)
+
+    def output_results(self) -> None:
+        """Output the results."""
+        results = []
+
+        # Combine and deduplicate results
+        seen_emails = set()
+
+        if self.config["email"]:
+            for entry in self.email_to + self.list_to:
+                email_lower = entry.email.lower()
+                if email_lower in seen_emails:
+                    continue
+                seen_emails.add(email_lower)
+
+                if self.config["output_roles"] or self.config["output_rolestats"]:
+                    results.append(f"{entry.email} ({entry.role})")
+                else:
+                    results.append(entry.email)
+
+        # Output
+        if self.config["output_multiline"]:
+            for result in results:
+                print(result)
+        else:
+            print(self.config["output_separator"].join(results))
+
+        # Additional outputs
+        if self.config["scm"]:
+            for scm in sorted(set(self.scm_list)):
+                print(scm)
+
+        if self.config["status"]:
+            for status in sorted(set(self.status_list)):
+                print(status)
+
+        if self.config["subsystem"]:
+            for subsystem in sorted(set(self.subsystem_list)):
+                print(subsystem)
+
+        if self.config["web"]:
+            for web in sorted(set(self.web_list)):
+                print(web)
+
+        if self.config["bug"]:
+            for bug in sorted(set(self.bug_list)):
+                print(bug)
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse command line arguments."""
+    parser = argparse.ArgumentParser(
+        description="Find maintainers and mailing lists for patches or files",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  %(prog)s patch.diff              Find maintainers for a patch
+  %(prog)s -f drivers/net/foo.c    Find maintainers for a file
+  %(prog)s --no-git patch.diff     Skip git history analysis
+
+Default options:
+  [--email --nogit --git-fallback --m --r --n --l --multiline
+   --pattern-depth=0 --remove-duplicates --rolestats --keywords]
+"""
+    )
+
+    parser.add_argument("files", nargs="*", help="Patch files or files to check")
+    parser.add_argument("-v", "--version", action="version", version=f"%(prog)s {VERSION}")
+
+    # Email options
+    email_group = parser.add_argument_group("Email options")
+    email_group.add_argument("--email", dest="email", action="store_true", default=True,
+                            help="Print email addresses (default)")
+    email_group.add_argument("--no-email", dest="email", action="store_false",
+                            help="Don't print email addresses")
+    email_group.add_argument("-m", dest="email_maintainer", action="store_true", default=True,
+                            help="Include maintainers")
+    email_group.add_argument("--no-m", dest="email_maintainer", action="store_false",
+                            help="Exclude maintainers")
+    email_group.add_argument("-r", dest="email_reviewer", action="store_true", default=True,
+                            help="Include reviewers")
+    email_group.add_argument("--no-r", dest="email_reviewer", action="store_false",
+                            help="Exclude reviewers")
+    email_group.add_argument("-n", dest="email_usename", action="store_true", default=True,
+                            help="Include name in email")
+    email_group.add_argument("--no-n", dest="email_usename", action="store_false",
+                            help="Don't include name in email")
+    email_group.add_argument("-l", dest="email_list", action="store_true", default=True,
+                            help="Include mailing lists")
+    email_group.add_argument("--no-l", dest="email_list", action="store_false",
+                            help="Exclude mailing lists")
+    email_group.add_argument("--moderated", dest="email_moderated_list", action="store_true", default=True,
+                            help="Include moderated mailing lists")
+    email_group.add_argument("--no-moderated", dest="email_moderated_list", action="store_false",
+                            help="Exclude moderated mailing lists")
+    email_group.add_argument("-s", dest="email_subscriber_list", action="store_true", default=False,
+                            help="Include subscriber-only mailing lists")
+    email_group.add_argument("--no-s", dest="email_subscriber_list", action="store_false",
+                            help="Exclude subscriber-only mailing lists")
+    email_group.add_argument("--remove-duplicates", dest="email_remove_duplicates",
+                            action="store_true", default=True,
+                            help="Remove duplicate email addresses")
+    email_group.add_argument("--no-remove-duplicates", dest="email_remove_duplicates",
+                            action="store_false",
+                            help="Don't remove duplicate email addresses")
+    email_group.add_argument("--mailmap", dest="email_use_mailmap", action="store_true", default=True,
+                            help="Use .mailmap file")
+    email_group.add_argument("--no-mailmap", dest="email_use_mailmap", action="store_false",
+                            help="Don't use .mailmap file")
+    email_group.add_argument("--fixes", dest="email_fixes", action="store_true", default=True,
+                            help="Add signers from Fixes: commits")
+    email_group.add_argument("--no-fixes", dest="email_fixes", action="store_false",
+                            help="Don't add signers from Fixes: commits")
+
+    # Git options
+    git_group = parser.add_argument_group("Git options")
+    git_group.add_argument("--git", dest="email_git", action="store_true", default=False,
+                          help="Include recent git signers")
+    git_group.add_argument("--no-git", dest="email_git", action="store_false",
+                          help="Don't include git signers")
+    git_group.add_argument("--git-fallback", dest="email_git_fallback", action="store_true", default=True,
+                          help="Use git when no exact MAINTAINERS match")
+    git_group.add_argument("--no-git-fallback", dest="email_git_fallback", action="store_false",
+                          help="Don't use git fallback")
+    git_group.add_argument("--git-all-signature-types", dest="email_git_all_signature_types",
+                          action="store_true", default=False,
+                          help="Include all signature types")
+    git_group.add_argument("--git-blame", dest="email_git_blame", action="store_true", default=False,
+                          help="Use git blame")
+    git_group.add_argument("--no-git-blame", dest="email_git_blame", action="store_false",
+                          help="Don't use git blame")
+    git_group.add_argument("--git-min-signatures", type=int, default=1,
+                          help="Minimum signatures required (default: 1)")
+    git_group.add_argument("--git-max-maintainers", type=int, default=5,
+                          help="Maximum maintainers to add (default: 5)")
+    git_group.add_argument("--git-min-percent", type=int, default=5,
+                          help="Minimum percentage of commits (default: 5)")
+    git_group.add_argument("--git-since", default="1-year-ago",
+                          help="Git history to use (default: 1-year-ago)")
+
+    # Output options
+    output_group = parser.add_argument_group("Output options")
+    output_group.add_argument("--multiline", dest="output_multiline", action="store_true", default=True,
+                             help="Print one entry per line (default)")
+    output_group.add_argument("--no-multiline", dest="output_multiline", action="store_false",
+                             help="Print all entries on one line")
+    output_group.add_argument("--separator", dest="output_separator", default=", ",
+                             help="Separator for single-line output (default: ', ')")
+    output_group.add_argument("--roles", dest="output_roles", action="store_true", default=False,
+                             help="Show roles")
+    output_group.add_argument("--no-roles", dest="output_roles", action="store_false",
+                             help="Don't show roles")
+    output_group.add_argument("--rolestats", dest="output_rolestats", action="store_true", default=True,
+                             help="Show roles and statistics (default)")
+    output_group.add_argument("--no-rolestats", dest="output_rolestats", action="store_false",
+                             help="Don't show role statistics")
+
+    # Other options
+    other_group = parser.add_argument_group("Other options")
+    other_group.add_argument("-f", "--file", dest="from_filename", action="store_true", default=False,
+                            help="Treat arguments as filenames, not patches")
+    other_group.add_argument("--scm", action="store_true", default=False,
+                            help="Print SCM information")
+    other_group.add_argument("--no-scm", dest="scm", action="store_false",
+                            help="Don't print SCM information")
+    other_group.add_argument("--status", action="store_true", default=False,
+                            help="Print status information")
+    other_group.add_argument("--no-status", dest="status", action="store_false",
+                            help="Don't print status information")
+    other_group.add_argument("--subsystem", action="store_true", default=False,
+                            help="Print subsystem name")
+    other_group.add_argument("--no-subsystem", dest="subsystem", action="store_false",
+                            help="Don't print subsystem name")
+    other_group.add_argument("--web", action="store_true", default=False,
+                            help="Print website information")
+    other_group.add_argument("--no-web", dest="web", action="store_false",
+                            help="Don't print website information")
+    other_group.add_argument("--bug", action="store_true", default=False,
+                            help="Print bug reporting information")
+    other_group.add_argument("--no-bug", dest="bug", action="store_false",
+                            help="Don't print bug reporting information")
+    other_group.add_argument("-k", "--keywords", action="store_true", default=True,
+                            help="Scan for keywords")
+    other_group.add_argument("--no-keywords", dest="keywords", action="store_false",
+                            help="Don't scan for keywords")
+    other_group.add_argument("--pattern-depth", type=int, default=0,
+                            help="Pattern directory traversal depth (default: 0 = all)")
+    other_group.add_argument("--sections", action="store_true", default=False,
+                            help="Print all matching sections")
+    other_group.add_argument("--maintainer-path", "--mpath",
+                            help="Path to MAINTAINERS file")
+
+    return parser.parse_args()
+
+
+def main():
+    """Main entry point."""
+    args = parse_args()
+
+    # Build configuration from arguments
+    config = DEFAULT_CONFIG.copy()
+    for key in config:
+        if hasattr(args, key):
+            config[key] = getattr(args, key)
+
+    # Handle special cases
+    if args.output_separator != ", ":
+        config["output_multiline"] = False
+
+    if config["output_rolestats"]:
+        config["output_roles"] = True
+
+    # Create maintainer finder
+    gm = GetMaintainer(config)
+
+    # Load configuration files
+    gm.load_ignore_file()
+    if config["email_use_mailmap"]:
+        gm.load_mailmap()
+
+    # Load MAINTAINERS file
+    if args.maintainer_path:
+        gm.load_maintainers_file(Path(args.maintainer_path))
+    else:
+        gm.load_maintainers_file()
+
+    # Get files to process
+    if not args.files:
+        if sys.stdin.isatty():
+            print("Error: No files specified", file=sys.stderr)
+            sys.exit(1)
+        # Read from stdin
+        args.files = ["-"]
+
+    all_files = []
+    for file_arg in args.files:
+        if file_arg == "-":
+            # Read patch from stdin
+            import tempfile
+            with tempfile.NamedTemporaryFile(mode="w", suffix=".patch", delete=False) as tmp:
+                tmp.write(sys.stdin.read())
+                tmp_path = tmp.name
+            all_files.extend(gm.get_files_from_patch(tmp_path))
+            os.unlink(tmp_path)
+        elif args.from_filename:
+            # Treat as file path
+            all_files.append(file_arg)
+        else:
+            # Treat as patch file
+            patch_files = gm.get_files_from_patch(file_arg)
+            if not patch_files:
+                print(f"Warning: '{file_arg}' doesn't appear to be a patch. Use -f to treat as file.",
+                      file=sys.stderr)
+            all_files.extend(patch_files)
+
+    if not all_files:
+        print("Error: No files found to process", file=sys.stderr)
+        sys.exit(1)
+
+    # Find maintainers
+    gm.find_maintainers(all_files)
+
+    # Output results
+    gm.output_results()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/devtools/get-maintainer.sh b/devtools/get-maintainer.sh
index bba4d3f68d..915c31a359 100755
--- a/devtools/get-maintainer.sh
+++ b/devtools/get-maintainer.sh
@@ -1,34 +1,9 @@
 #!/bin/sh
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
+#
+# Wrapper script for get_maintainer.py for backward compatibility
 
+SCRIPT_DIR=$(dirname $(readlink -f $0))
 
-# Load config options:
-# - DPDK_GETMAINTAINER_PATH
-. $(dirname $(readlink -f $0))/load-devel-config
-
-options="--no-tree --no-git-fallback"
-options="$options --no-rolestats"
-
-print_usage () {
-	cat <<- END_OF_HELP
-	usage: $(basename $0) <patch>
-
-	The DPDK_GETMAINTAINER_PATH variable should be set to the full path to
-	the get_maintainer.pl script located in Linux kernel sources. Example:
-	DPDK_GETMAINTAINER_PATH=~/linux/scripts/get_maintainer.pl
-
-	Also refer to devtools/load-devel-config to store your configuration.
-	END_OF_HELP
-}
-
-# Requires DPDK_GETMAINTAINER_PATH devel config option set
-if [ ! -f "$DPDK_GETMAINTAINER_PATH" ] ||
-   [ ! -x "$DPDK_GETMAINTAINER_PATH" ] ; then
-	print_usage >&2
-	echo
-	echo 'Cannot execute DPDK_GETMAINTAINER_PATH' >&2
-	exit 1
-fi
-
-$DPDK_GETMAINTAINER_PATH $options $@
+exec python3 "$SCRIPT_DIR/get-maintainer.py" "$@"
diff --git a/doc/guides/contributing/patches.rst b/doc/guides/contributing/patches.rst
index 069a18e4ec..c46fca8eb9 100644
--- a/doc/guides/contributing/patches.rst
+++ b/doc/guides/contributing/patches.rst
@@ -562,9 +562,9 @@ The appropriate maintainer can be found in the ``MAINTAINERS`` file::
 
    git send-email --to maintainer@some.org --cc dev@dpdk.org 000*.patch
 
-Script ``get-maintainer.sh`` can be used to select maintainers automatically::
+Script ``get-maintainer.py`` can be used to select maintainers automatically::
 
-  git send-email --to-cmd ./devtools/get-maintainer.sh --cc dev@dpdk.org 000*.patch
+  git send-email --to-cmd ./devtools/get-maintainer.py --cc dev@dpdk.org 000*.patch
 
 You can test the emails by sending it to yourself or with the ``--dry-run`` option.
 
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [RFC] devtools: replace get-maintainer shell wrapper with Python script
  2026-01-31 20:48 [RFC] devtools: replace get-maintainer shell wrapper with Python script Stephen Hemminger
@ 2026-02-01 13:51 ` Thomas Monjalon
  2026-02-01 19:01   ` Stephen Hemminger
  2026-02-01 19:22 ` [RFC v2] devtools: replace checkpatches " Stephen Hemminger
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 13+ messages in thread
From: Thomas Monjalon @ 2026-02-01 13:51 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev, Bruce Richardson, Aaron Conole, David Marchand

31/01/2026 21:48, Stephen Hemminger:
> DPDK has been reusing the Linux kernel get_maintainer perl script
> but that creates an unwanted dependency on kernel source.
> 
> This new script replaces that with a standalone Python implementation
> created in a few minutes with AI. The command line arguments are
> a subset of the features that make sense in DPDK.

Almost thousand lines for this new script.
Are you sure that's something we want to maintain ourself?




^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC] devtools: replace get-maintainer shell wrapper with Python script
  2026-02-01 13:51 ` Thomas Monjalon
@ 2026-02-01 19:01   ` Stephen Hemminger
  2026-02-01 20:16     ` Thomas Monjalon
  0 siblings, 1 reply; 13+ messages in thread
From: Stephen Hemminger @ 2026-02-01 19:01 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: dev, Bruce Richardson, Aaron Conole, David Marchand

On Sun, 01 Feb 2026 14:51:01 +0100
Thomas Monjalon <thomas@monjalon.net> wrote:

> 31/01/2026 21:48, Stephen Hemminger:
> > DPDK has been reusing the Linux kernel get_maintainer perl script
> > but that creates an unwanted dependency on kernel source.
> > 
> > This new script replaces that with a standalone Python implementation
> > created in a few minutes with AI. The command line arguments are
> > a subset of the features that make sense in DPDK.  
> 
> Almost thousand lines for this new script.
> Are you sure that's something we want to maintain ourself?

It really is less bad than the awk mess.
And the kernel often changes the rules.

The bigger issue is that the python version is not detecting everything yet.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [RFC v2] devtools: replace checkpatches shell wrapper with Python script
  2026-01-31 20:48 [RFC] devtools: replace get-maintainer shell wrapper with Python script Stephen Hemminger
  2026-02-01 13:51 ` Thomas Monjalon
@ 2026-02-01 19:22 ` Stephen Hemminger
  2026-02-03 14:17 ` [RFC v3] " Stephen Hemminger
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 13+ messages in thread
From: Stephen Hemminger @ 2026-02-01 19:22 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

Replace the checkpatches.sh shell script that wraps the Linux kernel's
checkpatch.pl with a standalone Python implementation. The new
checkpatches.py script provides equivalent functionality without
requiring the kernel source tree or Perl.

Performance is significantly improved: checking a recent flow parser
patch series dropped from 2m23s to 0.43s (approximately 300x faster).
The script is in a beta test state, it finds the same or more
issues as the original.

Features:

Patch validation:
  - Format validation and corruption detection
  - Signed-off-by verification
  - Fixes tag format checking
  - Line length and trailing whitespace checks

Kernel checkpatch.pl compatible checks:
  - AVOID_EXTERNS, UNNECESSARY_BREAK, COMPLEX_MACRO
  - STRNCPY/STRCPY, RETURN_PARENTHESES, POINTER_LOCATION
  - INITIALISED_STATIC, GLOBAL_INITIALISERS, DEEP_INDENTATION
  - TRAILING_STATEMENTS, LINE_CONTINUATIONS, ONE_SEMICOLON
  - REPEATED_WORD, CONSTANT_COMPARISON, SELF_ASSIGNMENT
  - INLINE_LOCATION, STORAGE_CLASS, FUNCTION_WITHOUT_ARGS
  - MACRO_WITH_FLOW_CONTROL, MULTISTATEMENT_MACRO_USE_DO_WHILE
  - PREFER_DEFINED_ATTRIBUTE_MACRO (using DPDK __rte_* macros)
  - TYPO_SPELLING via codespell dictionary

DPDK-specific forbidden token checks:
  - RTE_LOG usage (prefer RTE_LOG_LINE)
  - printf/fprintf to stdout/stderr in libs/drivers
  - rte_panic/rte_exit, direct __attribute__ usage
  - Deprecated atomics (rte_atomicNN_xxx, rte_smp_[rw]mb)
  - Compiler builtins (__sync_xxx, __atomic_xxx, __builtin_xxx)
  - pthread functions (prefer rte_thread)
  - Reserved keywords, pragma, variadic macros

DPDK tag validation:
  - __rte_experimental and __rte_internal placement
  - __rte_packed_begin/end pairing
  - __rte_aligned attribute checking

Git integration:
  - Check commits by range (-r) or count (-n)
  - Read patches from files or stdin

Known limitations:
  - BRACES check not implemented (requires multi-line analysis)

This script was developed with assistance from the Claude AI assistant.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 devtools/checkpatches.py | 1259 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 1259 insertions(+)
 create mode 100755 devtools/checkpatches.py

diff --git a/devtools/checkpatches.py b/devtools/checkpatches.py
new file mode 100755
index 0000000000..0feb6b7624
--- /dev/null
+++ b/devtools/checkpatches.py
@@ -0,0 +1,1259 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2015 6WIND S.A.
+# Copyright 2025 - Python rewrite
+#
+# checkpatch.py - Check patches for common style issues
+#
+# This is a standalone Python replacement for the DPDK checkpatches.sh
+# script that previously wrapped the Linux kernel's checkpatch.pl.
+
+import argparse
+import os
+import re
+import subprocess
+import sys
+import tempfile
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Optional
+
+VERSION = "1.0"
+
+# Default configuration
+DEFAULT_LINE_LENGTH = 100
+DEFAULT_CODESPELL_DICT = "/usr/share/codespell/dictionary.txt"
+
+
+@dataclass
+class CheckResult:
+    """Result of a single check."""
+    level: str  # ERROR, WARNING, CHECK
+    type_name: str
+    message: str
+    filename: str = ""
+    line_num: int = 0
+    line_content: str = ""
+
+
+@dataclass
+class PatchInfo:
+    """Information extracted from a patch."""
+    subject: str = ""
+    author: str = ""
+    author_email: str = ""
+    signoffs: list = field(default_factory=list)
+    files: list = field(default_factory=list)
+    added_lines: dict = field(default_factory=dict)  # filename -> [(line_num, content)]
+    has_fixes_tag: bool = False
+    fixes_commits: list = field(default_factory=list)
+
+
+class CheckPatch:
+    """Main class for checking patches."""
+
+    def __init__(self, config: dict):
+        self.config = config
+        self.results: list[CheckResult] = []
+        self.errors = 0
+        self.warnings = 0
+        self.checks = 0
+        self.lines_checked = 0
+
+        # Load codespell dictionary if enabled
+        self.spelling_dict = {}
+        if config.get("codespell"):
+            self._load_codespell_dict()
+
+        # DPDK-specific ignore list (matches original shell script)
+        self.ignored_types = set([
+            "LINUX_VERSION_CODE", "ENOSYS", "FILE_PATH_CHANGES",
+            "MAINTAINERS_STYLE", "SPDX_LICENSE_TAG", "VOLATILE",
+            "PREFER_PACKED", "PREFER_ALIGNED", "PREFER_PRINTF", "STRLCPY",
+            "PREFER_KERNEL_TYPES", "PREFER_FALLTHROUGH", "BIT_MACRO",
+            "CONST_STRUCT", "SPLIT_STRING", "LONG_LINE_STRING",
+            "C99_COMMENT_TOLERANCE", "LINE_SPACING", "PARENTHESIS_ALIGNMENT",
+            "NETWORKING_BLOCK_COMMENT_STYLE", "NEW_TYPEDEFS",
+            "COMPARISON_TO_NULL", "AVOID_BUG", "EXPORT_SYMBOL",
+            "BAD_REPORTED_BY_LINK"
+        ])
+
+        # Forbidden token rules for DPDK
+        self.forbidden_rules = self._init_forbidden_rules()
+
+    def _load_codespell_dict(self) -> None:
+        """Load the codespell dictionary."""
+        dict_path = self.config.get("codespell_file")
+
+        if not dict_path:
+            # Search common locations for the dictionary
+            search_paths = [
+                DEFAULT_CODESPELL_DICT,
+                "/usr/local/lib/python3.12/dist-packages/codespell_lib/data/dictionary.txt",
+                "/usr/local/lib/python3.11/dist-packages/codespell_lib/data/dictionary.txt",
+                "/usr/local/lib/python3.10/dist-packages/codespell_lib/data/dictionary.txt",
+                "/usr/lib/python3/dist-packages/codespell_lib/data/dictionary.txt",
+            ]
+
+            # Also try to find it via codespell module
+            try:
+                import codespell_lib
+                module_path = os.path.join(
+                    os.path.dirname(codespell_lib.__file__),
+                    'data', 'dictionary.txt'
+                )
+                search_paths.insert(0, module_path)
+            except ImportError:
+                pass
+
+            for path in search_paths:
+                if os.path.exists(path):
+                    dict_path = path
+                    break
+
+        if not dict_path or not os.path.exists(dict_path):
+            return
+
+        try:
+            with open(dict_path, "r", encoding="utf-8", errors="ignore") as f:
+                for line in f:
+                    line = line.strip()
+                    if not line or line.startswith("#"):
+                        continue
+                    parts = line.split("->")
+                    if len(parts) >= 2:
+                        wrong = parts[0].strip().lower()
+                        correct = parts[1].strip().split(",")[0].strip()
+                        self.spelling_dict[wrong] = correct
+        except IOError:
+            pass
+
+    def _init_forbidden_rules(self) -> list:
+        """Initialize DPDK-specific forbidden token rules."""
+        return [
+            # Refrain from new calls to RTE_LOG in libraries
+            {
+                "folders": ["lib"],
+                "patterns": [r"RTE_LOG\("],
+                "message": "Prefer RTE_LOG_LINE",
+            },
+            # Refrain from new calls to RTE_LOG in drivers
+            {
+                "folders": ["drivers"],
+                "skip_files": [r".*osdep\.h$"],
+                "patterns": [r"RTE_LOG\(", r"RTE_LOG_DP\(", r"rte_log\("],
+                "message": "Prefer RTE_LOG_LINE/RTE_LOG_DP_LINE",
+            },
+            # No output on stdout or stderr
+            {
+                "folders": ["lib", "drivers"],
+                "patterns": [r"\bprintf\b", r"fprintf\(stdout,", r"fprintf\(stderr,"],
+                "message": "Writing to stdout or stderr",
+            },
+            # Refrain from rte_panic() and rte_exit()
+            {
+                "folders": ["lib", "drivers"],
+                "patterns": [r"rte_panic\(", r"rte_exit\("],
+                "message": "Using rte_panic/rte_exit",
+            },
+            # Don't call directly install_headers()
+            {
+                "folders": ["lib", "drivers"],
+                "patterns": [r"\binstall_headers\b"],
+                "message": "Using install_headers()",
+            },
+            # Refrain from using compiler attribute without common macro
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "skip_files": [r"lib/eal/include/rte_common\.h"],
+                "patterns": [r"__attribute__"],
+                "message": "Using compiler attribute directly",
+            },
+            # Check %l or %ll format specifier
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"%ll*[xud]"],
+                "message": "Using %l format, prefer %PRI*64 if type is [u]int64_t",
+            },
+            # Refrain from 16/32/64 bits rte_atomicNN_xxx()
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"rte_atomic[0-9][0-9]_.*\("],
+                "message": "Using rte_atomicNN_xxx",
+            },
+            # Refrain from rte_smp_[r/w]mb()
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"rte_smp_(r|w)?mb\("],
+                "message": "Using rte_smp_[r/w]mb",
+            },
+            # Refrain from __sync_xxx builtins
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"__sync_.*\("],
+                "message": "Using __sync_xxx builtins",
+            },
+            # Refrain from __rte_atomic_thread_fence()
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"__rte_atomic_thread_fence\("],
+                "message": "Using __rte_atomic_thread_fence, prefer rte_atomic_thread_fence",
+            },
+            # Refrain from __atomic_xxx builtins
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "skip_files": [r"drivers/common/cnxk/"],
+                "patterns": [r"__atomic_.*\(", r"__ATOMIC_(RELAXED|CONSUME|ACQUIRE|RELEASE|ACQ_REL|SEQ_CST)"],
+                "message": "Using __atomic_xxx/__ATOMIC_XXX built-ins, prefer rte_atomic_xxx/rte_memory_order_xxx",
+            },
+            # Refrain from some pthread functions
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"pthread_(create|join|detach|set(_?name_np|affinity_np)|attr_set(inheritsched|schedpolicy))\("],
+                "message": "Using pthread functions, prefer rte_thread",
+            },
+            # Forbid use of __reserved
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"\b__reserved\b"],
+                "message": "Using __reserved",
+            },
+            # Forbid use of __alignof__
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"\b__alignof__\b"],
+                "message": "Using __alignof__, prefer C11 alignof",
+            },
+            # Forbid use of __typeof__
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"\b__typeof__\b"],
+                "message": "Using __typeof__, prefer typeof",
+            },
+            # Forbid use of __builtin_*
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "skip_files": [r"lib/eal/", r"drivers/.*/base/", r"drivers/.*osdep\.h$"],
+                "patterns": [r"\b__builtin_"],
+                "message": "Using __builtin helpers, prefer EAL macros",
+            },
+            # Forbid inclusion of linux/pci_regs.h
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"include.*linux/pci_regs\.h"],
+                "message": "Using linux/pci_regs.h, prefer rte_pci.h",
+            },
+            # Forbid variadic argument pack extension in macros
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"#\s*define.*[^(,\s]\.\.\.[\s]*\)"],
+                "message": "Do not use variadic argument pack in macros",
+            },
+            # Forbid __rte_packed_begin with enums
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"enum.*__rte_packed_begin"],
+                "message": "Using __rte_packed_begin with enum is not allowed",
+            },
+            # Forbid use of #pragma
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "skip_files": [r"lib/eal/include/rte_common\.h"],
+                "patterns": [r"(#pragma|_Pragma)"],
+                "message": "Using compilers pragma is not allowed",
+            },
+            # Forbid experimental build flag except in examples
+            {
+                "folders": ["lib", "drivers", "app"],
+                "patterns": [r"-DALLOW_EXPERIMENTAL_API", r"allow_experimental_apis"],
+                "message": "Using experimental build flag for in-tree compilation",
+            },
+            # Refrain from using RTE_LOG_REGISTER for drivers and libs
+            {
+                "folders": ["lib", "drivers"],
+                "patterns": [r"\bRTE_LOG_REGISTER\b"],
+                "message": "Using RTE_LOG_REGISTER, prefer RTE_LOG_REGISTER_(DEFAULT|SUFFIX)",
+            },
+            # Forbid non-internal thread in drivers and libs
+            {
+                "folders": ["lib", "drivers"],
+                "patterns": [r"rte_thread_(set_name|create_control)\("],
+                "message": "Prefer rte_thread_(set_prefixed_name|create_internal_control)",
+            },
+        ]
+
+    def add_result(self, level: str, type_name: str, message: str,
+                   filename: str = "", line_num: int = 0, line_content: str = "") -> None:
+        """Add a check result."""
+        if type_name.upper() in self.ignored_types:
+            return
+
+        result = CheckResult(
+            level=level,
+            type_name=type_name,
+            message=message,
+            filename=filename,
+            line_num=line_num,
+            line_content=line_content
+        )
+        self.results.append(result)
+
+        if level == "ERROR":
+            self.errors += 1
+        elif level == "WARNING":
+            self.warnings += 1
+        else:
+            self.checks += 1
+
+    def parse_patch(self, content: str) -> PatchInfo:
+        """Parse a patch and extract information."""
+        info = PatchInfo()
+        current_file = ""
+        in_diff = False
+        line_num_in_new = 0
+
+        lines = content.split("\n")
+        for i, line in enumerate(lines):
+            # Extract subject
+            if line.startswith("Subject:"):
+                subject = line[8:].strip()
+                # Handle multi-line subjects
+                j = i + 1
+                while j < len(lines) and lines[j].startswith(" "):
+                    subject += " " + lines[j].strip()
+                    j += 1
+                info.subject = subject
+
+            # Extract author
+            if line.startswith("From:"):
+                info.author = line[5:].strip()
+                match = re.search(r"<([^>]+)>", info.author)
+                if match:
+                    info.author_email = match.group(1)
+
+            # Extract Signed-off-by
+            match = re.match(r"^Signed-off-by:\s*(.+)$", line, re.IGNORECASE)
+            if match:
+                info.signoffs.append(match.group(1).strip())
+
+            # Extract Fixes tag
+            match = re.match(r"^Fixes:\s*([0-9a-fA-F]+)", line)
+            if match:
+                info.has_fixes_tag = True
+                info.fixes_commits.append(match.group(1))
+
+            # Track files in diff
+            if line.startswith("diff --git"):
+                match = re.match(r"diff --git a/(\S+) b/(\S+)", line)
+                if match:
+                    current_file = match.group(2)
+                    if current_file not in info.files:
+                        info.files.append(current_file)
+                    info.added_lines[current_file] = []
+                in_diff = True
+
+            # Track hunks
+            if line.startswith("@@"):
+                match = re.match(r"@@ -\d+(?:,\d+)? \+(\d+)", line)
+                if match:
+                    line_num_in_new = int(match.group(1))
+                continue
+
+            # Track added lines
+            if in_diff and current_file:
+                if line.startswith("+") and not line.startswith("+++"):
+                    info.added_lines[current_file].append((line_num_in_new, line[1:]))
+                    line_num_in_new += 1
+                elif line.startswith("-"):
+                    pass  # Deleted line, don't increment
+                elif not line.startswith("\\"):
+                    line_num_in_new += 1
+
+        return info
+
+    def check_line_length(self, patch_info: PatchInfo) -> None:
+        """Check for lines exceeding maximum length."""
+        max_len = self.config.get("max_line_length", DEFAULT_LINE_LENGTH)
+
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                # Skip strings that span multiple lines
+                if len(content) > max_len:
+                    # Don't warn about long strings or URLs
+                    if '\"' in content and content.count('\"') >= 2:
+                        continue
+                    if "http://" in content or "https://" in content:
+                        continue
+                    # Check if it's a comment line
+                    if content.strip().startswith("/*") or content.strip().startswith("*") or content.strip().startswith("//"):
+                        self.add_result(
+                            "WARNING", "LONG_LINE_COMMENT",
+                            f"line length of {len(content)} exceeds {max_len} columns",
+                            filename, line_num, content
+                        )
+                    else:
+                        self.add_result(
+                            "WARNING", "LONG_LINE",
+                            f"line length of {len(content)} exceeds {max_len} columns",
+                            filename, line_num, content
+                        )
+
+    def check_trailing_whitespace(self, patch_info: PatchInfo) -> None:
+        """Check for trailing whitespace."""
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                if content != content.rstrip():
+                    self.add_result(
+                        "WARNING", "TRAILING_WHITESPACE",
+                        "trailing whitespace",
+                        filename, line_num, content
+                    )
+
+    def check_tabs_spaces(self, patch_info: PatchInfo) -> None:
+        """Check for space before tab and mixed indentation."""
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                if " \t" in content:
+                    self.add_result(
+                        "WARNING", "SPACE_BEFORE_TAB",
+                        "space before tab in indent",
+                        filename, line_num, content
+                    )
+
+    def check_signoff(self, patch_info: PatchInfo) -> None:
+        """Check for Signed-off-by line."""
+        if not patch_info.signoffs:
+            self.add_result(
+                "ERROR", "MISSING_SIGN_OFF",
+                "Missing Signed-off-by: line(s)"
+            )
+
+    def check_coding_style(self, patch_info: PatchInfo) -> None:
+        """Check various coding style issues."""
+        for filename, lines in patch_info.added_lines.items():
+            # Skip non-C files for most checks
+            is_c_file = filename.endswith((".c", ".h"))
+            is_c_source = filename.endswith(".c")
+            is_header = filename.endswith(".h")
+
+            prev_line = ""
+            indent_stack = []
+            for line_num, content in lines:
+                self.lines_checked += 1
+
+                if is_c_file:
+                    # Check for externs in .c files
+                    if is_c_source and re.match(r"^\s*extern\b", content):
+                        self.add_result(
+                            "WARNING", "AVOID_EXTERNS",
+                            "externs should be avoided in .c files",
+                            filename, line_num, content
+                        )
+
+                    # Check for unnecessary break after goto/return/continue
+                    if re.match(r"^\s*break\s*;", content):
+                        if re.match(r"^\s*(goto|return|continue)\b", prev_line):
+                            self.add_result(
+                                "WARNING", "UNNECESSARY_BREAK",
+                                "break is not useful after a goto or return",
+                                filename, line_num, content
+                            )
+
+                    # STRNCPY: should use strlcpy
+                    if re.search(r"\bstrncpy\s*\(", content):
+                        self.add_result(
+                            "WARNING", "STRNCPY",
+                            "Prefer strlcpy over strncpy - see: https://lore.kernel.org/r/CAHk-=wgfRnXz0W3D37d01q3JFkr_i_uTL=V6A6G1oUZcprmknw@mail.gmail.com/",
+                            filename, line_num, content
+                        )
+
+                    # STRCPY: unsafe string copy
+                    if re.search(r"\bstrcpy\s*\(", content):
+                        self.add_result(
+                            "ERROR", "STRCPY",
+                            "strcpy is unsafe - use strlcpy or snprintf",
+                            filename, line_num, content
+                        )
+
+                    # Check for complex macros without proper enclosure
+                    if re.match(r"^\s*#\s*define\s+\w+\s*\([^)]*\)\s+\(", content):
+                        # Macro with arguments that starts with ( - check if it's a compound literal
+                        if re.search(r"\)\s+\([^)]*\]\s*\)\s*\{", content) or \
+                           re.search(r"\)\s+\(const\s+", content) or \
+                           re.search(r"\)\s+\(enum\s+", content) or \
+                           re.search(r"\)\s+\(struct\s+", content):
+                            self.add_result(
+                                "ERROR", "COMPLEX_MACRO",
+                                "Macros with complex values should be enclosed in parentheses",
+                                filename, line_num, content
+                            )
+
+                    # SPACING: missing space before ( in control statements
+                    if re.search(r"\b(if|while|for|switch)\(", content):
+                        self.add_result(
+                            "WARNING", "SPACING",
+                            "space required before the open parenthesis '('",
+                            filename, line_num, content
+                        )
+
+                    # SPACING: space prohibited after open square bracket
+                    if re.search(r"\[\s+[^\]]", content) and not re.search(r"\[\s*\]", content):
+                        self.add_result(
+                            "WARNING", "SPACING",
+                            "space prohibited after that open square bracket '['",
+                            filename, line_num, content
+                        )
+
+                    # SPACING: space prohibited before close square bracket
+                    if re.search(r"[^\[]\s+\]", content):
+                        self.add_result(
+                            "WARNING", "SPACING",
+                            "space prohibited before that close square bracket ']'",
+                            filename, line_num, content
+                        )
+
+                    # RETURN_PARENTHESES: return with parentheses
+                    if re.search(r"\breturn\s*\([^;]+\)\s*;", content):
+                        # Avoid false positives for function calls like return (func())
+                        if not re.search(r"\breturn\s*\(\s*\w+\s*\([^)]*\)\s*\)\s*;", content):
+                            self.add_result(
+                                "WARNING", "RETURN_PARENTHESES",
+                                "return is not a function, parentheses are not required",
+                                filename, line_num, content
+                            )
+
+                    # BRACES: single statement blocks that need braces
+                    # Check for if/else/while/for without braces on multiline
+                    if re.match(r"^\s*(if|else\s+if|while|for)\s*\([^{]*$", content):
+                        # Control statement without opening brace - check next line
+                        pass  # Would need lookahead
+
+                    # INITIALISED_STATIC: static initialized to 0/NULL
+                    if re.match(r"^\s*static\s+.*=\s*(0|NULL|0L|0UL|0ULL|0LL)\s*;", content):
+                        self.add_result(
+                            "WARNING", "INITIALISED_STATIC",
+                            "do not initialise statics to 0 or NULL",
+                            filename, line_num, content
+                        )
+
+                    # GLOBAL_INITIALISERS: global initialized to 0/NULL
+                    if re.match(r"^[a-zA-Z_][a-zA-Z0-9_\s\*]*=\s*(0|NULL|0L|0UL|0ULL|0LL)\s*;", content):
+                        if not re.match(r"^\s*static\s+", content):
+                            self.add_result(
+                                "WARNING", "GLOBAL_INITIALISERS",
+                                "do not initialise globals to 0 or NULL",
+                                filename, line_num, content
+                            )
+
+                    # DEEP_INDENTATION: more than 4-5 levels of tabs
+                    leading_tabs = len(content) - len(content.lstrip('\t'))
+                    if leading_tabs >= 5:
+                        self.add_result(
+                            "WARNING", "DEEP_INDENTATION",
+                            f"too many levels of indentation ({leading_tabs} tabs)",
+                            filename, line_num, content
+                        )
+
+                    # TRAILING_STATEMENTS: code on same line as } OR control statement
+                    if re.search(r"\}\s*[a-zA-Z_]", content) and not re.search(r"\}\s*(else|while)\b", content):
+                        self.add_result(
+                            "ERROR", "TRAILING_STATEMENTS",
+                            "trailing statements should be on next line",
+                            filename, line_num, content
+                        )
+                    # Also check for if/while/for with statement on same line (not opening brace)
+                    # Pattern: if (cond) statement; or if (cond) statement; /* comment */
+                    if re.search(r"\b(if|while|for)\s*\([^)]+\)\s+(?![\s{])[^;]*;", content):
+                        self.add_result(
+                            "ERROR", "TRAILING_STATEMENTS",
+                            "trailing statements should be on next line",
+                            filename, line_num, content
+                        )
+
+                    # CONSTANT_COMPARISON: Yoda conditions (constant on left)
+                    if re.search(r'\b(NULL|true|false)\s*[!=]=\s*[&*\w]', content) or \
+                       re.search(r'\(\s*0\s*[!=]=\s*[&*\w]', content):
+                        self.add_result(
+                            "WARNING", "CONSTANT_COMPARISON",
+                            "Comparisons should place the constant on the right side",
+                            filename, line_num, content
+                        )
+
+                    # BRACES: single statement block should not have braces (or vice versa)
+                    # Check for if/else/while/for with single statement in braces
+                    if re.match(r"^\s*(if|while|for)\s*\([^)]+\)\s*\{\s*$", prev_line):
+                        if re.match(r"^\s*\w.*;\s*$", content) and not re.search(r"^\s*(if|else|while|for|switch|case|default|return\s*;)", content):
+                            # Check if next line is just closing brace - would need lookahead
+                            pass
+
+                    # ONE_SEMICOLON: double semicolon
+                    if re.search(r";;", content) and not re.search(r"for\s*\([^)]*;;", content):
+                        self.add_result(
+                            "WARNING", "ONE_SEMICOLON",
+                            "Statements terminations use 1 semicolon",
+                            filename, line_num, content
+                        )
+
+                    # CODE_INDENT/LEADING_SPACE: spaces used for indentation instead of tabs
+                    if re.match(r"^    +[^\s]", content) and not content.strip().startswith("*"):
+                        # Line starts with spaces (not tabs) - but allow for alignment in comments
+                        self.add_result(
+                            "WARNING", "CODE_INDENT",
+                            "code indent should use tabs where possible",
+                            filename, line_num, content
+                        )
+
+                    # LEADING_SPACE: spaces at start of line (more general)
+                    if re.match(r"^ +\t", content):
+                        self.add_result(
+                            "WARNING", "LEADING_SPACE",
+                            "please, no spaces at the start of a line",
+                            filename, line_num, content
+                        )
+
+                    # LINE_CONTINUATIONS: backslash continuation outside macros
+                    if content.rstrip().endswith("\\") and not re.match(r"^\s*#", content):
+                        # Not a preprocessor line but has continuation
+                        if not re.match(r"^\s*#\s*define", prev_line):
+                            self.add_result(
+                                "WARNING", "LINE_CONTINUATIONS",
+                                "Avoid unnecessary line continuations",
+                                filename, line_num, content
+                            )
+
+                    # FUNCTION_WITHOUT_ARGS: empty parens instead of (void)
+                    if is_header and re.search(r"\b\w+\s*\(\s*\)\s*;", content):
+                        if not re.search(r"\b(while|if|for|switch|return)\s*\(\s*\)", content):
+                            self.add_result(
+                                "ERROR", "FUNCTION_WITHOUT_ARGS",
+                                "Bad function definition - use (void) instead of ()",
+                                filename, line_num, content
+                            )
+
+                    # INLINE_LOCATION: inline should come after storage class
+                    if re.match(r"^\s*inline\s+(static|extern)", content):
+                        self.add_result(
+                            "ERROR", "INLINE_LOCATION",
+                            "inline keyword should sit between storage class and type",
+                            filename, line_num, content
+                        )
+
+                    # STATIC_CONST: const should come after static
+                    if re.match(r"^\s*const\s+static\b", content):
+                        self.add_result(
+                            "WARNING", "STATIC_CONST",
+                            "Move const after static - use 'static const'",
+                            filename, line_num, content
+                        )
+                        self.add_result(
+                            "WARNING", "STORAGE_CLASS",
+                            "storage class should be at the beginning of the declaration",
+                            filename, line_num, content
+                        )
+
+                    # CONST_CONST: const used twice
+                    if re.search(r"\bconst\s+\w+\s+const\b", content):
+                        self.add_result(
+                            "WARNING", "CONST_CONST",
+                            "const used twice - remove duplicate const",
+                            filename, line_num, content
+                        )
+
+                    # SELF_ASSIGNMENT: x = x
+                    if re.search(r"\b(\w+)\s*=\s*\1\s*;", content):
+                        match = re.search(r"\b(\w+)\s*=\s*\1\s*;", content)
+                        if match:
+                            self.add_result(
+                                "WARNING", "SELF_ASSIGNMENT",
+                                f"Do not use self-assignments to avoid compiler warnings",
+                                filename, line_num, content
+                            )
+
+                    # PREFER_DEFINED_ATTRIBUTE_MACRO: prefer DPDK/kernel macros over __attribute__
+                    attr_macros = {
+                        'cold': '__rte_cold',
+                        'hot': '__rte_hot', 
+                        'noinline': '__rte_noinline',
+                        'always_inline': '__rte_always_inline',
+                        'unused': '__rte_unused',
+                        'packed': '__rte_packed',
+                        'aligned': '__rte_aligned',
+                        'weak': '__rte_weak',
+                        'pure': '__rte_pure',
+                    }
+                    for attr, replacement in attr_macros.items():
+                        if re.search(rf'__attribute__\s*\(\s*\(\s*{attr}\b', content):
+                            self.add_result(
+                                "WARNING", "PREFER_DEFINED_ATTRIBUTE_MACRO",
+                                f"Prefer {replacement} over __attribute__(({attr}))",
+                                filename, line_num, content
+                            )
+
+                    # POINTER_LOCATION: char* instead of char *
+                    if re.search(r"\b(char|int|void|short|long|float|double|unsigned|signed)\*\s+\w", content):
+                        self.add_result(
+                            "ERROR", "POINTER_LOCATION",
+                            "\"foo* bar\" should be \"foo *bar\"",
+                            filename, line_num, content
+                        )
+
+                    # MACRO_WITH_FLOW_CONTROL: macros with return/goto/break
+                    if re.match(r"^\s*#\s*define\s+\w+.*\b(return|goto|break|continue)\b", content):
+                        self.add_result(
+                            "WARNING", "MACRO_WITH_FLOW_CONTROL",
+                            "Macros with flow control statements should be avoided",
+                            filename, line_num, content
+                        )
+
+                    # MULTISTATEMENT_MACRO_USE_DO_WHILE: macros with multiple statements
+                    if re.match(r"^\s*#\s*define\s+\w+\([^)]*\)\s+.*;\s*[^\\]", content):
+                        if not re.search(r"do\s*\{", content):
+                            self.add_result(
+                                "WARNING", "MULTISTATEMENT_MACRO_USE_DO_WHILE",
+                                "Macros with multiple statements should use do {} while(0)",
+                                filename, line_num, content
+                            )
+
+                    # MULTISTATEMENT_MACRO_USE_DO_WHILE: macros starting with if
+                    if re.match(r"^\s*#\s*define\s+\w+\([^)]*\)\s+if\s*\(", content):
+                        self.add_result(
+                            "ERROR", "MULTISTATEMENT_MACRO_USE_DO_WHILE",
+                            "Macros starting with if should be enclosed by a do - while loop",
+                            filename, line_num, content
+                        )
+
+                    # Multiple statements on one line
+                    if re.search(r";\s*[a-zA-Z_]", content) and "for" not in content:
+                        self.add_result(
+                            "CHECK", "MULTIPLE_STATEMENTS",
+                            "multiple statements on one line",
+                            filename, line_num, content
+                        )
+
+                    # Check for C99 comments in headers that should use C89
+                    if is_header and "//" in content:
+                        # Only flag if not in a string
+                        stripped = re.sub(r'"[^"]*"', '', content)
+                        if "//" in stripped:
+                            self.add_result(
+                                "CHECK", "C99_COMMENTS",
+                                "C99 // comments are acceptable but /* */ is preferred in headers",
+                                filename, line_num, content
+                            )
+
+                    # BLOCK_COMMENT_STYLE: block comments style issues
+                    # Leading /* on its own line
+                    if re.match(r"^\s*/\*\*+\s*$", content):
+                        self.add_result(
+                            "WARNING", "BLOCK_COMMENT_STYLE",
+                            "Block comments should not use a leading /* on a line by itself",
+                            filename, line_num, content
+                        )
+                    # Trailing */ on separate line after block comment
+                    if re.match(r"^\s*\*+/\s*$", content) and prev_line.strip().startswith("*"):
+                        pass  # This is actually acceptable
+                    # Block with trailing */ but content before it
+                    if re.search(r"\S\s*=+\s*\*/\s*$", content):
+                        self.add_result(
+                            "WARNING", "BLOCK_COMMENT_STYLE",
+                            "Block comments use a trailing */ on a separate line",
+                            filename, line_num, content
+                        )
+
+                    # REPEATED_WORD: check for repeated words
+                    words = re.findall(r'\b(\w+)\s+\1\b', content, re.IGNORECASE)
+                    for word in words:
+                        word_lower = word.lower()
+                        # Skip common valid repeated patterns
+                        if word_lower not in ('that', 'had', 'long', 'int', 'short'):
+                            self.add_result(
+                                "WARNING", "REPEATED_WORD",
+                                f"Possible repeated word: '{word}'",
+                                filename, line_num, content
+                            )
+
+                    # STRING_FRAGMENTS: unnecessary string concatenation
+                    if re.search(r'"\s+"', content) and not re.search(r'#\s*define', content):
+                        self.add_result(
+                            "CHECK", "STRING_FRAGMENTS",
+                            "Consecutive strings are generally better as a single string",
+                            filename, line_num, content
+                        )
+
+                prev_line = content
+
+    def check_spelling(self, patch_info: PatchInfo) -> None:
+        """Check for spelling errors using codespell dictionary."""
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                # REPEATED_WORD check for non-C files (C files handled in check_coding_style)
+                if not filename.endswith((".c", ".h")):
+                    words = re.findall(r'\b(\w+)\s+\1\b', content, re.IGNORECASE)
+                    for word in words:
+                        word_lower = word.lower()
+                        if word_lower not in ('that', 'had', 'long', 'int', 'short'):
+                            self.add_result(
+                                "WARNING", "REPEATED_WORD",
+                                f"Possible repeated word: '{word}'",
+                                filename, line_num, content
+                            )
+
+                # Spelling check
+                if self.spelling_dict:
+                    words = re.findall(r'\b[a-zA-Z]+\b', content)
+                    for word in words:
+                        lower_word = word.lower()
+                        if lower_word in self.spelling_dict:
+                            self.add_result(
+                                "WARNING", "TYPO_SPELLING",
+                                f"'{word}' may be misspelled - perhaps '{self.spelling_dict[lower_word]}'?",
+                                filename, line_num, content
+                            )
+
+    def check_forbidden_tokens(self, patch_info: PatchInfo) -> None:
+        """Check for DPDK-specific forbidden tokens."""
+        for filename, lines in patch_info.added_lines.items():
+            for rule in self.forbidden_rules:
+                # Check if file is in one of the target folders
+                in_folder = False
+                for folder in rule["folders"]:
+                    if filename.startswith(folder + "/") or filename.startswith("b/" + folder + "/"):
+                        in_folder = True
+                        break
+
+                if not in_folder:
+                    continue
+
+                # Check if file should be skipped
+                skip = False
+                for skip_pattern in rule.get("skip_files", []):
+                    if re.search(skip_pattern, filename):
+                        skip = True
+                        break
+
+                if skip:
+                    continue
+
+                # Check each line for forbidden patterns
+                for line_num, content in lines:
+                    for pattern in rule["patterns"]:
+                        if re.search(pattern, content):
+                            self.add_result(
+                                "WARNING", "FORBIDDEN_TOKEN",
+                                rule["message"],
+                                filename, line_num, content
+                            )
+                            break
+
+    def check_experimental_tags(self, patch_info: PatchInfo) -> None:
+        """Check __rte_experimental tag placement."""
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                if "__rte_experimental" in content:
+                    # Should only be in headers
+                    if filename.endswith(".c"):
+                        self.add_result(
+                            "WARNING", "EXPERIMENTAL_TAG",
+                            f"Please only put __rte_experimental tags in headers ({filename})",
+                            filename, line_num, content
+                        )
+                    # Should appear alone on the line
+                    stripped = content.strip()
+                    if stripped != "__rte_experimental":
+                        self.add_result(
+                            "WARNING", "EXPERIMENTAL_TAG",
+                            "__rte_experimental must appear alone on the line immediately preceding the return type of a function",
+                            filename, line_num, content
+                        )
+
+    def check_internal_tags(self, patch_info: PatchInfo) -> None:
+        """Check __rte_internal tag placement."""
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                if "__rte_internal" in content:
+                    # Should only be in headers
+                    if filename.endswith(".c"):
+                        self.add_result(
+                            "WARNING", "INTERNAL_TAG",
+                            f"Please only put __rte_internal tags in headers ({filename})",
+                            filename, line_num, content
+                        )
+                    # Should appear alone on the line
+                    stripped = content.strip()
+                    if stripped != "__rte_internal":
+                        self.add_result(
+                            "WARNING", "INTERNAL_TAG",
+                            "__rte_internal must appear alone on the line immediately preceding the return type of a function",
+                            filename, line_num, content
+                        )
+
+    def check_aligned_attributes(self, patch_info: PatchInfo) -> None:
+        """Check alignment attribute usage."""
+        align_tokens = ["__rte_aligned", "__rte_cache_aligned", "__rte_cache_min_aligned"]
+
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                for token in align_tokens:
+                    if re.search(rf"\b{token}\b", content):
+                        # Should only be used with struct or union
+                        if not re.search(rf"\b(struct|union)\s*{token}\b", content):
+                            self.add_result(
+                                "WARNING", "ALIGNED_ATTRIBUTE",
+                                f"Please use {token} only for struct or union types alignment",
+                                filename, line_num, content
+                            )
+
+    def check_packed_attributes(self, patch_info: PatchInfo) -> None:
+        """Check packed attribute usage."""
+        begin_count = 0
+        end_count = 0
+
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                if "__rte_packed_begin" in content:
+                    begin_count += 1
+                    # Should be after struct, union, or alignment attributes
+                    if not re.search(r"\b(struct|union)\s*__rte_packed_begin\b", content) and \
+                       not re.search(r"__rte_cache_aligned\s*__rte_packed_begin", content) and \
+                       not re.search(r"__rte_cache_min_aligned\s*__rte_packed_begin", content) and \
+                       not re.search(r"__rte_aligned\(.*\)\s*__rte_packed_begin", content):
+                        self.add_result(
+                            "WARNING", "PACKED_ATTRIBUTE",
+                            "Use __rte_packed_begin only after struct, union or alignment attributes",
+                            filename, line_num, content
+                        )
+
+                if "__rte_packed_end" in content:
+                    end_count += 1
+
+        if begin_count != end_count:
+            self.add_result(
+                "WARNING", "PACKED_ATTRIBUTE",
+                "__rte_packed_begin and __rte_packed_end should always be used in pairs"
+            )
+
+    def check_patch(self, content: str, patch_file: str = None) -> bool:
+        """Run all checks on a patch."""
+        self.results = []
+        self.errors = 0
+        self.warnings = 0
+        self.checks = 0
+        self.lines_checked = 0
+
+        # Check patch format first
+        self.check_patch_format(content, patch_file)
+
+        patch_info = self.parse_patch(content)
+
+        # Run all checks
+        self.check_signoff(patch_info)
+        self.check_line_length(patch_info)
+        self.check_trailing_whitespace(patch_info)
+        self.check_tabs_spaces(patch_info)
+        self.check_coding_style(patch_info)
+        self.check_spelling(patch_info)
+        self.check_forbidden_tokens(patch_info)
+        self.check_experimental_tags(patch_info)
+        self.check_internal_tags(patch_info)
+        self.check_aligned_attributes(patch_info)
+        self.check_packed_attributes(patch_info)
+        self.check_commit_message(patch_info, content)
+
+        return self.errors == 0 and self.warnings == 0
+
+    def check_patch_format(self, content: str, patch_file: str = None) -> None:
+        """Check basic patch format for corruption."""
+        lines = content.split("\n")
+
+        # Track patch structure
+        has_diff = False
+        has_hunk = False
+        in_hunk = False
+        hunk_line = 0
+
+        for i, line in enumerate(lines, 1):
+            # Track diff headers
+            if line.startswith("diff --git"):
+                has_diff = True
+                in_hunk = False
+
+            # Parse hunk header
+            if line.startswith("@@"):
+                has_hunk = True
+                in_hunk = True
+                hunk_line = i
+                # Validate hunk header format
+                if not re.match(r"@@ -\d+(?:,\d+)? \+\d+(?:,\d+)? @@", line):
+                    self.add_result(
+                        "ERROR", "CORRUPTED_PATCH",
+                        f"patch seems to be corrupt (malformed hunk header) at line {i}"
+                    )
+
+            # End of patch content (signature separator)
+            elif line == "-- ":
+                in_hunk = False
+
+            # Check for lines that look like they should be in a hunk but aren't prefixed
+            elif in_hunk and line and not line.startswith(("+", "-", " ", "\\", "diff ", "@@", "index ", "--- ", "+++ ", "new file", "deleted file", "old mode", "new mode", "rename ", "similarity", "copy ")):
+                # This could be a wrapped line or corruption
+                # But be careful - empty lines and commit message lines are OK
+                if not line.startswith(("From ", "Subject:", "Date:", "Signed-off-by:",
+                                       "Acked-by:", "Reviewed-by:", "Tested-by:",
+                                       "Fixes:", "Cc:", "---", "Message-Id:")):
+                    # Likely a corrupted/wrapped line in the diff
+                    self.add_result(
+                        "ERROR", "CORRUPTED_PATCH",
+                        f"patch seems to be corrupt (line wrapped?) at line {i}"
+                    )
+                    in_hunk = False  # Stop checking this hunk
+
+        if has_diff and not has_hunk:
+            self.add_result(
+                "ERROR", "CORRUPTED_PATCH",
+                "Patch appears to be corrupted (has diff but no hunks)"
+            )
+
+        # Check for DOS line endings
+        if "\r\n" in content:
+            self.add_result(
+                "ERROR", "DOS_LINE_ENDINGS",
+                "Patch has DOS line endings, should be UNIX line endings"
+            )
+
+    def check_commit_message(self, patch_info: PatchInfo, content: str) -> None:
+        """Check commit message for issues."""
+        lines = content.split("\n")
+
+        in_commit_msg = False
+        commit_msg_lines = []
+
+        for i, line in enumerate(lines):
+            if line.startswith("Subject:"):
+                in_commit_msg = True
+                continue
+            if line.startswith("---") or line.startswith("diff --git"):
+                in_commit_msg = False
+                continue
+            if in_commit_msg:
+                commit_msg_lines.append((i + 1, line))
+
+        for line_num, line in commit_msg_lines:
+            # UNKNOWN_COMMIT_ID: Fixes tag with short or invalid commit ID
+            match = re.match(r"^Fixes:\s*([0-9a-fA-F]+)", line)
+            if match:
+                commit_id = match.group(1)
+                if len(commit_id) < 12:
+                    self.add_result(
+                        "WARNING", "UNKNOWN_COMMIT_ID",
+                        f"Commit id '{commit_id}' is too short, use at least 12 characters",
+                        line_num=line_num, line_content=line
+                    )
+                # Check Fixes format: should be Fixes: <hash> ("commit subject")
+                if not re.match(r'^Fixes:\s+[0-9a-fA-F]{12,}\s+\("[^"]+"\)\s*$', line):
+                    self.add_result(
+                        "WARNING", "BAD_FIXES_TAG",
+                        "Fixes: tag format should be: Fixes: <12+ char hash> (\"commit subject\")",
+                        line_num=line_num, line_content=line
+                    )
+
+    def format_results(self, show_types: bool = True) -> str:
+        """Format the results for output."""
+        output = []
+
+        for result in self.results:
+            if result.filename and result.line_num:
+                prefix = f"{result.filename}:{result.line_num}:"
+            elif result.filename:
+                prefix = f"{result.filename}:"
+            else:
+                prefix = ""
+
+            type_str = f" [{result.type_name}]" if show_types else ""
+            output.append(f"{result.level}:{type_str} {result.message}")
+
+            if prefix:
+                output.append(f"#  {prefix}")
+            if result.line_content:
+                output.append(f"+  {result.line_content}")
+            output.append("")
+
+        return "\n".join(output)
+
+    def get_summary(self) -> str:
+        """Get a summary of the check results."""
+        return f"total: {self.errors} errors, {self.warnings} warnings, {self.checks} checks, {self.lines_checked} lines checked"
+
+
+def check_single_patch(checker: CheckPatch, patch_path: Optional[str],
+                       commit: Optional[str], verbose: bool, quiet: bool) -> bool:
+    """Check a single patch file or commit."""
+    subject = ""
+    content = ""
+
+    if patch_path:
+        try:
+            with open(patch_path, "r", encoding="utf-8", errors="replace") as f:
+                content = f.read()
+        except IOError as e:
+            print(f"Error reading {patch_path}: {e}", file=sys.stderr)
+            return False
+    elif commit:
+        try:
+            result = subprocess.run(
+                ["git", "format-patch", "--find-renames", "--no-stat", "--stdout", "-1", commit],
+                capture_output=True,
+                text=True
+            )
+            if result.returncode != 0:
+                print(f"Error getting commit {commit}", file=sys.stderr)
+                return False
+            content = result.stdout
+        except (subprocess.CalledProcessError, FileNotFoundError) as e:
+            print(f"Error running git: {e}", file=sys.stderr)
+            return False
+    else:
+        content = sys.stdin.read()
+
+    # Extract subject
+    match = re.search(r"^Subject:\s*(.+?)(?:\n(?=\S)|\n\n)", content, re.MULTILINE | re.DOTALL)
+    if match:
+        subject = match.group(1).replace("\n ", " ").strip()
+
+    if verbose:
+        print(f"\n### {subject}\n")
+
+    is_clean = checker.check_patch(content, patch_path)
+    has_issues = checker.errors > 0 or checker.warnings > 0
+
+    if has_issues or verbose:
+        if not verbose and subject:
+            print(f"\n### {subject}\n")
+        print(checker.format_results(show_types=True))
+        print(checker.get_summary())
+
+    return is_clean
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse command line arguments."""
+    parser = argparse.ArgumentParser(
+        description="Check patches for DPDK coding style and common issues",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  %(prog)s patch.diff                Check a patch file
+  %(prog)s -n 3                      Check last 3 commits
+  %(prog)s -r origin/main..HEAD      Check commits in range
+  cat patch.diff | %(prog)s          Check patch from stdin
+"""
+    )
+
+    parser.add_argument("patches", nargs="*", help="Patch files to check")
+    parser.add_argument("-n", type=int, metavar="NUM",
+                       help="Check last NUM commits")
+    parser.add_argument("-r", "--range", metavar="RANGE",
+                       help="Check commits in git range (default: origin/main..)")
+    parser.add_argument("-q", "--quiet", action="store_true",
+                       help="Quiet mode - only show summary")
+    parser.add_argument("-v", "--verbose", action="store_true",
+                       help="Verbose mode - show all checks")
+    parser.add_argument("--max-line-length", type=int, default=DEFAULT_LINE_LENGTH,
+                       help=f"Maximum line length (default: {DEFAULT_LINE_LENGTH})")
+    parser.add_argument("--codespell", action="store_true", default=True,
+                       help="Enable spell checking (default: enabled)")
+    parser.add_argument("--no-codespell", dest="codespell", action="store_false",
+                       help="Disable spell checking")
+    parser.add_argument("--codespellfile", metavar="FILE",
+                       help="Path to codespell dictionary")
+    parser.add_argument("--show-types", action="store_true", default=True,
+                       help="Show message types (default: enabled)")
+    parser.add_argument("--no-show-types", dest="show_types", action="store_false",
+                       help="Hide message types")
+
+    return parser.parse_args()
+
+
+def main():
+    """Main entry point."""
+    args = parse_args()
+
+    # Build configuration
+    config = {
+        "max_line_length": args.max_line_length,
+        "codespell": args.codespell,
+        "show_types": args.show_types,
+    }
+
+    if args.codespellfile:
+        config["codespell_file"] = args.codespellfile
+
+    checker = CheckPatch(config)
+
+    total = 0
+    failed = 0
+
+    if args.patches:
+        # Check specified patch files
+        for patch in args.patches:
+            total += 1
+            if not check_single_patch(checker, patch, None, args.verbose, args.quiet):
+                failed += 1
+
+    elif args.n or args.range:
+        # Check git commits
+        if args.n:
+            result = subprocess.run(
+                ["git", "rev-list", "--reverse", f"--max-count={args.n}", "HEAD"],
+                capture_output=True,
+                text=True
+            )
+        else:
+            git_range = args.range if args.range else "origin/main.."
+            result = subprocess.run(
+                ["git", "rev-list", "--reverse", git_range],
+                capture_output=True,
+                text=True
+            )
+
+        if result.returncode != 0:
+            print("Error getting git commits", file=sys.stderr)
+            sys.exit(1)
+
+        commits = result.stdout.strip().split("\n")
+        for commit in commits:
+            if commit:
+                total += 1
+                if not check_single_patch(checker, None, commit, args.verbose, args.quiet):
+                    failed += 1
+
+    elif not sys.stdin.isatty():
+        # Read from stdin
+        total = 1
+        if not check_single_patch(checker, None, None, args.verbose, args.quiet):
+            failed += 1
+
+    else:
+        # Default to checking commits since origin/main
+        result = subprocess.run(
+            ["git", "rev-list", "--reverse", "origin/main.."],
+            capture_output=True,
+            text=True
+        )
+
+        commits = result.stdout.strip().split("\n") if result.stdout.strip() else []
+        for commit in commits:
+            if commit:
+                total += 1
+                if not check_single_patch(checker, None, commit, args.verbose, args.quiet):
+                    failed += 1
+
+    # Print summary
+    passed = total - failed
+    if not args.quiet:
+        print(f"\n{passed}/{total} valid patch{'es' if passed != 1 else ''}")
+
+    sys.exit(failed)
+
+
+if __name__ == "__main__":
+    main()
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [RFC] devtools: replace get-maintainer shell wrapper with Python script
  2026-02-01 19:01   ` Stephen Hemminger
@ 2026-02-01 20:16     ` Thomas Monjalon
  2026-02-01 22:23       ` Stephen Hemminger
  0 siblings, 1 reply; 13+ messages in thread
From: Thomas Monjalon @ 2026-02-01 20:16 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev, Bruce Richardson, Aaron Conole, David Marchand

01/02/2026 20:01, Stephen Hemminger:
> On Sun, 01 Feb 2026 14:51:01 +0100
> Thomas Monjalon <thomas@monjalon.net> wrote:
> 
> > 31/01/2026 21:48, Stephen Hemminger:
> > > DPDK has been reusing the Linux kernel get_maintainer perl script
> > > but that creates an unwanted dependency on kernel source.
> > > 
> > > This new script replaces that with a standalone Python implementation
> > > created in a few minutes with AI. The command line arguments are
> > > a subset of the features that make sense in DPDK.  
> > 
> > Almost thousand lines for this new script.
> > Are you sure that's something we want to maintain ourself?
> 
> It really is less bad than the awk mess.
> And the kernel often changes the rules.
> 
> The bigger issue is that the python version is not detecting everything yet.

Our wrapper is very simple.
How much an issue is this dependency? What others think?




^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC] devtools: replace get-maintainer shell wrapper with Python script
  2026-02-01 20:16     ` Thomas Monjalon
@ 2026-02-01 22:23       ` Stephen Hemminger
  0 siblings, 0 replies; 13+ messages in thread
From: Stephen Hemminger @ 2026-02-01 22:23 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: dev, Bruce Richardson, Aaron Conole, David Marchand

On Sun, 01 Feb 2026 21:16:52 +0100
Thomas Monjalon <thomas@monjalon.net> wrote:

> 01/02/2026 20:01, Stephen Hemminger:
> > On Sun, 01 Feb 2026 14:51:01 +0100
> > Thomas Monjalon <thomas@monjalon.net> wrote:
> >   
> > > 31/01/2026 21:48, Stephen Hemminger:  
> > > > DPDK has been reusing the Linux kernel get_maintainer perl script
> > > > but that creates an unwanted dependency on kernel source.
> > > > 
> > > > This new script replaces that with a standalone Python implementation
> > > > created in a few minutes with AI. The command line arguments are
> > > > a subset of the features that make sense in DPDK.    
> > > 
> > > Almost thousand lines for this new script.
> > > Are you sure that's something we want to maintain ourself?  
> > 
> > It really is less bad than the awk mess.
> > And the kernel often changes the rules.
> > 
> > The bigger issue is that the python version is not detecting everything yet.  
> 
> Our wrapper is very simple.
> How much an issue is this dependency? What others think?

I started looking because of the patch suggestion to auto download
from kernel repo which seemed like an awkward way to solve the problem.

Of the two patches, get-maintainer and checkpatches my feelings are different for
each.

Get-maintainer is just a light wrapper, so probably ok to keep the original
shell script; but DPDK should consider adding other fields about support status
etc. The shell version is 34 line wrapper that calls 2655 line perl script.
There are some policy questions like does DPDK subsystem work like kernel
subsystem, but so far DPDK is fine. Evolution of the kernel version has
been slow; so developers are unlikely to get broken by having old or newer
version of kernel script.

Checkpatches has grown into a slow beast. With multiple awk calls and lots
of copy/paste repetition. The pure Python version is actually easier to read
and support. The shell part of checkpatches is 559 lines and the kernel
part is 7882; in total pretty big. The problem is that kernel version of checkpatches
changes often, and in fact the current DPDK shell script has ignores for
things that are no longer present or have changed. It is not uncommon for
CI to give different answers than running locally with current upstream.
Diverging for this script is overdue.

At this point, the patches are more a "what would it look like if"
to start the discussion.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [RFC v3] devtools: replace checkpatches shell wrapper with Python script
  2026-01-31 20:48 [RFC] devtools: replace get-maintainer shell wrapper with Python script Stephen Hemminger
  2026-02-01 13:51 ` Thomas Monjalon
  2026-02-01 19:22 ` [RFC v2] devtools: replace checkpatches " Stephen Hemminger
@ 2026-02-03 14:17 ` Stephen Hemminger
  2026-02-04 16:59 ` [PATCH v4] " Stephen Hemminger
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 13+ messages in thread
From: Stephen Hemminger @ 2026-02-03 14:17 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

Replace the checkpatches.sh shell script that wraps the Linux kernel's
checkpatch.pl with a standalone Python implementation. The new
checkpatches.py script provides equivalent functionality without
requiring the kernel source tree or Perl.

Performance is significantly improved: checking a recent flow parser
patch series dropped from 2m23s to 0.43s (approximately 300x faster).
The script is in a beta test state, it finds the same or more
issues as the original.

Features:

Patch validation:
  - Format validation and corruption detection
  - Signed-off-by verification
  - Fixes tag format checking
  - Line length and trailing whitespace checks

Kernel checkpatch.pl compatible checks:
  - AVOID_EXTERNS, UNNECESSARY_BREAK, COMPLEX_MACRO
  - STRNCPY/STRCPY, RETURN_PARENTHESES, POINTER_LOCATION
  - INITIALISED_STATIC, GLOBAL_INITIALISERS, DEEP_INDENTATION
  - TRAILING_STATEMENTS, LINE_CONTINUATIONS, ONE_SEMICOLON
  - REPEATED_WORD, CONSTANT_COMPARISON, SELF_ASSIGNMENT
  - INLINE_LOCATION, STORAGE_CLASS, FUNCTION_WITHOUT_ARGS
  - MACRO_WITH_FLOW_CONTROL, MULTISTATEMENT_MACRO_USE_DO_WHILE
  - PREFER_DEFINED_ATTRIBUTE_MACRO (using DPDK __rte_* macros)
  - TYPO_SPELLING via codespell dictionary

DPDK-specific forbidden token checks:
  - RTE_LOG usage (prefer RTE_LOG_LINE)
  - printf/fprintf to stdout/stderr in libs/drivers
  - rte_panic/rte_exit, direct __attribute__ usage
  - Deprecated atomics (rte_atomicNN_xxx, rte_smp_[rw]mb)
  - Compiler builtins (__sync_xxx, __atomic_xxx, __builtin_xxx)
  - pthread functions (prefer rte_thread)
  - Reserved keywords, pragma, variadic macros

DPDK tag validation:
  - __rte_experimental and __rte_internal placement
  - __rte_packed_begin/end pairing
  - __rte_aligned attribute checking

Git integration:
  - Check commits by range (-r) or count (-n)
  - Read patches from files or stdin

Known limitations:
  - BRACES check not implemented (requires multi-line analysis)

This script was developed with assistance from the Claude AI assistant.

v3 - update to make sure that same things are caught as original code.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 devtools/checkpatches.py | 1303 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 1303 insertions(+)
 create mode 100755 devtools/checkpatches.py

diff --git a/devtools/checkpatches.py b/devtools/checkpatches.py
new file mode 100755
index 0000000000..da9d95b736
--- /dev/null
+++ b/devtools/checkpatches.py
@@ -0,0 +1,1303 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2015 6WIND S.A.
+# Copyright 2025 - Python rewrite
+#
+# checkpatch.py - Check patches for common style issues
+#
+# This is a standalone Python replacement for the DPDK checkpatches.sh
+# script that previously wrapped the Linux kernel's checkpatch.pl.
+
+import argparse
+import os
+import re
+import subprocess
+import sys
+import tempfile
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Optional
+
+VERSION = "1.0"
+
+# Default configuration
+DEFAULT_LINE_LENGTH = 100
+DEFAULT_CODESPELL_DICT = "/usr/share/codespell/dictionary.txt"
+
+
+@dataclass
+class CheckResult:
+    """Result of a single check."""
+    level: str  # ERROR, WARNING, CHECK
+    type_name: str
+    message: str
+    filename: str = ""
+    line_num: int = 0
+    line_content: str = ""
+
+
+@dataclass
+class PatchInfo:
+    """Information extracted from a patch."""
+    subject: str = ""
+    author: str = ""
+    author_email: str = ""
+    signoffs: list = field(default_factory=list)
+    files: list = field(default_factory=list)
+    added_lines: dict = field(default_factory=dict)  # filename -> [(line_num, content)]
+    has_fixes_tag: bool = False
+    fixes_commits: list = field(default_factory=list)
+
+
+class CheckPatch:
+    """Main class for checking patches."""
+
+    def __init__(self, config: dict):
+        self.config = config
+        self.results: list[CheckResult] = []
+        self.errors = 0
+        self.warnings = 0
+        self.checks = 0
+        self.lines_checked = 0
+
+        # Load codespell dictionary if enabled
+        self.spelling_dict = {}
+        if config.get("codespell"):
+            self._load_codespell_dict()
+
+        # Forbidden token rules for DPDK
+        self.forbidden_rules = self._init_forbidden_rules()
+
+    def _load_codespell_dict(self) -> None:
+        """Load the codespell dictionary."""
+        dict_path = self.config.get("codespell_file")
+
+        if not dict_path:
+            # Search common locations for the dictionary
+            search_paths = [
+                DEFAULT_CODESPELL_DICT,
+                "/usr/local/lib/python3.12/dist-packages/codespell_lib/data/dictionary.txt",
+                "/usr/local/lib/python3.11/dist-packages/codespell_lib/data/dictionary.txt",
+                "/usr/local/lib/python3.10/dist-packages/codespell_lib/data/dictionary.txt",
+                "/usr/lib/python3/dist-packages/codespell_lib/data/dictionary.txt",
+            ]
+
+            # Also try to find it via codespell module
+            try:
+                import codespell_lib
+                module_path = os.path.join(
+                    os.path.dirname(codespell_lib.__file__),
+                    'data', 'dictionary.txt'
+                )
+                search_paths.insert(0, module_path)
+            except ImportError:
+                pass
+
+            for path in search_paths:
+                if os.path.exists(path):
+                    dict_path = path
+                    break
+
+        if not dict_path or not os.path.exists(dict_path):
+            return
+
+        try:
+            with open(dict_path, "r", encoding="utf-8", errors="ignore") as f:
+                for line in f:
+                    line = line.strip()
+                    if not line or line.startswith("#"):
+                        continue
+                    parts = line.split("->")
+                    if len(parts) >= 2:
+                        wrong = parts[0].strip().lower()
+                        correct = parts[1].strip().split(",")[0].strip()
+                        self.spelling_dict[wrong] = correct
+        except IOError:
+            pass
+
+    def _init_forbidden_rules(self) -> list:
+        """Initialize DPDK-specific forbidden token rules."""
+        return [
+            # Refrain from new calls to RTE_LOG in libraries
+            {
+                "folders": ["lib"],
+                "patterns": [r"RTE_LOG\("],
+                "message": "Prefer RTE_LOG_LINE",
+            },
+            # Refrain from new calls to RTE_LOG in drivers
+            {
+                "folders": ["drivers"],
+                "skip_files": [r".*osdep\.h$"],
+                "patterns": [r"RTE_LOG\(", r"RTE_LOG_DP\(", r"rte_log\("],
+                "message": "Prefer RTE_LOG_LINE/RTE_LOG_DP_LINE",
+            },
+            # No output on stdout or stderr
+            {
+                "folders": ["lib", "drivers"],
+                "patterns": [r"\bprintf\b", r"fprintf\(stdout,", r"fprintf\(stderr,"],
+                "message": "Writing to stdout or stderr",
+            },
+            # Refrain from rte_panic() and rte_exit()
+            {
+                "folders": ["lib", "drivers"],
+                "patterns": [r"rte_panic\(", r"rte_exit\("],
+                "message": "Using rte_panic/rte_exit",
+            },
+            # Don't call directly install_headers()
+            {
+                "folders": ["lib", "drivers"],
+                "patterns": [r"\binstall_headers\b"],
+                "message": "Using install_headers()",
+            },
+            # Refrain from using compiler attribute without common macro
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "skip_files": [r"lib/eal/include/rte_common\.h"],
+                "patterns": [r"__attribute__"],
+                "message": "Using compiler attribute directly",
+            },
+            # Check %l or %ll format specifier
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"%ll*[xud]"],
+                "message": "Using %l format, prefer %PRI*64 if type is [u]int64_t",
+            },
+            # Refrain from 16/32/64 bits rte_atomicNN_xxx()
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"rte_atomic[0-9][0-9]_.*\("],
+                "message": "Using rte_atomicNN_xxx",
+            },
+            # Refrain from rte_smp_[r/w]mb()
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"rte_smp_(r|w)?mb\("],
+                "message": "Using rte_smp_[r/w]mb",
+            },
+            # Refrain from __sync_xxx builtins
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"__sync_.*\("],
+                "message": "Using __sync_xxx builtins",
+            },
+            # Refrain from __rte_atomic_thread_fence()
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"__rte_atomic_thread_fence\("],
+                "message": "Using __rte_atomic_thread_fence, prefer rte_atomic_thread_fence",
+            },
+            # Refrain from __atomic_xxx builtins
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "skip_files": [r"drivers/common/cnxk/"],
+                "patterns": [r"__atomic_.*\(", r"__ATOMIC_(RELAXED|CONSUME|ACQUIRE|RELEASE|ACQ_REL|SEQ_CST)"],
+                "message": "Using __atomic_xxx/__ATOMIC_XXX built-ins, prefer rte_atomic_xxx/rte_memory_order_xxx",
+            },
+            # Refrain from some pthread functions
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"pthread_(create|join|detach|set(_?name_np|affinity_np)|attr_set(inheritsched|schedpolicy))\("],
+                "message": "Using pthread functions, prefer rte_thread",
+            },
+            # Forbid use of __reserved
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"\b__reserved\b"],
+                "message": "Using __reserved",
+            },
+            # Forbid use of __alignof__
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"\b__alignof__\b"],
+                "message": "Using __alignof__, prefer C11 alignof",
+            },
+            # Forbid use of __typeof__
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"\b__typeof__\b"],
+                "message": "Using __typeof__, prefer typeof",
+            },
+            # Forbid use of __builtin_*
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "skip_files": [r"lib/eal/", r"drivers/.*/base/", r"drivers/.*osdep\.h$"],
+                "patterns": [r"\b__builtin_"],
+                "message": "Using __builtin helpers, prefer EAL macros",
+            },
+            # Forbid inclusion of linux/pci_regs.h
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"include.*linux/pci_regs\.h"],
+                "message": "Using linux/pci_regs.h, prefer rte_pci.h",
+            },
+            # Forbid variadic argument pack extension in macros
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"#\s*define.*[^(,\s]\.\.\.[\s]*\)"],
+                "message": "Do not use variadic argument pack in macros",
+            },
+            # Forbid __rte_packed_begin with enums
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"enum.*__rte_packed_begin"],
+                "message": "Using __rte_packed_begin with enum is not allowed",
+            },
+            # Forbid use of #pragma
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "skip_files": [r"lib/eal/include/rte_common\.h"],
+                "patterns": [r"(#pragma|_Pragma)"],
+                "message": "Using compilers pragma is not allowed",
+            },
+            # Forbid experimental build flag except in examples
+            {
+                "folders": ["lib", "drivers", "app"],
+                "patterns": [r"-DALLOW_EXPERIMENTAL_API", r"allow_experimental_apis"],
+                "message": "Using experimental build flag for in-tree compilation",
+            },
+            # Refrain from using RTE_LOG_REGISTER for drivers and libs
+            {
+                "folders": ["lib", "drivers"],
+                "patterns": [r"\bRTE_LOG_REGISTER\b"],
+                "message": "Using RTE_LOG_REGISTER, prefer RTE_LOG_REGISTER_(DEFAULT|SUFFIX)",
+            },
+            # Forbid non-internal thread in drivers and libs
+            {
+                "folders": ["lib", "drivers"],
+                "patterns": [r"rte_thread_(set_name|create_control)\("],
+                "message": "Prefer rte_thread_(set_prefixed_name|create_internal_control)",
+            },
+        ]
+
+    def add_result(self, level: str, type_name: str, message: str,
+                   filename: str = "", line_num: int = 0, line_content: str = "") -> None:
+        """Add a check result."""
+        result = CheckResult(
+            level=level,
+            type_name=type_name,
+            message=message,
+            filename=filename,
+            line_num=line_num,
+            line_content=line_content
+        )
+        self.results.append(result)
+
+        if level == "ERROR":
+            self.errors += 1
+        elif level == "WARNING":
+            self.warnings += 1
+        else:
+            self.checks += 1
+
+    def parse_patch(self, content: str) -> PatchInfo:
+        """Parse a patch and extract information."""
+        info = PatchInfo()
+        current_file = ""
+        in_diff = False
+        line_num_in_new = 0
+
+        lines = content.split("\n")
+        for i, line in enumerate(lines):
+            # Extract subject
+            if line.startswith("Subject:"):
+                subject = line[8:].strip()
+                # Handle multi-line subjects
+                j = i + 1
+                while j < len(lines) and lines[j].startswith(" "):
+                    subject += " " + lines[j].strip()
+                    j += 1
+                info.subject = subject
+
+            # Extract author
+            if line.startswith("From:"):
+                info.author = line[5:].strip()
+                match = re.search(r"<([^>]+)>", info.author)
+                if match:
+                    info.author_email = match.group(1)
+
+            # Extract Signed-off-by
+            match = re.match(r"^Signed-off-by:\s*(.+)$", line, re.IGNORECASE)
+            if match:
+                info.signoffs.append(match.group(1).strip())
+
+            # Extract Fixes tag
+            match = re.match(r"^Fixes:\s*([0-9a-fA-F]+)", line)
+            if match:
+                info.has_fixes_tag = True
+                info.fixes_commits.append(match.group(1))
+
+            # Track files in diff
+            if line.startswith("diff --git"):
+                match = re.match(r"diff --git a/(\S+) b/(\S+)", line)
+                if match:
+                    current_file = match.group(2)
+                    if current_file not in info.files:
+                        info.files.append(current_file)
+                    info.added_lines[current_file] = []
+                in_diff = True
+
+            # Track hunks
+            if line.startswith("@@"):
+                match = re.match(r"@@ -\d+(?:,\d+)? \+(\d+)", line)
+                if match:
+                    line_num_in_new = int(match.group(1))
+                continue
+
+            # Track added lines
+            if in_diff and current_file:
+                if line.startswith("+") and not line.startswith("+++"):
+                    info.added_lines[current_file].append((line_num_in_new, line[1:]))
+                    line_num_in_new += 1
+                elif line.startswith("-"):
+                    pass  # Deleted line, don't increment
+                elif not line.startswith("\\"):
+                    line_num_in_new += 1
+
+        return info
+
+    def check_line_length(self, patch_info: PatchInfo) -> None:
+        """Check for lines exceeding maximum length."""
+        max_len = self.config.get("max_line_length", DEFAULT_LINE_LENGTH)
+
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                # Skip strings that span multiple lines
+                if len(content) > max_len:
+                    # Don't warn about long strings or URLs
+                    if '\"' in content and content.count('\"') >= 2:
+                        continue
+                    if "http://" in content or "https://" in content:
+                        continue
+                    # Check if it's a comment line
+                    if content.strip().startswith("/*") or content.strip().startswith("*") or content.strip().startswith("//"):
+                        self.add_result(
+                            "WARNING", "LONG_LINE_COMMENT",
+                            f"line length of {len(content)} exceeds {max_len} columns",
+                            filename, line_num, content
+                        )
+                    else:
+                        self.add_result(
+                            "WARNING", "LONG_LINE",
+                            f"line length of {len(content)} exceeds {max_len} columns",
+                            filename, line_num, content
+                        )
+
+    def check_trailing_whitespace(self, patch_info: PatchInfo) -> None:
+        """Check for trailing whitespace."""
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                if content != content.rstrip():
+                    self.add_result(
+                        "WARNING", "TRAILING_WHITESPACE",
+                        "trailing whitespace",
+                        filename, line_num, content
+                    )
+
+    def check_tabs_spaces(self, patch_info: PatchInfo) -> None:
+        """Check for space before tab and mixed indentation."""
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                if " \t" in content:
+                    self.add_result(
+                        "WARNING", "SPACE_BEFORE_TAB",
+                        "space before tab in indent",
+                        filename, line_num, content
+                    )
+
+    def check_signoff(self, patch_info: PatchInfo) -> None:
+        """Check for Signed-off-by line."""
+        if not patch_info.signoffs:
+            self.add_result(
+                "ERROR", "MISSING_SIGN_OFF",
+                "Missing Signed-off-by: line(s)"
+            )
+
+    def check_coding_style(self, patch_info: PatchInfo) -> None:
+        """Check various coding style issues."""
+        for filename, lines in patch_info.added_lines.items():
+            # Skip non-C files for most checks
+            is_c_file = filename.endswith((".c", ".h"))
+            is_c_source = filename.endswith(".c")
+            is_header = filename.endswith(".h")
+
+            prev_line = ""
+            indent_stack = []
+            for line_num, content in lines:
+                self.lines_checked += 1
+
+                if is_c_file:
+                    # Check for externs in .c files
+                    if is_c_source and re.match(r"^\s*extern\b", content):
+                        self.add_result(
+                            "WARNING", "AVOID_EXTERNS",
+                            "externs should be avoided in .c files",
+                            filename, line_num, content
+                        )
+
+                    # Check for unnecessary break after goto/return/continue
+                    # Only flag if the previous statement is unconditional (not inside an if)
+                    if re.match(r"^\s*break\s*;", content):
+                        # Check if previous line is an unconditional return/goto/continue
+                        # It's unconditional if it starts at the same or lower indentation as break
+                        # or if it's a plain return/goto not inside an if block
+                        prev_stripped = prev_line.strip() if prev_line else ""
+                        if re.match(r"^(goto\s+\w+|return\b|continue)\s*[^;]*;\s*$", prev_stripped):
+                            # Check indentation - if prev line has same or less indentation, it's unconditional
+                            break_indent = len(content) - len(content.lstrip())
+                            prev_indent = len(prev_line) - len(prev_line.lstrip()) if prev_line else 0
+                            # Only flag if the return/goto is at the same indentation level
+                            # (meaning it's not inside a nested if block)
+                            if prev_indent <= break_indent:
+                                self.add_result(
+                                    "WARNING", "UNNECESSARY_BREAK",
+                                    "break is not useful after a goto or return",
+                                    filename, line_num, content
+                                )
+
+                    # STRNCPY: should use strlcpy
+                    if re.search(r"\bstrncpy\s*\(", content):
+                        self.add_result(
+                            "WARNING", "STRNCPY",
+                            "Prefer strlcpy over strncpy - see: https://lore.kernel.org/r/CAHk-=wgfRnXz0W3D37d01q3JFkr_i_uTL=V6A6G1oUZcprmknw@mail.gmail.com/",
+                            filename, line_num, content
+                        )
+
+                    # STRCPY: unsafe string copy
+                    if re.search(r"\bstrcpy\s*\(", content):
+                        self.add_result(
+                            "ERROR", "STRCPY",
+                            "strcpy is unsafe - use strlcpy or snprintf",
+                            filename, line_num, content
+                        )
+
+                    # Check for complex macros without proper enclosure
+                    # Note: Compound literal macros like (type[]){...} are valid C99
+                    # and commonly used in DPDK, so we don't flag those.
+                    # Only flag macros with multiple statements without do-while wrapping.
+                    if re.match(r"^\s*#\s*define\s+\w+\s*\([^)]*\)\s+\{", content):
+                        # Macro body starts with { but is not a compound literal
+                        # Check if it's missing do { } while(0)
+                        if not re.search(r"\bdo\s*\{", content):
+                            self.add_result(
+                                "ERROR", "COMPLEX_MACRO",
+                                "Macros with complex values should be enclosed in parentheses or do { } while(0)",
+                                filename, line_num, content
+                            )
+
+                    # SPACING: missing space before ( in control statements
+                    if re.search(r"\b(if|while|for|switch)\(", content):
+                        self.add_result(
+                            "WARNING", "SPACING",
+                            "space required before the open parenthesis '('",
+                            filename, line_num, content
+                        )
+
+                    # SPACING: space prohibited after open square bracket
+                    if re.search(r"\[\s+[^\]]", content) and not re.search(r"\[\s*\]", content):
+                        self.add_result(
+                            "WARNING", "SPACING",
+                            "space prohibited after that open square bracket '['",
+                            filename, line_num, content
+                        )
+
+                    # SPACING: space prohibited before close square bracket
+                    if re.search(r"[^\[]\s+\]", content):
+                        self.add_result(
+                            "WARNING", "SPACING",
+                            "space prohibited before that close square bracket ']'",
+                            filename, line_num, content
+                        )
+
+                    # RETURN_PARENTHESES: return with parentheses
+                    if re.search(r"\breturn\s*\([^;]+\)\s*;", content):
+                        # Avoid false positives for function calls like return (func())
+                        if not re.search(r"\breturn\s*\(\s*\w+\s*\([^)]*\)\s*\)\s*;", content):
+                            self.add_result(
+                                "WARNING", "RETURN_PARENTHESES",
+                                "return is not a function, parentheses are not required",
+                                filename, line_num, content
+                            )
+
+                    # BRACES: single statement blocks that need braces
+                    # Check for if/else/while/for without braces on multiline
+                    if re.match(r"^\s*(if|else\s+if|while|for)\s*\([^{]*$", content):
+                        # Control statement without opening brace - check next line
+                        pass  # Would need lookahead
+
+                    # INITIALISED_STATIC: static initialized to 0/NULL
+                    if re.match(r"^\s*static\s+.*=\s*(0|NULL|0L|0UL|0ULL|0LL)\s*;", content):
+                        self.add_result(
+                            "WARNING", "INITIALISED_STATIC",
+                            "do not initialise statics to 0 or NULL",
+                            filename, line_num, content
+                        )
+
+                    # GLOBAL_INITIALISERS: global initialized to 0/NULL
+                    if re.match(r"^[a-zA-Z_][a-zA-Z0-9_\s\*]*=\s*(0|NULL|0L|0UL|0ULL|0LL)\s*;", content):
+                        if not re.match(r"^\s*static\s+", content):
+                            self.add_result(
+                                "WARNING", "GLOBAL_INITIALISERS",
+                                "do not initialise globals to 0 or NULL",
+                                filename, line_num, content
+                            )
+
+                    # DEEP_INDENTATION: more than 5-6 levels of tabs
+                    # Only flag control flow statements (if/for/while/switch) at very deep indentation
+                    # Note: switch/case structures legitimately have deep nesting
+                    leading_tabs = len(content) - len(content.lstrip('\t'))
+                    if leading_tabs >= 6:
+                        stripped = content.strip()
+                        # Only warn for control flow statements that create new nesting
+                        # This indicates actual deep nesting, not just aligned continuations
+                        if stripped and re.match(r'^(if|for|while|switch)\s*\(', stripped):
+                            self.add_result(
+                                "WARNING", "DEEP_INDENTATION",
+                                f"too many levels of indentation ({leading_tabs} tabs)",
+                                filename, line_num, content
+                            )
+
+                    # TRAILING_STATEMENTS: code on same line as } OR control statement
+                    # But allow struct/union member declarations: } name; or } name; /* comment */
+                    if re.search(r"\}\s*[a-zA-Z_]", content) and not re.search(r"\}\s*(else|while)\b", content):
+                        # Check if this is a struct/union member declaration
+                        # Pattern: } identifier; or } identifier[]; or with comment
+                        if not re.search(r"\}\s*\w+\s*(\[\d*\])?\s*;\s*(/\*.*\*/|//.*)?\s*$", content):
+                            self.add_result(
+                                "ERROR", "TRAILING_STATEMENTS",
+                                "trailing statements should be on next line",
+                                filename, line_num, content
+                            )
+                    # Also check for if/while/for with statement on same line (not opening brace)
+                    # Pattern: if (cond) statement; or if (cond) statement; /* comment */
+                    if re.search(r"\b(if|while|for)\s*\([^)]+\)\s+(?![\s{])[^;]*;", content):
+                        self.add_result(
+                            "ERROR", "TRAILING_STATEMENTS",
+                            "trailing statements should be on next line",
+                            filename, line_num, content
+                        )
+
+                    # CONSTANT_COMPARISON: Yoda conditions (constant on left)
+                    if re.search(r'\b(NULL|true|false)\s*[!=]=\s*[&*\w]', content) or \
+                       re.search(r'\(\s*0\s*[!=]=\s*[&*\w]', content):
+                        self.add_result(
+                            "WARNING", "CONSTANT_COMPARISON",
+                            "Comparisons should place the constant on the right side",
+                            filename, line_num, content
+                        )
+
+                    # BRACES: single statement block should not have braces (or vice versa)
+                    # Check for if/else/while/for with single statement in braces
+                    if re.match(r"^\s*(if|while|for)\s*\([^)]+\)\s*\{\s*$", prev_line):
+                        if re.match(r"^\s*\w.*;\s*$", content) and not re.search(r"^\s*(if|else|while|for|switch|case|default|return\s*;)", content):
+                            # Check if next line is just closing brace - would need lookahead
+                            pass
+
+                    # ONE_SEMICOLON: double semicolon
+                    if re.search(r";;", content) and not re.search(r"for\s*\([^)]*;;", content):
+                        self.add_result(
+                            "WARNING", "ONE_SEMICOLON",
+                            "Statements terminations use 1 semicolon",
+                            filename, line_num, content
+                        )
+
+                    # CODE_INDENT/LEADING_SPACE: spaces used for indentation instead of tabs
+                    if re.match(r"^    +[^\s]", content) and not content.strip().startswith("*"):
+                        # Line starts with spaces (not tabs) - but allow for alignment in comments
+                        self.add_result(
+                            "WARNING", "CODE_INDENT",
+                            "code indent should use tabs where possible",
+                            filename, line_num, content
+                        )
+
+                    # LEADING_SPACE: spaces at start of line (more general)
+                    if re.match(r"^ +\t", content):
+                        self.add_result(
+                            "WARNING", "LEADING_SPACE",
+                            "please, no spaces at the start of a line",
+                            filename, line_num, content
+                        )
+
+                    # LINE_CONTINUATIONS: backslash continuation outside macros
+                    # Track if we're inside a macro (previous line ended with \)
+                    in_macro = prev_line and prev_line.rstrip().endswith("\\")
+                    if content.rstrip().endswith("\\") and not re.match(r"^\s*#", content):
+                        # Not a preprocessor directive but has continuation
+                        # Check if this is part of a macro definition
+                        if not in_macro and not re.match(r"^\s*#\s*define", prev_line):
+                            self.add_result(
+                                "WARNING", "LINE_CONTINUATIONS",
+                                "Avoid unnecessary line continuations",
+                                filename, line_num, content
+                            )
+
+                    # FUNCTION_WITHOUT_ARGS: empty parens instead of (void)
+                    if is_header and re.search(r"\b\w+\s*\(\s*\)\s*;", content):
+                        if not re.search(r"\b(while|if|for|switch|return)\s*\(\s*\)", content):
+                            self.add_result(
+                                "ERROR", "FUNCTION_WITHOUT_ARGS",
+                                "Bad function definition - use (void) instead of ()",
+                                filename, line_num, content
+                            )
+
+                    # INLINE_LOCATION: inline should come after storage class
+                    if re.match(r"^\s*inline\s+(static|extern)", content):
+                        self.add_result(
+                            "ERROR", "INLINE_LOCATION",
+                            "inline keyword should sit between storage class and type",
+                            filename, line_num, content
+                        )
+
+                    # STATIC_CONST: const should come after static
+                    if re.match(r"^\s*const\s+static\b", content):
+                        self.add_result(
+                            "WARNING", "STATIC_CONST",
+                            "Move const after static - use 'static const'",
+                            filename, line_num, content
+                        )
+                        self.add_result(
+                            "WARNING", "STORAGE_CLASS",
+                            "storage class should be at the beginning of the declaration",
+                            filename, line_num, content
+                        )
+
+                    # CONST_CONST: const used twice
+                    if re.search(r"\bconst\s+\w+\s+const\b", content):
+                        self.add_result(
+                            "WARNING", "CONST_CONST",
+                            "const used twice - remove duplicate const",
+                            filename, line_num, content
+                        )
+
+                    # SELF_ASSIGNMENT: x = x (simple variable, not struct members)
+                    # Match only simple identifiers, not struct/pointer member access
+                    match = re.search(r"^\s*(\w+)\s*=\s*(\w+)\s*;", content)
+                    if match and match.group(1) == match.group(2):
+                        self.add_result(
+                            "WARNING", "SELF_ASSIGNMENT",
+                            "Do not use self-assignments to avoid compiler warnings",
+                            filename, line_num, content
+                        )
+
+                    # PREFER_DEFINED_ATTRIBUTE_MACRO: prefer DPDK/kernel macros over __attribute__
+                    attr_macros = {
+                        'cold': '__rte_cold',
+                        'hot': '__rte_hot', 
+                        'noinline': '__rte_noinline',
+                        'always_inline': '__rte_always_inline',
+                        'unused': '__rte_unused',
+                        'packed': '__rte_packed',
+                        'aligned': '__rte_aligned',
+                        'weak': '__rte_weak',
+                        'pure': '__rte_pure',
+                    }
+                    for attr, replacement in attr_macros.items():
+                        if re.search(rf'__attribute__\s*\(\s*\(\s*{attr}\b', content):
+                            self.add_result(
+                                "WARNING", "PREFER_DEFINED_ATTRIBUTE_MACRO",
+                                f"Prefer {replacement} over __attribute__(({attr}))",
+                                filename, line_num, content
+                            )
+
+                    # POINTER_LOCATION: char* instead of char *
+                    if re.search(r"\b(char|int|void|short|long|float|double|unsigned|signed)\*\s+\w", content):
+                        self.add_result(
+                            "ERROR", "POINTER_LOCATION",
+                            "\"foo* bar\" should be \"foo *bar\"",
+                            filename, line_num, content
+                        )
+
+                    # MACRO_WITH_FLOW_CONTROL: macros with return/goto/break
+                    if re.match(r"^\s*#\s*define\s+\w+.*\b(return|goto|break|continue)\b", content):
+                        self.add_result(
+                            "WARNING", "MACRO_WITH_FLOW_CONTROL",
+                            "Macros with flow control statements should be avoided",
+                            filename, line_num, content
+                        )
+
+                    # MULTISTATEMENT_MACRO_USE_DO_WHILE: macros with multiple statements
+                    if re.match(r"^\s*#\s*define\s+\w+\([^)]*\)\s+.*;\s*[^\\]", content):
+                        if not re.search(r"do\s*\{", content):
+                            self.add_result(
+                                "WARNING", "MULTISTATEMENT_MACRO_USE_DO_WHILE",
+                                "Macros with multiple statements should use do {} while(0)",
+                                filename, line_num, content
+                            )
+
+                    # MULTISTATEMENT_MACRO_USE_DO_WHILE: macros starting with if
+                    if re.match(r"^\s*#\s*define\s+\w+\([^)]*\)\s+if\s*\(", content):
+                        self.add_result(
+                            "ERROR", "MULTISTATEMENT_MACRO_USE_DO_WHILE",
+                            "Macros starting with if should be enclosed by a do - while loop",
+                            filename, line_num, content
+                        )
+
+                    # Multiple statements on one line (skip comments and strings)
+                    stripped_content = content.strip()
+                    if re.search(r";\s*[a-zA-Z_]", content) and "for" not in content:
+                        # Skip if line is a comment
+                        if not (stripped_content.startswith("/*") or 
+                                stripped_content.startswith("*") or 
+                                stripped_content.startswith("//")):
+                            # Skip if the semicolon is inside a string or comment
+                            # Remove strings and comments before checking
+                            code_only = re.sub(r'"[^"]*"', '""', content)  # Remove string contents
+                            code_only = re.sub(r'/\*.*?\*/', '', code_only)  # Remove /* */ comments
+                            code_only = re.sub(r'//.*$', '', code_only)  # Remove // comments
+                            if re.search(r";\s*[a-zA-Z_]", code_only):
+                                self.add_result(
+                                    "CHECK", "MULTIPLE_STATEMENTS",
+                                    "multiple statements on one line",
+                                    filename, line_num, content
+                                )
+
+                    # Check for C99 comments in headers that should use C89
+                    if is_header and "//" in content:
+                        # Only flag if not in a string
+                        stripped = re.sub(r'"[^"]*"', '', content)
+                        if "//" in stripped:
+                            self.add_result(
+                                "CHECK", "C99_COMMENTS",
+                                "C99 // comments are acceptable but /* */ is preferred in headers",
+                                filename, line_num, content
+                            )
+
+                    # BLOCK_COMMENT_STYLE: block comments style issues
+                    # Leading /* on its own line (but allow Doxygen /** style)
+                    if re.match(r"^\s*/\*\*+\s*$", content):
+                        # Allow /** (Doxygen) but not /*** or more
+                        if not re.match(r"^\s*/\*\*\s*$", content):
+                            self.add_result(
+                                "WARNING", "BLOCK_COMMENT_STYLE",
+                                "Block comments should not use a leading /* on a line by itself",
+                                filename, line_num, content
+                            )
+                    # Trailing */ on separate line after block comment
+                    if re.match(r"^\s*\*+/\s*$", content) and prev_line.strip().startswith("*"):
+                        pass  # This is actually acceptable
+                    # Block with trailing */ but content before it (like === */)
+                    if re.search(r"\S\s*=+\s*\*/\s*$", content):
+                        self.add_result(
+                            "WARNING", "BLOCK_COMMENT_STYLE",
+                            "Block comments use a trailing */ on a separate line",
+                            filename, line_num, content
+                        )
+
+                    # REPEATED_WORD: check for repeated words
+                    words = re.findall(r'\b(\w+)\s+\1\b', content, re.IGNORECASE)
+                    for word in words:
+                        word_lower = word.lower()
+                        # Skip common valid repeated patterns
+                        if word_lower not in ('that', 'had', 'long', 'int', 'short'):
+                            self.add_result(
+                                "WARNING", "REPEATED_WORD",
+                                f"Possible repeated word: '{word}'",
+                                filename, line_num, content
+                            )
+
+                    # STRING_FRAGMENTS: unnecessary string concatenation like "foo" "bar"
+                    # Must have closing quote, whitespace, opening quote pattern
+                    if re.search(r'"\s*"\s*[^)]', content) and not re.search(r'#\s*define', content):
+                        # Verify it's actually two separate strings being concatenated
+                        # by checking for the pattern: "..." "..."
+                        if re.search(r'"[^"]*"\s+"[^"]*"', content):
+                            self.add_result(
+                                "CHECK", "STRING_FRAGMENTS",
+                                "Consecutive strings are generally better as a single string",
+                                filename, line_num, content
+                            )
+
+                prev_line = content
+
+    def check_spelling(self, patch_info: PatchInfo) -> None:
+        """Check for spelling errors using codespell dictionary."""
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                # REPEATED_WORD check for non-C files (C files handled in check_coding_style)
+                if not filename.endswith((".c", ".h")):
+                    words = re.findall(r'\b(\w+)\s+\1\b', content, re.IGNORECASE)
+                    for word in words:
+                        word_lower = word.lower()
+                        if word_lower not in ('that', 'had', 'long', 'int', 'short'):
+                            self.add_result(
+                                "WARNING", "REPEATED_WORD",
+                                f"Possible repeated word: '{word}'",
+                                filename, line_num, content
+                            )
+
+                # Spelling check
+                if self.spelling_dict:
+                    # Common abbreviations that should not be flagged as typos
+                    abbreviations = {
+                        'nd', 'ns', 'na', 'ra', 'rs',  # IPv6 Neighbor Discovery
+                        'tx', 'rx', 'id', 'io', 'ip',  # Common networking
+                        'tcp', 'udp', 'arp', 'dns',    # Protocols  
+                        'hw', 'sw', 'fw',              # Hardware/Software/Firmware
+                        'src', 'dst', 'ptr', 'buf',    # Common code abbreviations
+                        'cfg', 'ctx', 'idx', 'cnt',    # Config/Context/Index/Count
+                        'len', 'num', 'max', 'min',    # Length/Number/Max/Min
+                        'prev', 'next', 'curr',        # Previous/Next/Current
+                        'init', 'fini', 'deinit',      # Initialize/Finish
+                        'alloc', 'dealloc', 'realloc', # Memory
+                        'endcode',                      # Doxygen tag
+                    }
+                    # Extract words, but skip contractions (don't, couldn't, etc.)
+                    # by removing them before word extraction
+                    spell_content = re.sub(r"[a-zA-Z]+n't\b", '', content)
+                    spell_content = re.sub(r"[a-zA-Z]+'[a-zA-Z]+", '', spell_content)
+                    words = re.findall(r'\b[a-zA-Z]+\b', spell_content)
+                    for word in words:
+                        lower_word = word.lower()
+                        if lower_word in self.spelling_dict and lower_word not in abbreviations:
+                            self.add_result(
+                                "WARNING", "TYPO_SPELLING",
+                                f"'{word}' may be misspelled - perhaps '{self.spelling_dict[lower_word]}'?",
+                                filename, line_num, content
+                            )
+
+    def check_forbidden_tokens(self, patch_info: PatchInfo) -> None:
+        """Check for DPDK-specific forbidden tokens."""
+        for filename, lines in patch_info.added_lines.items():
+            for rule in self.forbidden_rules:
+                # Check if file is in one of the target folders
+                in_folder = False
+                for folder in rule["folders"]:
+                    if filename.startswith(folder + "/") or filename.startswith("b/" + folder + "/"):
+                        in_folder = True
+                        break
+
+                if not in_folder:
+                    continue
+
+                # Check if file should be skipped
+                skip = False
+                for skip_pattern in rule.get("skip_files", []):
+                    if re.search(skip_pattern, filename):
+                        skip = True
+                        break
+
+                if skip:
+                    continue
+
+                # Check each line for forbidden patterns
+                for line_num, content in lines:
+                    for pattern in rule["patterns"]:
+                        if re.search(pattern, content):
+                            self.add_result(
+                                "WARNING", "FORBIDDEN_TOKEN",
+                                rule["message"],
+                                filename, line_num, content
+                            )
+                            break
+
+    def check_experimental_tags(self, patch_info: PatchInfo) -> None:
+        """Check __rte_experimental tag placement."""
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                if "__rte_experimental" in content:
+                    # Should only be in headers
+                    if filename.endswith(".c"):
+                        self.add_result(
+                            "WARNING", "EXPERIMENTAL_TAG",
+                            f"Please only put __rte_experimental tags in headers ({filename})",
+                            filename, line_num, content
+                        )
+                    # Should appear alone on the line
+                    stripped = content.strip()
+                    if stripped != "__rte_experimental":
+                        self.add_result(
+                            "WARNING", "EXPERIMENTAL_TAG",
+                            "__rte_experimental must appear alone on the line immediately preceding the return type of a function",
+                            filename, line_num, content
+                        )
+
+    def check_internal_tags(self, patch_info: PatchInfo) -> None:
+        """Check __rte_internal tag placement."""
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                if "__rte_internal" in content:
+                    # Should only be in headers
+                    if filename.endswith(".c"):
+                        self.add_result(
+                            "WARNING", "INTERNAL_TAG",
+                            f"Please only put __rte_internal tags in headers ({filename})",
+                            filename, line_num, content
+                        )
+                    # Should appear alone on the line
+                    stripped = content.strip()
+                    if stripped != "__rte_internal":
+                        self.add_result(
+                            "WARNING", "INTERNAL_TAG",
+                            "__rte_internal must appear alone on the line immediately preceding the return type of a function",
+                            filename, line_num, content
+                        )
+
+    def check_aligned_attributes(self, patch_info: PatchInfo) -> None:
+        """Check alignment attribute usage."""
+        align_tokens = ["__rte_aligned", "__rte_cache_aligned", "__rte_cache_min_aligned"]
+
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                for token in align_tokens:
+                    if re.search(rf"\b{token}\b", content):
+                        # Should only be used with struct or union
+                        if not re.search(rf"\b(struct|union)\s*{token}\b", content):
+                            self.add_result(
+                                "WARNING", "ALIGNED_ATTRIBUTE",
+                                f"Please use {token} only for struct or union types alignment",
+                                filename, line_num, content
+                            )
+
+    def check_packed_attributes(self, patch_info: PatchInfo) -> None:
+        """Check packed attribute usage."""
+        begin_count = 0
+        end_count = 0
+
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                if "__rte_packed_begin" in content:
+                    begin_count += 1
+                    # Should be after struct, union, or alignment attributes
+                    if not re.search(r"\b(struct|union)\s*__rte_packed_begin\b", content) and \
+                       not re.search(r"__rte_cache_aligned\s*__rte_packed_begin", content) and \
+                       not re.search(r"__rte_cache_min_aligned\s*__rte_packed_begin", content) and \
+                       not re.search(r"__rte_aligned\(.*\)\s*__rte_packed_begin", content):
+                        self.add_result(
+                            "WARNING", "PACKED_ATTRIBUTE",
+                            "Use __rte_packed_begin only after struct, union or alignment attributes",
+                            filename, line_num, content
+                        )
+
+                if "__rte_packed_end" in content:
+                    end_count += 1
+
+        if begin_count != end_count:
+            self.add_result(
+                "WARNING", "PACKED_ATTRIBUTE",
+                "__rte_packed_begin and __rte_packed_end should always be used in pairs"
+            )
+
+    def check_patch(self, content: str, patch_file: str = None) -> bool:
+        """Run all checks on a patch."""
+        self.results = []
+        self.errors = 0
+        self.warnings = 0
+        self.checks = 0
+        self.lines_checked = 0
+
+        # Check patch format first
+        self.check_patch_format(content, patch_file)
+
+        patch_info = self.parse_patch(content)
+
+        # Run all checks
+        self.check_signoff(patch_info)
+        self.check_line_length(patch_info)
+        self.check_trailing_whitespace(patch_info)
+        self.check_tabs_spaces(patch_info)
+        self.check_coding_style(patch_info)
+        self.check_spelling(patch_info)
+        self.check_forbidden_tokens(patch_info)
+        self.check_experimental_tags(patch_info)
+        self.check_internal_tags(patch_info)
+        self.check_aligned_attributes(patch_info)
+        self.check_packed_attributes(patch_info)
+        self.check_commit_message(patch_info, content)
+
+        return self.errors == 0 and self.warnings == 0
+
+    def check_patch_format(self, content: str, patch_file: str = None) -> None:
+        """Check basic patch format for corruption."""
+        lines = content.split("\n")
+
+        # Track patch structure
+        has_diff = False
+        has_hunk = False
+        in_hunk = False
+        hunk_line = 0
+
+        for i, line in enumerate(lines, 1):
+            # Track diff headers
+            if line.startswith("diff --git"):
+                has_diff = True
+                in_hunk = False
+
+            # Parse hunk header
+            if line.startswith("@@"):
+                has_hunk = True
+                in_hunk = True
+                hunk_line = i
+                # Validate hunk header format
+                if not re.match(r"@@ -\d+(?:,\d+)? \+\d+(?:,\d+)? @@", line):
+                    self.add_result(
+                        "ERROR", "CORRUPTED_PATCH",
+                        f"patch seems to be corrupt (malformed hunk header) at line {i}"
+                    )
+
+            # End of patch content (signature separator)
+            elif line == "-- ":
+                in_hunk = False
+
+            # Check for lines that look like they should be in a hunk but aren't prefixed
+            elif in_hunk and line and not line.startswith(("+", "-", " ", "\\", "diff ", "@@", "index ", "--- ", "+++ ", "new file", "deleted file", "old mode", "new mode", "rename ", "similarity", "copy ")):
+                # This could be a wrapped line or corruption
+                # But be careful - empty lines and commit message lines are OK
+                if not line.startswith(("From ", "Subject:", "Date:", "Signed-off-by:",
+                                       "Acked-by:", "Reviewed-by:", "Tested-by:",
+                                       "Fixes:", "Cc:", "---", "Message-Id:")):
+                    # Likely a corrupted/wrapped line in the diff
+                    self.add_result(
+                        "ERROR", "CORRUPTED_PATCH",
+                        f"patch seems to be corrupt (line wrapped?) at line {i}"
+                    )
+                    in_hunk = False  # Stop checking this hunk
+
+        if has_diff and not has_hunk:
+            self.add_result(
+                "ERROR", "CORRUPTED_PATCH",
+                "Patch appears to be corrupted (has diff but no hunks)"
+            )
+
+        # Check for DOS line endings
+        if "\r\n" in content:
+            self.add_result(
+                "ERROR", "DOS_LINE_ENDINGS",
+                "Patch has DOS line endings, should be UNIX line endings"
+            )
+
+    def check_commit_message(self, patch_info: PatchInfo, content: str) -> None:
+        """Check commit message for issues."""
+        lines = content.split("\n")
+
+        in_commit_msg = False
+        commit_msg_lines = []
+
+        for i, line in enumerate(lines):
+            if line.startswith("Subject:"):
+                in_commit_msg = True
+                continue
+            if line.startswith("---") or line.startswith("diff --git"):
+                in_commit_msg = False
+                continue
+            if in_commit_msg:
+                commit_msg_lines.append((i + 1, line))
+
+        for line_num, line in commit_msg_lines:
+            # UNKNOWN_COMMIT_ID: Fixes tag with short or invalid commit ID
+            match = re.match(r"^Fixes:\s*([0-9a-fA-F]+)", line)
+            if match:
+                commit_id = match.group(1)
+                if len(commit_id) < 12:
+                    self.add_result(
+                        "WARNING", "UNKNOWN_COMMIT_ID",
+                        f"Commit id '{commit_id}' is too short, use at least 12 characters",
+                        line_num=line_num, line_content=line
+                    )
+                # Check Fixes format: should be Fixes: <hash> ("commit subject")
+                if not re.match(r'^Fixes:\s+[0-9a-fA-F]{12,}\s+\("[^"]+"\)\s*$', line):
+                    self.add_result(
+                        "WARNING", "BAD_FIXES_TAG",
+                        "Fixes: tag format should be: Fixes: <12+ char hash> (\"commit subject\")",
+                        line_num=line_num, line_content=line
+                    )
+
+    def format_results(self, show_types: bool = True) -> str:
+        """Format the results for output."""
+        output = []
+
+        for result in self.results:
+            if result.filename and result.line_num:
+                prefix = f"{result.filename}:{result.line_num}:"
+            elif result.filename:
+                prefix = f"{result.filename}:"
+            else:
+                prefix = ""
+
+            type_str = f" [{result.type_name}]" if show_types else ""
+            output.append(f"{result.level}:{type_str} {result.message}")
+
+            if prefix:
+                output.append(f"#  {prefix}")
+            if result.line_content:
+                output.append(f"+  {result.line_content}")
+            output.append("")
+
+        return "\n".join(output)
+
+    def get_summary(self) -> str:
+        """Get a summary of the check results."""
+        return f"total: {self.errors} errors, {self.warnings} warnings, {self.checks} checks, {self.lines_checked} lines checked"
+
+
+def check_single_patch(checker: CheckPatch, patch_path: Optional[str],
+                       commit: Optional[str], verbose: bool, quiet: bool) -> bool:
+    """Check a single patch file or commit."""
+    subject = ""
+    content = ""
+
+    if patch_path:
+        try:
+            with open(patch_path, "r", encoding="utf-8", errors="replace") as f:
+                content = f.read()
+        except IOError as e:
+            print(f"Error reading {patch_path}: {e}", file=sys.stderr)
+            return False
+    elif commit:
+        try:
+            result = subprocess.run(
+                ["git", "format-patch", "--find-renames", "--no-stat", "--stdout", "-1", commit],
+                capture_output=True,
+                text=True
+            )
+            if result.returncode != 0:
+                print(f"Error getting commit {commit}", file=sys.stderr)
+                return False
+            content = result.stdout
+        except (subprocess.CalledProcessError, FileNotFoundError) as e:
+            print(f"Error running git: {e}", file=sys.stderr)
+            return False
+    else:
+        content = sys.stdin.read()
+
+    # Extract subject
+    match = re.search(r"^Subject:\s*(.+?)(?:\n(?=\S)|\n\n)", content, re.MULTILINE | re.DOTALL)
+    if match:
+        subject = match.group(1).replace("\n ", " ").strip()
+
+    if verbose:
+        print(f"\n### {subject}\n")
+
+    is_clean = checker.check_patch(content, patch_path)
+    has_issues = checker.errors > 0 or checker.warnings > 0
+
+    if has_issues or verbose:
+        if not verbose and subject:
+            print(f"\n### {subject}\n")
+        print(checker.format_results(show_types=True))
+        print(checker.get_summary())
+
+    return is_clean
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse command line arguments."""
+    parser = argparse.ArgumentParser(
+        description="Check patches for DPDK coding style and common issues",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  %(prog)s patch.diff                Check a patch file
+  %(prog)s -n 3                      Check last 3 commits
+  %(prog)s -r origin/main..HEAD      Check commits in range
+  cat patch.diff | %(prog)s          Check patch from stdin
+"""
+    )
+
+    parser.add_argument("patches", nargs="*", help="Patch files to check")
+    parser.add_argument("-n", type=int, metavar="NUM",
+                       help="Check last NUM commits")
+    parser.add_argument("-r", "--range", metavar="RANGE",
+                       help="Check commits in git range (default: origin/main..)")
+    parser.add_argument("-q", "--quiet", action="store_true",
+                       help="Quiet mode - only show summary")
+    parser.add_argument("-v", "--verbose", action="store_true",
+                       help="Verbose mode - show all checks")
+    parser.add_argument("--max-line-length", type=int, default=DEFAULT_LINE_LENGTH,
+                       help=f"Maximum line length (default: {DEFAULT_LINE_LENGTH})")
+    parser.add_argument("--codespell", action="store_true", default=True,
+                       help="Enable spell checking (default: enabled)")
+    parser.add_argument("--no-codespell", dest="codespell", action="store_false",
+                       help="Disable spell checking")
+    parser.add_argument("--codespellfile", metavar="FILE",
+                       help="Path to codespell dictionary")
+    parser.add_argument("--show-types", action="store_true", default=True,
+                       help="Show message types (default: enabled)")
+    parser.add_argument("--no-show-types", dest="show_types", action="store_false",
+                       help="Hide message types")
+
+    return parser.parse_args()
+
+
+def main():
+    """Main entry point."""
+    args = parse_args()
+
+    # Build configuration
+    config = {
+        "max_line_length": args.max_line_length,
+        "codespell": args.codespell,
+        "show_types": args.show_types,
+    }
+
+    if args.codespellfile:
+        config["codespell_file"] = args.codespellfile
+
+    checker = CheckPatch(config)
+
+    total = 0
+    failed = 0
+
+    if args.patches:
+        # Check specified patch files
+        for patch in args.patches:
+            total += 1
+            if not check_single_patch(checker, patch, None, args.verbose, args.quiet):
+                failed += 1
+
+    elif args.n or args.range:
+        # Check git commits
+        if args.n:
+            result = subprocess.run(
+                ["git", "rev-list", "--reverse", f"--max-count={args.n}", "HEAD"],
+                capture_output=True,
+                text=True
+            )
+        else:
+            git_range = args.range if args.range else "origin/main.."
+            result = subprocess.run(
+                ["git", "rev-list", "--reverse", git_range],
+                capture_output=True,
+                text=True
+            )
+
+        if result.returncode != 0:
+            print("Error getting git commits", file=sys.stderr)
+            sys.exit(1)
+
+        commits = result.stdout.strip().split("\n")
+        for commit in commits:
+            if commit:
+                total += 1
+                if not check_single_patch(checker, None, commit, args.verbose, args.quiet):
+                    failed += 1
+
+    elif not sys.stdin.isatty():
+        # Read from stdin
+        total = 1
+        if not check_single_patch(checker, None, None, args.verbose, args.quiet):
+            failed += 1
+
+    else:
+        # Default to checking commits since origin/main
+        result = subprocess.run(
+            ["git", "rev-list", "--reverse", "origin/main.."],
+            capture_output=True,
+            text=True
+        )
+
+        commits = result.stdout.strip().split("\n") if result.stdout.strip() else []
+        for commit in commits:
+            if commit:
+                total += 1
+                if not check_single_patch(checker, None, commit, args.verbose, args.quiet):
+                    failed += 1
+
+    # Print summary
+    passed = total - failed
+    if not args.quiet:
+        print(f"\n{passed}/{total} valid patch{'es' if passed != 1 else ''}")
+
+    sys.exit(failed)
+
+
+if __name__ == "__main__":
+    main()
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v4] devtools: replace checkpatches shell wrapper with Python script
  2026-01-31 20:48 [RFC] devtools: replace get-maintainer shell wrapper with Python script Stephen Hemminger
                   ` (2 preceding siblings ...)
  2026-02-03 14:17 ` [RFC v3] " Stephen Hemminger
@ 2026-02-04 16:59 ` Stephen Hemminger
  2026-02-04 17:29   ` Bruce Richardson
  2026-02-04 17:32   ` Bruce Richardson
  2026-02-26 17:15 ` [PATCH v5] devtools: add Python-based patch style checker Stephen Hemminger
  2026-03-24 14:48 ` [PATCH v6] " Stephen Hemminger
  5 siblings, 2 replies; 13+ messages in thread
From: Stephen Hemminger @ 2026-02-04 16:59 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger

Alternative to the checkpatches.sh shell script that wraps the Linux kernel's
checkpatch.pl with a standalone Python implementation. The new
dpdk-checkpatch.py script provides equivalent functionality without
requiring the kernel source tree or Perl.

Performance is significantly improved: checking a recent flow parser
patch series dropped from 2m23s to 0.43s (approximately 300x faster).

Supports checking individual patch files, mbox bundles containing
multiple patches, git commit ranges (-r), last N commits (-n), or
reading from stdin. Exit code is 0 if all patches are clean,
1 if any have issues.

Kernel checkpatch.pl compatible checks:
  - AVOID_EXTERNS, UNNECESSARY_BREAK, COMPLEX_MACRO
  - STRNCPY/STRCPY, RETURN_PARENTHESES, POINTER_LOCATION
  - INITIALISED_STATIC, GLOBAL_INITIALISERS
  - TRAILING_STATEMENTS, LINE_CONTINUATIONS, ONE_SEMICOLON
  - REPEATED_WORD, CONSTANT_COMPARISON, SELF_ASSIGNMENT
  - INLINE_LOCATION, STORAGE_CLASS, FUNCTION_WITHOUT_ARGS
  - MACRO_WITH_FLOW_CONTROL, MULTISTATEMENT_MACRO_USE_DO_WHILE
  - PREFER_DEFINED_ATTRIBUTE_MACRO (using DPDK __rte_* macros)
  - TYPO_SPELLING via codespell dictionary

DPDK-specific forbidden token checks:
  - RTE_LOG usage (prefer RTE_LOG_LINE)
  - printf/fprintf to stdout/stderr in libs/drivers
  - rte_panic/rte_exit, direct __attribute__ usage
  - Deprecated atomics (rte_atomicNN_xxx, rte_smp_[rw]mb)
  - Compiler builtins (__sync_xxx, __atomic_xxx, __builtin_xxx)
  - pthread functions (prefer rte_thread)
  - Reserved keywords, pragma, variadic macros

DPDK tag validation:
  - __rte_experimental and __rte_internal placement
  - __rte_packed_begin/end pairing
  - __rte_aligned attribute checking

Expected differences from the shell/Perl version:

The following checks from checkpatch.pl are intentionally not
implemented because they produced false positives on valid DPDK code:

  - DEEP_INDENTATION: tab counting without full brace-nesting
    tracking flags legitimate patterns in switch/case blocks and
    driver transmit paths.
  - CAMELCASE: flags standard C macros like PRIx64 and DPDK
    naming conventions.
  - SPACING after cast: flags RTE_ATOMIC() macro as a cast and
    flags unary minus in (type)-1 expressions.
  - CONCATENATED_STRING: flags valid C format string macros
    like "%"PRIx64.

The following checks have improved behavior:

  - AVOID_EXTERNS only flags function declarations, not data.
    Using extern on data declarations is sometimes necessary
    and does not need a warning.
  - COMPLEX_MACRO does not flag C99 compound literal macros
    like (const type[]){ ... } which are commonly used in DPDK.
  - RETURN_PARENTHESES does not flag return with casts such
    as return (uint16_t)(expr).
  - TYPO_SPELLING excludes common networking abbreviations
    (ND, NS, etc.) and Doxygen tags, and handles contractions
    (don't, couldn't) without false positives.
  - LINE_CONTINUATIONS properly tracks macro context across
    patch context lines.

Known limitations:
  - BRACES check not implemented (requires multi-line analysis)
  - COMMIT_LOG_LONG_LINE not implemented

This script was developed with assistance from the Claude AI assistant.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 devtools/dpdk-checkpatch.py | 1381 +++++++++++++++++++++++++++++++++++
 1 file changed, 1381 insertions(+)
 create mode 100755 devtools/dpdk-checkpatch.py

diff --git a/devtools/dpdk-checkpatch.py b/devtools/dpdk-checkpatch.py
new file mode 100755
index 0000000000..275cdb1391
--- /dev/null
+++ b/devtools/dpdk-checkpatch.py
@@ -0,0 +1,1381 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2026 Stephen Hemminger
+#
+# dpdk-checkpatch.py - Check patches for common style issues
+#
+# This is a standalone Python replacement for the DPDK checkpatches.sh
+# script that previously wrapped the Linux kernel's checkpatch.pl.
+#
+# Usage examples:
+#   # Check patch files
+#   dpdk-checkpatch.py *.patch
+#
+#   # Check patches before applying
+#   dpdk-checkpatch.py *.patch && git am *.patch
+#
+#   # Check commits since origin/main
+#   dpdk-checkpatch.py
+#
+#   # Quiet mode for scripting
+#   if dpdk-checkpatch.py -q "$patch"; then
+#       echo "Clean, applying..."
+#       git am "$patch"
+#   else
+#       echo "Issues found, skipping"
+#   fi
+#
+#   # Verbose output with context
+#   dpdk-checkpatch.py -v my-feature.patch
+
+import argparse
+import os
+import re
+import subprocess
+import sys
+import tempfile
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Optional
+
+VERSION = "1.0"
+
+# Default configuration
+DEFAULT_LINE_LENGTH = 100
+DEFAULT_CODESPELL_DICT = "/usr/share/codespell/dictionary.txt"
+
+
+@dataclass
+class CheckResult:
+    """Result of a single check."""
+    level: str  # ERROR, WARNING, CHECK
+    type_name: str
+    message: str
+    filename: str = ""
+    line_num: int = 0
+    line_content: str = ""
+
+
+@dataclass
+class PatchInfo:
+    """Information extracted from a patch."""
+    subject: str = ""
+    author: str = ""
+    author_email: str = ""
+    signoffs: list = field(default_factory=list)
+    files: list = field(default_factory=list)
+    added_lines: dict = field(default_factory=dict)  # filename -> [(line_num, content)]
+    context_before: dict = field(default_factory=dict)  # filename -> {line_num: context_line}
+    has_fixes_tag: bool = False
+    fixes_commits: list = field(default_factory=list)
+
+
+class CheckPatch:
+    """Main class for checking patches."""
+
+    def __init__(self, config: dict):
+        self.config = config
+        self.results: list[CheckResult] = []
+        self.errors = 0
+        self.warnings = 0
+        self.checks = 0
+        self.lines_checked = 0
+
+        # Load codespell dictionary if enabled
+        self.spelling_dict = {}
+        if config.get("codespell"):
+            self._load_codespell_dict()
+
+        # Forbidden token rules for DPDK
+        self.forbidden_rules = self._init_forbidden_rules()
+
+    def _load_codespell_dict(self) -> None:
+        """Load the codespell dictionary."""
+        dict_path = self.config.get("codespell_file")
+
+        if not dict_path:
+            # Search common locations for the dictionary
+            search_paths = [
+                DEFAULT_CODESPELL_DICT,
+                "/usr/local/lib/python3.12/dist-packages/codespell_lib/data/dictionary.txt",
+                "/usr/local/lib/python3.11/dist-packages/codespell_lib/data/dictionary.txt",
+                "/usr/local/lib/python3.10/dist-packages/codespell_lib/data/dictionary.txt",
+                "/usr/lib/python3/dist-packages/codespell_lib/data/dictionary.txt",
+            ]
+
+            # Also try to find it via codespell module
+            try:
+                import codespell_lib
+                module_path = os.path.join(
+                    os.path.dirname(codespell_lib.__file__),
+                    'data', 'dictionary.txt'
+                )
+                search_paths.insert(0, module_path)
+            except ImportError:
+                pass
+
+            for path in search_paths:
+                if os.path.exists(path):
+                    dict_path = path
+                    break
+
+        if not dict_path or not os.path.exists(dict_path):
+            return
+
+        try:
+            with open(dict_path, "r", encoding="utf-8", errors="ignore") as f:
+                for line in f:
+                    line = line.strip()
+                    if not line or line.startswith("#"):
+                        continue
+                    parts = line.split("->")
+                    if len(parts) >= 2:
+                        wrong = parts[0].strip().lower()
+                        correct = parts[1].strip().split(",")[0].strip()
+                        self.spelling_dict[wrong] = correct
+        except IOError:
+            pass
+
+    def _init_forbidden_rules(self) -> list:
+        """Initialize DPDK-specific forbidden token rules."""
+        return [
+            # Refrain from new calls to RTE_LOG in libraries
+            {
+                "folders": ["lib"],
+                "patterns": [r"RTE_LOG\("],
+                "message": "Prefer RTE_LOG_LINE",
+            },
+            # Refrain from new calls to RTE_LOG in drivers
+            {
+                "folders": ["drivers"],
+                "skip_files": [r".*osdep\.h$"],
+                "patterns": [r"RTE_LOG\(", r"RTE_LOG_DP\(", r"rte_log\("],
+                "message": "Prefer RTE_LOG_LINE/RTE_LOG_DP_LINE",
+            },
+            # No output on stdout or stderr
+            {
+                "folders": ["lib", "drivers"],
+                "patterns": [r"\bprintf\b", r"fprintf\(stdout,", r"fprintf\(stderr,"],
+                "message": "Writing to stdout or stderr",
+            },
+            # Refrain from rte_panic() and rte_exit()
+            {
+                "folders": ["lib", "drivers"],
+                "patterns": [r"rte_panic\(", r"rte_exit\("],
+                "message": "Using rte_panic/rte_exit",
+            },
+            # Don't call directly install_headers()
+            {
+                "folders": ["lib", "drivers"],
+                "patterns": [r"\binstall_headers\b"],
+                "message": "Using install_headers()",
+            },
+            # Refrain from using compiler attribute without common macro
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "skip_files": [r"lib/eal/include/rte_common\.h"],
+                "patterns": [r"__attribute__"],
+                "message": "Using compiler attribute directly",
+            },
+            # Check %l or %ll format specifier
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"%ll*[xud]"],
+                "message": "Using %l format, prefer %PRI*64 if type is [u]int64_t",
+            },
+            # Refrain from 16/32/64 bits rte_atomicNN_xxx()
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"rte_atomic[0-9][0-9]_.*\("],
+                "message": "Using rte_atomicNN_xxx",
+            },
+            # Refrain from rte_smp_[r/w]mb()
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"rte_smp_(r|w)?mb\("],
+                "message": "Using rte_smp_[r/w]mb",
+            },
+            # Refrain from __sync_xxx builtins
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"__sync_.*\("],
+                "message": "Using __sync_xxx builtins",
+            },
+            # Refrain from __rte_atomic_thread_fence()
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"__rte_atomic_thread_fence\("],
+                "message": "Using __rte_atomic_thread_fence, prefer rte_atomic_thread_fence",
+            },
+            # Refrain from __atomic_xxx builtins
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "skip_files": [r"drivers/common/cnxk/"],
+                "patterns": [r"__atomic_.*\(", r"__ATOMIC_(RELAXED|CONSUME|ACQUIRE|RELEASE|ACQ_REL|SEQ_CST)"],
+                "message": "Using __atomic_xxx/__ATOMIC_XXX built-ins, prefer rte_atomic_xxx/rte_memory_order_xxx",
+            },
+            # Refrain from some pthread functions
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"pthread_(create|join|detach|set(_?name_np|affinity_np)|attr_set(inheritsched|schedpolicy))\("],
+                "message": "Using pthread functions, prefer rte_thread",
+            },
+            # Forbid use of __reserved
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"\b__reserved\b"],
+                "message": "Using __reserved",
+            },
+            # Forbid use of __alignof__
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"\b__alignof__\b"],
+                "message": "Using __alignof__, prefer C11 alignof",
+            },
+            # Forbid use of __typeof__
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"\b__typeof__\b"],
+                "message": "Using __typeof__, prefer typeof",
+            },
+            # Forbid use of __builtin_*
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "skip_files": [r"lib/eal/", r"drivers/.*/base/", r"drivers/.*osdep\.h$"],
+                "patterns": [r"\b__builtin_"],
+                "message": "Using __builtin helpers, prefer EAL macros",
+            },
+            # Forbid inclusion of linux/pci_regs.h
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"include.*linux/pci_regs\.h"],
+                "message": "Using linux/pci_regs.h, prefer rte_pci.h",
+            },
+            # Forbid variadic argument pack extension in macros
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"#\s*define.*[^(,\s]\.\.\.[\s]*\)"],
+                "message": "Do not use variadic argument pack in macros",
+            },
+            # Forbid __rte_packed_begin with enums
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"enum.*__rte_packed_begin"],
+                "message": "Using __rte_packed_begin with enum is not allowed",
+            },
+            # Forbid use of #pragma
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "skip_files": [r"lib/eal/include/rte_common\.h"],
+                "patterns": [r"(#pragma|_Pragma)"],
+                "message": "Using compilers pragma is not allowed",
+            },
+            # Forbid experimental build flag except in examples
+            {
+                "folders": ["lib", "drivers", "app"],
+                "patterns": [r"-DALLOW_EXPERIMENTAL_API", r"allow_experimental_apis"],
+                "message": "Using experimental build flag for in-tree compilation",
+            },
+            # Refrain from using RTE_LOG_REGISTER for drivers and libs
+            {
+                "folders": ["lib", "drivers"],
+                "patterns": [r"\bRTE_LOG_REGISTER\b"],
+                "message": "Using RTE_LOG_REGISTER, prefer RTE_LOG_REGISTER_(DEFAULT|SUFFIX)",
+            },
+            # Forbid non-internal thread in drivers and libs
+            {
+                "folders": ["lib", "drivers"],
+                "patterns": [r"rte_thread_(set_name|create_control)\("],
+                "message": "Prefer rte_thread_(set_prefixed_name|create_internal_control)",
+            },
+        ]
+
+    def add_result(self, level: str, type_name: str, message: str,
+                   filename: str = "", line_num: int = 0, line_content: str = "") -> None:
+        """Add a check result."""
+        result = CheckResult(
+            level=level,
+            type_name=type_name,
+            message=message,
+            filename=filename,
+            line_num=line_num,
+            line_content=line_content
+        )
+        self.results.append(result)
+
+        if level == "ERROR":
+            self.errors += 1
+        elif level == "WARNING":
+            self.warnings += 1
+        else:
+            self.checks += 1
+
+    def parse_patch(self, content: str) -> PatchInfo:
+        """Parse a patch and extract information."""
+        info = PatchInfo()
+        current_file = ""
+        in_diff = False
+        line_num_in_new = 0
+
+        lines = content.split("\n")
+        for i, line in enumerate(lines):
+            # Extract subject
+            if line.startswith("Subject:"):
+                subject = line[8:].strip()
+                # Handle multi-line subjects
+                j = i + 1
+                while j < len(lines) and lines[j].startswith(" "):
+                    subject += " " + lines[j].strip()
+                    j += 1
+                info.subject = subject
+
+            # Extract author
+            if line.startswith("From:"):
+                info.author = line[5:].strip()
+                match = re.search(r"<([^>]+)>", info.author)
+                if match:
+                    info.author_email = match.group(1)
+
+            # Extract Signed-off-by
+            match = re.match(r"^Signed-off-by:\s*(.+)$", line, re.IGNORECASE)
+            if match:
+                info.signoffs.append(match.group(1).strip())
+
+            # Extract Fixes tag
+            match = re.match(r"^Fixes:\s*([0-9a-fA-F]+)", line)
+            if match:
+                info.has_fixes_tag = True
+                info.fixes_commits.append(match.group(1))
+
+            # Track files in diff
+            if line.startswith("diff --git"):
+                match = re.match(r"diff --git a/(\S+) b/(\S+)", line)
+                if match:
+                    current_file = match.group(2)
+                    if current_file not in info.files:
+                        info.files.append(current_file)
+                    info.added_lines[current_file] = []
+                in_diff = True
+
+            # Track hunks
+            if line.startswith("@@"):
+                match = re.match(r"@@ -\d+(?:,\d+)? \+(\d+)", line)
+                if match:
+                    line_num_in_new = int(match.group(1))
+                continue
+
+            # Track added lines
+            if in_diff and current_file:
+                if line.startswith("+") and not line.startswith("+++"):
+                    info.added_lines[current_file].append((line_num_in_new, line[1:]))
+                    line_num_in_new += 1
+                elif line.startswith("-"):
+                    pass  # Deleted line, don't increment
+                elif not line.startswith("\\"):
+                    # Context line - store it for reference by line number
+                    if current_file not in info.context_before:
+                        info.context_before[current_file] = {}
+                    info.context_before[current_file][line_num_in_new] = line[1:] if line.startswith(" ") else line
+                    line_num_in_new += 1
+
+        return info
+
+    def check_line_length(self, patch_info: PatchInfo) -> None:
+        """Check for lines exceeding maximum length."""
+        max_len = self.config.get("max_line_length", DEFAULT_LINE_LENGTH)
+
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                # Skip strings that span multiple lines
+                if len(content) > max_len:
+                    # Don't warn about long strings or URLs
+                    if '\"' in content and content.count('\"') >= 2:
+                        continue
+                    if "http://" in content or "https://" in content:
+                        continue
+                    # Check if it's a comment line
+                    if content.strip().startswith("/*") or content.strip().startswith("*") or content.strip().startswith("//"):
+                        self.add_result(
+                            "WARNING", "LONG_LINE_COMMENT",
+                            f"line length of {len(content)} exceeds {max_len} columns",
+                            filename, line_num, content
+                        )
+                    else:
+                        self.add_result(
+                            "WARNING", "LONG_LINE",
+                            f"line length of {len(content)} exceeds {max_len} columns",
+                            filename, line_num, content
+                        )
+
+    def check_trailing_whitespace(self, patch_info: PatchInfo) -> None:
+        """Check for trailing whitespace."""
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                if content != content.rstrip():
+                    self.add_result(
+                        "WARNING", "TRAILING_WHITESPACE",
+                        "trailing whitespace",
+                        filename, line_num, content
+                    )
+
+    def check_tabs_spaces(self, patch_info: PatchInfo) -> None:
+        """Check for space before tab and mixed indentation."""
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                if " \t" in content:
+                    self.add_result(
+                        "WARNING", "SPACE_BEFORE_TAB",
+                        "space before tab in indent",
+                        filename, line_num, content
+                    )
+
+    def check_signoff(self, patch_info: PatchInfo) -> None:
+        """Check for Signed-off-by line."""
+        if not patch_info.signoffs:
+            self.add_result(
+                "ERROR", "MISSING_SIGN_OFF",
+                "Missing Signed-off-by: line(s)"
+            )
+
+    def check_coding_style(self, patch_info: PatchInfo) -> None:
+        """Check various coding style issues."""
+        for filename, lines in patch_info.added_lines.items():
+            # Skip non-C files for most checks
+            is_c_file = filename.endswith((".c", ".h"))
+            is_c_source = filename.endswith(".c")
+            is_header = filename.endswith(".h")
+
+            prev_line = ""
+            indent_stack = []
+            context_before = patch_info.context_before.get(filename, {})
+            for line_num, content in lines:
+                self.lines_checked += 1
+
+                # Check if the line immediately before this one (which may be
+                # a context line from the patch) ended with backslash continuation
+                prev_context = context_before.get(line_num - 1, "")
+                in_macro_continuation = prev_context.rstrip().endswith("\\")
+
+                if is_c_file:
+                    # Check for extern function declarations in .c files
+                    # Only flag functions (have parentheses), not data
+                    if is_c_source and re.match(r"^\s*extern\b", content):
+                        if re.search(r'\(', content):
+                            self.add_result(
+                                "WARNING", "AVOID_EXTERNS",
+                                "extern is not needed for function declarations",
+                                filename, line_num, content
+                            )
+
+                    # Check for unnecessary break after goto/return/continue
+                    # Only flag if the previous statement is unconditional (not inside an if)
+                    if re.match(r"^\s*break\s*;", content):
+                        # Check if previous line is an unconditional return/goto/continue
+                        # It's unconditional if it starts at the same or lower indentation as break
+                        # or if it's a plain return/goto not inside an if block
+                        prev_stripped = prev_line.strip() if prev_line else ""
+                        if re.match(r"^(goto\s+\w+|return\b|continue)\s*[^;]*;\s*$", prev_stripped):
+                            # Check indentation - if prev line has same or less indentation, it's unconditional
+                            break_indent = len(content) - len(content.lstrip())
+                            prev_indent = len(prev_line) - len(prev_line.lstrip()) if prev_line else 0
+                            # Only flag if the return/goto is at the same indentation level
+                            # (meaning it's not inside a nested if block)
+                            if prev_indent <= break_indent:
+                                self.add_result(
+                                    "WARNING", "UNNECESSARY_BREAK",
+                                    "break is not useful after a goto or return",
+                                    filename, line_num, content
+                                )
+
+                    # STRNCPY: should use strlcpy
+                    if re.search(r"\bstrncpy\s*\(", content):
+                        self.add_result(
+                            "WARNING", "STRNCPY",
+                            "Prefer strlcpy over strncpy - see: https://lore.kernel.org/r/CAHk-=wgfRnXz0W3D37d01q3JFkr_i_uTL=V6A6G1oUZcprmknw@mail.gmail.com/",
+                            filename, line_num, content
+                        )
+
+                    # STRCPY: unsafe string copy
+                    if re.search(r"\bstrcpy\s*\(", content):
+                        self.add_result(
+                            "ERROR", "STRCPY",
+                            "strcpy is unsafe - use strlcpy or snprintf",
+                            filename, line_num, content
+                        )
+
+                    # Check for complex macros without proper enclosure
+                    # Note: Compound literal macros like (type[]){...} are valid C99
+                    # and commonly used in DPDK, so we don't flag those.
+                    # Only flag macros with multiple statements without do-while wrapping.
+                    if re.match(r"^\s*#\s*define\s+\w+\s*\([^)]*\)\s+\{", content):
+                        # Macro body starts with { but is not a compound literal
+                        # Check if it's missing do { } while(0)
+                        if not re.search(r"\bdo\s*\{", content):
+                            self.add_result(
+                                "ERROR", "COMPLEX_MACRO",
+                                "Macros with complex values should be enclosed in parentheses or do { } while(0)",
+                                filename, line_num, content
+                            )
+
+                    # SPACING: missing space before ( in control statements
+                    if re.search(r"\b(if|while|for|switch)\(", content):
+                        self.add_result(
+                            "WARNING", "SPACING",
+                            "space required before the open parenthesis '('",
+                            filename, line_num, content
+                        )
+
+                    # SPACING: space prohibited after open square bracket
+                    if re.search(r"\[\s+[^\]]", content) and not re.search(r"\[\s*\]", content):
+                        self.add_result(
+                            "WARNING", "SPACING",
+                            "space prohibited after that open square bracket '['",
+                            filename, line_num, content
+                        )
+
+                    # SPACING: space prohibited before close square bracket
+                    if re.search(r"[^\[]\s+\]", content):
+                        self.add_result(
+                            "WARNING", "SPACING",
+                            "space prohibited before that close square bracket ']'",
+                            filename, line_num, content
+                        )
+
+                    # RETURN_PARENTHESES: return with parentheses
+                    if re.search(r"\breturn\s*\([^;]+\)\s*;", content):
+                        # Avoid false positives for:
+                        # - function calls: return (func())
+                        # - casts: return (type)expr or return (type)(expr)
+                        if not re.search(r"\breturn\s*\(\s*\w+\s*\([^)]*\)\s*\)\s*;", content) and \
+                           not re.search(r"\breturn\s+\([a-zA-Z_][\w\s\*]*\)", content):
+                            self.add_result(
+                                "WARNING", "RETURN_PARENTHESES",
+                                "return is not a function, parentheses are not required",
+                                filename, line_num, content
+                            )
+
+                    # BRACES: single statement blocks that need braces
+                    # Check for if/else/while/for without braces on multiline
+                    if re.match(r"^\s*(if|else\s+if|while|for)\s*\([^{]*$", content):
+                        # Control statement without opening brace - check next line
+                        pass  # Would need lookahead
+
+                    # INITIALISED_STATIC: static initialized to 0/NULL
+                    if re.match(r"^\s*static\s+.*=\s*(0|NULL|0L|0UL|0ULL|0LL)\s*;", content):
+                        self.add_result(
+                            "WARNING", "INITIALISED_STATIC",
+                            "do not initialise statics to 0 or NULL",
+                            filename, line_num, content
+                        )
+
+                    # GLOBAL_INITIALISERS: global initialized to 0/NULL
+                    if re.match(r"^[a-zA-Z_][a-zA-Z0-9_\s\*]*=\s*(0|NULL|0L|0UL|0ULL|0LL)\s*;", content):
+                        if not re.match(r"^\s*static\s+", content):
+                            self.add_result(
+                                "WARNING", "GLOBAL_INITIALISERS",
+                                "do not initialise globals to 0 or NULL",
+                                filename, line_num, content
+                            )
+
+                    # Note: DEEP_INDENTATION check removed - without full brace
+                    # nesting tracking (as in checkpatch.pl), tab counting produces
+                    # too many false positives in legitimate code like switch/case
+                    # blocks and nested loops in driver transmit paths.
+
+                    # TRAILING_STATEMENTS: code on same line as } OR control statement
+                    # But allow struct/union member declarations: } name; or } name; /* comment */
+                    if re.search(r"\}\s*[a-zA-Z_]", content) and not re.search(r"\}\s*(else|while)\b", content):
+                        # Check if this is a struct/union member declaration
+                        # Pattern: } identifier; or } identifier[]; or with comment
+                        if not re.search(r"\}\s*\w+\s*(\[\d*\])?\s*;\s*(/\*.*\*/|//.*)?\s*$", content):
+                            self.add_result(
+                                "ERROR", "TRAILING_STATEMENTS",
+                                "trailing statements should be on next line",
+                                filename, line_num, content
+                            )
+                    # Also check for if/while/for with statement on same line (not opening brace)
+                    # Pattern: if (cond) statement; or if (cond) statement; /* comment */
+                    if re.search(r"\b(if|while|for)\s*\([^)]+\)\s+(?![\s{])[^;]*;", content):
+                        self.add_result(
+                            "ERROR", "TRAILING_STATEMENTS",
+                            "trailing statements should be on next line",
+                            filename, line_num, content
+                        )
+
+                    # CONSTANT_COMPARISON: Yoda conditions (constant on left)
+                    if re.search(r'\b(NULL|true|false)\s*[!=]=\s*[&*\w]', content) or \
+                       re.search(r'\(\s*0\s*[!=]=\s*[&*\w]', content):
+                        self.add_result(
+                            "WARNING", "CONSTANT_COMPARISON",
+                            "Comparisons should place the constant on the right side",
+                            filename, line_num, content
+                        )
+
+                    # BRACES: single statement block should not have braces (or vice versa)
+                    # Check for if/else/while/for with single statement in braces
+                    if re.match(r"^\s*(if|while|for)\s*\([^)]+\)\s*\{\s*$", prev_line):
+                        if re.match(r"^\s*\w.*;\s*$", content) and not re.search(r"^\s*(if|else|while|for|switch|case|default|return\s*;)", content):
+                            # Check if next line is just closing brace - would need lookahead
+                            pass
+
+                    # ONE_SEMICOLON: double semicolon
+                    if re.search(r";;", content) and not re.search(r"for\s*\([^)]*;;", content):
+                        self.add_result(
+                            "WARNING", "ONE_SEMICOLON",
+                            "Statements terminations use 1 semicolon",
+                            filename, line_num, content
+                        )
+
+                    # CODE_INDENT/LEADING_SPACE: spaces used for indentation instead of tabs
+                    if re.match(r"^    +[^\s]", content) and not content.strip().startswith("*"):
+                        # Line starts with spaces (not tabs) - but allow for alignment in comments
+                        self.add_result(
+                            "WARNING", "CODE_INDENT",
+                            "code indent should use tabs where possible",
+                            filename, line_num, content
+                        )
+
+                    # LEADING_SPACE: spaces at start of line (more general)
+                    if re.match(r"^ +\t", content):
+                        self.add_result(
+                            "WARNING", "LEADING_SPACE",
+                            "please, no spaces at the start of a line",
+                            filename, line_num, content
+                        )
+
+                    # LINE_CONTINUATIONS: backslash continuation outside macros
+                    # Check if this line has a backslash continuation
+                    if content.rstrip().endswith("\\"):
+                        # Only flag if not inside a macro definition
+                        # A macro context means either:
+                        # - This line starts a #define
+                        # - The previous line (added or context) was a continuation
+                        # - This line is a preprocessor directive
+                        is_in_macro = (
+                            re.match(r"^\s*#", content) or
+                            (prev_line and prev_line.rstrip().endswith("\\")) or
+                            in_macro_continuation
+                        )
+                        if not is_in_macro:
+                            self.add_result(
+                                "WARNING", "LINE_CONTINUATIONS",
+                                "Avoid unnecessary line continuations",
+                                filename, line_num, content
+                            )
+
+                    # FUNCTION_WITHOUT_ARGS: empty parens instead of (void)
+                    if is_header and re.search(r"\b\w+\s*\(\s*\)\s*;", content):
+                        if not re.search(r"\b(while|if|for|switch|return)\s*\(\s*\)", content):
+                            self.add_result(
+                                "ERROR", "FUNCTION_WITHOUT_ARGS",
+                                "Bad function definition - use (void) instead of ()",
+                                filename, line_num, content
+                            )
+
+                    # INLINE_LOCATION: inline should come after storage class
+                    if re.match(r"^\s*inline\s+(static|extern)", content):
+                        self.add_result(
+                            "ERROR", "INLINE_LOCATION",
+                            "inline keyword should sit between storage class and type",
+                            filename, line_num, content
+                        )
+
+                    # STATIC_CONST: const should come after static
+                    if re.match(r"^\s*const\s+static\b", content):
+                        self.add_result(
+                            "WARNING", "STATIC_CONST",
+                            "Move const after static - use 'static const'",
+                            filename, line_num, content
+                        )
+                        self.add_result(
+                            "WARNING", "STORAGE_CLASS",
+                            "storage class should be at the beginning of the declaration",
+                            filename, line_num, content
+                        )
+
+                    # CONST_CONST: const used twice
+                    if re.search(r"\bconst\s+\w+\s+const\b", content):
+                        self.add_result(
+                            "WARNING", "CONST_CONST",
+                            "const used twice - remove duplicate const",
+                            filename, line_num, content
+                        )
+
+                    # SELF_ASSIGNMENT: x = x (simple variable, not struct members)
+                    # Match only simple identifiers, not struct/pointer member access
+                    match = re.search(r"^\s*(\w+)\s*=\s*(\w+)\s*;", content)
+                    if match and match.group(1) == match.group(2):
+                        self.add_result(
+                            "WARNING", "SELF_ASSIGNMENT",
+                            "Do not use self-assignments to avoid compiler warnings",
+                            filename, line_num, content
+                        )
+
+                    # PREFER_DEFINED_ATTRIBUTE_MACRO: prefer DPDK/kernel macros over __attribute__
+                    attr_macros = {
+                        'cold': '__rte_cold',
+                        'hot': '__rte_hot', 
+                        'noinline': '__rte_noinline',
+                        'always_inline': '__rte_always_inline',
+                        'unused': '__rte_unused',
+                        'packed': '__rte_packed',
+                        'aligned': '__rte_aligned',
+                        'weak': '__rte_weak',
+                        'pure': '__rte_pure',
+                    }
+                    for attr, replacement in attr_macros.items():
+                        if re.search(rf'__attribute__\s*\(\s*\(\s*{attr}\b', content):
+                            self.add_result(
+                                "WARNING", "PREFER_DEFINED_ATTRIBUTE_MACRO",
+                                f"Prefer {replacement} over __attribute__(({attr}))",
+                                filename, line_num, content
+                            )
+
+                    # POINTER_LOCATION: char* instead of char *
+                    if re.search(r"\b(char|int|void|short|long|float|double|unsigned|signed)\*\s+\w", content):
+                        self.add_result(
+                            "ERROR", "POINTER_LOCATION",
+                            "\"foo* bar\" should be \"foo *bar\"",
+                            filename, line_num, content
+                        )
+
+                    # MACRO_WITH_FLOW_CONTROL: macros with return/goto/break
+                    if re.match(r"^\s*#\s*define\s+\w+.*\b(return|goto|break|continue)\b", content):
+                        self.add_result(
+                            "WARNING", "MACRO_WITH_FLOW_CONTROL",
+                            "Macros with flow control statements should be avoided",
+                            filename, line_num, content
+                        )
+
+                    # MULTISTATEMENT_MACRO_USE_DO_WHILE: macros with multiple statements
+                    if re.match(r"^\s*#\s*define\s+\w+\([^)]*\)\s+.*;\s*[^\\]", content):
+                        if not re.search(r"do\s*\{", content):
+                            self.add_result(
+                                "WARNING", "MULTISTATEMENT_MACRO_USE_DO_WHILE",
+                                "Macros with multiple statements should use do {} while(0)",
+                                filename, line_num, content
+                            )
+
+                    # MULTISTATEMENT_MACRO_USE_DO_WHILE: macros starting with if
+                    if re.match(r"^\s*#\s*define\s+\w+\([^)]*\)\s+if\s*\(", content):
+                        self.add_result(
+                            "ERROR", "MULTISTATEMENT_MACRO_USE_DO_WHILE",
+                            "Macros starting with if should be enclosed by a do - while loop",
+                            filename, line_num, content
+                        )
+
+                    # Multiple statements on one line (skip comments and strings)
+                    stripped_content = content.strip()
+                    if re.search(r";\s*[a-zA-Z_]", content) and "for" not in content:
+                        # Skip if line is a comment
+                        if not (stripped_content.startswith("/*") or 
+                                stripped_content.startswith("*") or 
+                                stripped_content.startswith("//")):
+                            # Skip if the semicolon is inside a string or comment
+                            # Remove strings and comments before checking
+                            code_only = re.sub(r'"[^"]*"', '""', content)  # Remove string contents
+                            code_only = re.sub(r'/\*.*?\*/', '', code_only)  # Remove /* */ comments
+                            code_only = re.sub(r'//.*$', '', code_only)  # Remove // comments
+                            if re.search(r";\s*[a-zA-Z_]", code_only):
+                                self.add_result(
+                                    "CHECK", "MULTIPLE_STATEMENTS",
+                                    "multiple statements on one line",
+                                    filename, line_num, content
+                                )
+
+                    # Check for C99 comments in headers that should use C89
+                    if is_header and "//" in content:
+                        # Only flag if not in a string
+                        stripped = re.sub(r'"[^"]*"', '', content)
+                        if "//" in stripped:
+                            self.add_result(
+                                "CHECK", "C99_COMMENTS",
+                                "C99 // comments are acceptable but /* */ is preferred in headers",
+                                filename, line_num, content
+                            )
+
+                    # BLOCK_COMMENT_STYLE: block comments style issues
+                    # Leading /* on its own line (but allow Doxygen /** style)
+                    if re.match(r"^\s*/\*\*+\s*$", content):
+                        # Allow /** (Doxygen) but not /*** or more
+                        if not re.match(r"^\s*/\*\*\s*$", content):
+                            self.add_result(
+                                "WARNING", "BLOCK_COMMENT_STYLE",
+                                "Block comments should not use a leading /* on a line by itself",
+                                filename, line_num, content
+                            )
+                    # Trailing */ on separate line after block comment
+                    if re.match(r"^\s*\*+/\s*$", content) and prev_line.strip().startswith("*"):
+                        pass  # This is actually acceptable
+                    # Block with trailing */ but content before it (like === */)
+                    if re.search(r"\S\s*=+\s*\*/\s*$", content):
+                        self.add_result(
+                            "WARNING", "BLOCK_COMMENT_STYLE",
+                            "Block comments use a trailing */ on a separate line",
+                            filename, line_num, content
+                        )
+
+                    # REPEATED_WORD: check for repeated words
+                    words = re.findall(r'\b(\w+)\s+\1\b', content, re.IGNORECASE)
+                    for word in words:
+                        word_lower = word.lower()
+                        # Skip common valid repeated patterns
+                        if word_lower not in ('that', 'had', 'long', 'int', 'short'):
+                            self.add_result(
+                                "WARNING", "REPEATED_WORD",
+                                f"Possible repeated word: '{word}'",
+                                filename, line_num, content
+                            )
+
+                    # STRING_FRAGMENTS: unnecessary string concatenation like "foo" "bar"
+                    # Must have closing quote, whitespace, opening quote pattern
+                    if re.search(r'"\s*"\s*[^)]', content) and not re.search(r'#\s*define', content):
+                        # Verify it's actually two separate strings being concatenated
+                        # by checking for the pattern: "..." "..."
+                        if re.search(r'"[^"]*"\s+"[^"]*"', content):
+                            self.add_result(
+                                "CHECK", "STRING_FRAGMENTS",
+                                "Consecutive strings are generally better as a single string",
+                                filename, line_num, content
+                            )
+
+                prev_line = content
+
+    def check_spelling(self, patch_info: PatchInfo) -> None:
+        """Check for spelling errors using codespell dictionary."""
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                # REPEATED_WORD check for non-C files (C files handled in check_coding_style)
+                if not filename.endswith((".c", ".h")):
+                    words = re.findall(r'\b(\w+)\s+\1\b', content, re.IGNORECASE)
+                    for word in words:
+                        word_lower = word.lower()
+                        if word_lower not in ('that', 'had', 'long', 'int', 'short'):
+                            self.add_result(
+                                "WARNING", "REPEATED_WORD",
+                                f"Possible repeated word: '{word}'",
+                                filename, line_num, content
+                            )
+
+                # Spelling check
+                if self.spelling_dict:
+                    # Common abbreviations that should not be flagged as typos
+                    abbreviations = {
+                        'nd', 'ns', 'na', 'ra', 'rs',  # IPv6 Neighbor Discovery
+                        'tx', 'rx', 'id', 'io', 'ip',  # Common networking
+                        'tcp', 'udp', 'arp', 'dns',    # Protocols  
+                        'hw', 'sw', 'fw',              # Hardware/Software/Firmware
+                        'src', 'dst', 'ptr', 'buf',    # Common code abbreviations
+                        'cfg', 'ctx', 'idx', 'cnt',    # Config/Context/Index/Count
+                        'len', 'num', 'max', 'min',    # Length/Number/Max/Min
+                        'prev', 'next', 'curr',        # Previous/Next/Current
+                        'init', 'fini', 'deinit',      # Initialize/Finish
+                        'alloc', 'dealloc', 'realloc', # Memory
+                        'endcode',                      # Doxygen tag
+                    }
+                    # Extract words, but skip contractions (don't, couldn't, etc.)
+                    # by removing them before word extraction
+                    spell_content = re.sub(r"[a-zA-Z]+n't\b", '', content)
+                    spell_content = re.sub(r"[a-zA-Z]+'[a-zA-Z]+", '', spell_content)
+                    words = re.findall(r'\b[a-zA-Z]+\b', spell_content)
+                    for word in words:
+                        lower_word = word.lower()
+                        if lower_word in self.spelling_dict and lower_word not in abbreviations:
+                            self.add_result(
+                                "WARNING", "TYPO_SPELLING",
+                                f"'{word}' may be misspelled - perhaps '{self.spelling_dict[lower_word]}'?",
+                                filename, line_num, content
+                            )
+
+    def check_forbidden_tokens(self, patch_info: PatchInfo) -> None:
+        """Check for DPDK-specific forbidden tokens."""
+        for filename, lines in patch_info.added_lines.items():
+            for rule in self.forbidden_rules:
+                # Check if file is in one of the target folders
+                in_folder = False
+                for folder in rule["folders"]:
+                    if filename.startswith(folder + "/") or filename.startswith("b/" + folder + "/"):
+                        in_folder = True
+                        break
+
+                if not in_folder:
+                    continue
+
+                # Check if file should be skipped
+                skip = False
+                for skip_pattern in rule.get("skip_files", []):
+                    if re.search(skip_pattern, filename):
+                        skip = True
+                        break
+
+                if skip:
+                    continue
+
+                # Check each line for forbidden patterns
+                for line_num, content in lines:
+                    for pattern in rule["patterns"]:
+                        if re.search(pattern, content):
+                            self.add_result(
+                                "WARNING", "FORBIDDEN_TOKEN",
+                                rule["message"],
+                                filename, line_num, content
+                            )
+                            break
+
+    def check_experimental_tags(self, patch_info: PatchInfo) -> None:
+        """Check __rte_experimental tag placement."""
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                if "__rte_experimental" in content:
+                    # Should only be in headers
+                    if filename.endswith(".c"):
+                        self.add_result(
+                            "WARNING", "EXPERIMENTAL_TAG",
+                            f"Please only put __rte_experimental tags in headers ({filename})",
+                            filename, line_num, content
+                        )
+                    # Should appear alone on the line
+                    stripped = content.strip()
+                    if stripped != "__rte_experimental":
+                        self.add_result(
+                            "WARNING", "EXPERIMENTAL_TAG",
+                            "__rte_experimental must appear alone on the line immediately preceding the return type of a function",
+                            filename, line_num, content
+                        )
+
+    def check_internal_tags(self, patch_info: PatchInfo) -> None:
+        """Check __rte_internal tag placement."""
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                if "__rte_internal" in content:
+                    # Should only be in headers
+                    if filename.endswith(".c"):
+                        self.add_result(
+                            "WARNING", "INTERNAL_TAG",
+                            f"Please only put __rte_internal tags in headers ({filename})",
+                            filename, line_num, content
+                        )
+                    # Should appear alone on the line
+                    stripped = content.strip()
+                    if stripped != "__rte_internal":
+                        self.add_result(
+                            "WARNING", "INTERNAL_TAG",
+                            "__rte_internal must appear alone on the line immediately preceding the return type of a function",
+                            filename, line_num, content
+                        )
+
+    def check_aligned_attributes(self, patch_info: PatchInfo) -> None:
+        """Check alignment attribute usage."""
+        align_tokens = ["__rte_aligned", "__rte_cache_aligned", "__rte_cache_min_aligned"]
+
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                for token in align_tokens:
+                    if re.search(rf"\b{token}\b", content):
+                        # Should only be used with struct or union
+                        if not re.search(rf"\b(struct|union)\s*{token}\b", content):
+                            self.add_result(
+                                "WARNING", "ALIGNED_ATTRIBUTE",
+                                f"Please use {token} only for struct or union types alignment",
+                                filename, line_num, content
+                            )
+
+    def check_packed_attributes(self, patch_info: PatchInfo) -> None:
+        """Check packed attribute usage."""
+        begin_count = 0
+        end_count = 0
+
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                if "__rte_packed_begin" in content:
+                    begin_count += 1
+                    # Should be after struct, union, or alignment attributes
+                    if not re.search(r"\b(struct|union)\s*__rte_packed_begin\b", content) and \
+                       not re.search(r"__rte_cache_aligned\s*__rte_packed_begin", content) and \
+                       not re.search(r"__rte_cache_min_aligned\s*__rte_packed_begin", content) and \
+                       not re.search(r"__rte_aligned\(.*\)\s*__rte_packed_begin", content):
+                        self.add_result(
+                            "WARNING", "PACKED_ATTRIBUTE",
+                            "Use __rte_packed_begin only after struct, union or alignment attributes",
+                            filename, line_num, content
+                        )
+
+                if "__rte_packed_end" in content:
+                    end_count += 1
+
+        if begin_count != end_count:
+            self.add_result(
+                "WARNING", "PACKED_ATTRIBUTE",
+                "__rte_packed_begin and __rte_packed_end should always be used in pairs"
+            )
+
+    def check_patch(self, content: str, patch_file: str = None) -> bool:
+        """Run all checks on a patch."""
+        self.results = []
+        self.errors = 0
+        self.warnings = 0
+        self.checks = 0
+        self.lines_checked = 0
+
+        # Check patch format first
+        self.check_patch_format(content, patch_file)
+
+        patch_info = self.parse_patch(content)
+
+        # Run all checks
+        self.check_signoff(patch_info)
+        self.check_line_length(patch_info)
+        self.check_trailing_whitespace(patch_info)
+        self.check_tabs_spaces(patch_info)
+        self.check_coding_style(patch_info)
+        self.check_spelling(patch_info)
+        self.check_forbidden_tokens(patch_info)
+        self.check_experimental_tags(patch_info)
+        self.check_internal_tags(patch_info)
+        self.check_aligned_attributes(patch_info)
+        self.check_packed_attributes(patch_info)
+        self.check_commit_message(patch_info, content)
+
+        return self.errors == 0 and self.warnings == 0
+
+    def check_patch_format(self, content: str, patch_file: str = None) -> None:
+        """Check basic patch format for corruption."""
+        lines = content.split("\n")
+
+        # Track patch structure
+        has_diff = False
+        has_hunk = False
+        in_hunk = False
+        hunk_line = 0
+
+        for i, line in enumerate(lines, 1):
+            # Track diff headers
+            if line.startswith("diff --git"):
+                has_diff = True
+                in_hunk = False
+
+            # Parse hunk header
+            if line.startswith("@@"):
+                has_hunk = True
+                in_hunk = True
+                hunk_line = i
+                # Validate hunk header format
+                if not re.match(r"@@ -\d+(?:,\d+)? \+\d+(?:,\d+)? @@", line):
+                    self.add_result(
+                        "ERROR", "CORRUPTED_PATCH",
+                        f"patch seems to be corrupt (malformed hunk header) at line {i}"
+                    )
+
+            # End of patch content (signature separator)
+            elif line == "-- ":
+                in_hunk = False
+
+            # Check for lines that look like they should be in a hunk but aren't prefixed
+            elif in_hunk and line and not line.startswith(("+", "-", " ", "\\", "diff ", "@@", "index ", "--- ", "+++ ", "new file", "deleted file", "old mode", "new mode", "rename ", "similarity", "copy ")):
+                # This could be a wrapped line or corruption
+                # But be careful - empty lines and commit message lines are OK
+                if not line.startswith(("From ", "Subject:", "Date:", "Signed-off-by:",
+                                       "Acked-by:", "Reviewed-by:", "Tested-by:",
+                                       "Fixes:", "Cc:", "---", "Message-Id:")):
+                    # Likely a corrupted/wrapped line in the diff
+                    self.add_result(
+                        "ERROR", "CORRUPTED_PATCH",
+                        f"patch seems to be corrupt (line wrapped?) at line {i}"
+                    )
+                    in_hunk = False  # Stop checking this hunk
+
+        if has_diff and not has_hunk:
+            self.add_result(
+                "ERROR", "CORRUPTED_PATCH",
+                "Patch appears to be corrupted (has diff but no hunks)"
+            )
+
+        # Check for DOS line endings
+        if "\r\n" in content:
+            self.add_result(
+                "ERROR", "DOS_LINE_ENDINGS",
+                "Patch has DOS line endings, should be UNIX line endings"
+            )
+
+    def check_commit_message(self, patch_info: PatchInfo, content: str) -> None:
+        """Check commit message for issues."""
+        lines = content.split("\n")
+
+        in_commit_msg = False
+        commit_msg_lines = []
+
+        for i, line in enumerate(lines):
+            if line.startswith("Subject:"):
+                in_commit_msg = True
+                continue
+            if line.startswith("---") or line.startswith("diff --git"):
+                in_commit_msg = False
+                continue
+            if in_commit_msg:
+                commit_msg_lines.append((i + 1, line))
+
+        for line_num, line in commit_msg_lines:
+            # UNKNOWN_COMMIT_ID: Fixes tag with short or invalid commit ID
+            match = re.match(r"^Fixes:\s*([0-9a-fA-F]+)", line)
+            if match:
+                commit_id = match.group(1)
+                if len(commit_id) < 12:
+                    self.add_result(
+                        "WARNING", "UNKNOWN_COMMIT_ID",
+                        f"Commit id '{commit_id}' is too short, use at least 12 characters",
+                        line_num=line_num, line_content=line
+                    )
+                # Check Fixes format: should be Fixes: <hash> ("commit subject")
+                if not re.match(r'^Fixes:\s+[0-9a-fA-F]{12,}\s+\("[^"]+"\)\s*$', line):
+                    self.add_result(
+                        "WARNING", "BAD_FIXES_TAG",
+                        "Fixes: tag format should be: Fixes: <12+ char hash> (\"commit subject\")",
+                        line_num=line_num, line_content=line
+                    )
+
+    def format_results(self, show_types: bool = True) -> str:
+        """Format the results for output."""
+        output = []
+
+        for result in self.results:
+            if result.filename and result.line_num:
+                prefix = f"{result.filename}:{result.line_num}:"
+            elif result.filename:
+                prefix = f"{result.filename}:"
+            else:
+                prefix = ""
+
+            type_str = f" [{result.type_name}]" if show_types else ""
+            output.append(f"{result.level}:{type_str} {result.message}")
+
+            if prefix:
+                output.append(f"#  {prefix}")
+            if result.line_content:
+                output.append(f"+  {result.line_content}")
+            output.append("")
+
+        return "\n".join(output)
+
+    def get_summary(self) -> str:
+        """Get a summary of the check results."""
+        return f"total: {self.errors} errors, {self.warnings} warnings, {self.checks} checks, {self.lines_checked} lines checked"
+
+
+def split_mbox(content: str) -> list[str]:
+    """Split an mbox file into individual messages.
+    
+    Mbox format uses 'From ' at the start of a line as message separator.
+    """
+    messages = []
+    current = []
+    
+    for line in content.split('\n'):
+        # Standard mbox separator: line starting with "From " followed by
+        # an address or identifier and a date
+        if line.startswith('From ') and current:
+            messages.append('\n'.join(current))
+            current = [line]
+        else:
+            current.append(line)
+    
+    if current:
+        messages.append('\n'.join(current))
+    
+    return messages
+
+
+def check_single_patch(checker: CheckPatch, patch_path: Optional[str],
+                       commit: Optional[str], verbose: bool, quiet: bool,
+                       pre_content: Optional[str] = None) -> bool:
+    """Check a single patch file or commit."""
+    subject = ""
+    content = ""
+
+    if pre_content:
+        content = pre_content
+    elif patch_path:
+        try:
+            with open(patch_path, "r", encoding="utf-8", errors="replace") as f:
+                content = f.read()
+        except IOError as e:
+            print(f"Error reading {patch_path}: {e}", file=sys.stderr)
+            return False
+    elif commit:
+        try:
+            result = subprocess.run(
+                ["git", "format-patch", "--find-renames", "--no-stat", "--stdout", "-1", commit],
+                capture_output=True,
+                text=True
+            )
+            if result.returncode != 0:
+                print(f"Error getting commit {commit}", file=sys.stderr)
+                return False
+            content = result.stdout
+        except (subprocess.CalledProcessError, FileNotFoundError) as e:
+            print(f"Error running git: {e}", file=sys.stderr)
+            return False
+    else:
+        content = sys.stdin.read()
+
+    # Extract subject
+    match = re.search(r"^Subject:\s*(.+?)(?:\n(?=\S)|\n\n)", content, re.MULTILINE | re.DOTALL)
+    if match:
+        subject = match.group(1).replace("\n ", " ").strip()
+
+    if verbose:
+        print(f"\n### {subject}\n")
+
+    is_clean = checker.check_patch(content, patch_path)
+    has_issues = checker.errors > 0 or checker.warnings > 0
+
+    if has_issues or verbose:
+        if not verbose and subject:
+            print(f"\n### {subject}\n")
+        print(checker.format_results(show_types=True))
+        print(checker.get_summary())
+
+    return is_clean
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse command line arguments."""
+    parser = argparse.ArgumentParser(
+        description="Check patches for DPDK coding style and common issues",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  %(prog)s patch.diff                Check a patch file
+  %(prog)s -n 3                      Check last 3 commits
+  %(prog)s -r origin/main..HEAD      Check commits in range
+  cat patch.diff | %(prog)s          Check patch from stdin
+"""
+    )
+
+    parser.add_argument("patches", nargs="*", help="Patch files to check")
+    parser.add_argument("-n", type=int, metavar="NUM",
+                       help="Check last NUM commits")
+    parser.add_argument("-r", "--range", metavar="RANGE",
+                       help="Check commits in git range (default: origin/main..)")
+    parser.add_argument("-q", "--quiet", action="store_true",
+                       help="Quiet mode - only show summary")
+    parser.add_argument("-v", "--verbose", action="store_true",
+                       help="Verbose mode - show all checks")
+    parser.add_argument("--max-line-length", type=int, default=DEFAULT_LINE_LENGTH,
+                       help=f"Maximum line length (default: {DEFAULT_LINE_LENGTH})")
+    parser.add_argument("--codespell", action="store_true", default=True,
+                       help="Enable spell checking (default: enabled)")
+    parser.add_argument("--no-codespell", dest="codespell", action="store_false",
+                       help="Disable spell checking")
+    parser.add_argument("--codespellfile", metavar="FILE",
+                       help="Path to codespell dictionary")
+    parser.add_argument("--show-types", action="store_true", default=True,
+                       help="Show message types (default: enabled)")
+    parser.add_argument("--no-show-types", dest="show_types", action="store_false",
+                       help="Hide message types")
+
+    return parser.parse_args()
+
+
+def main():
+    """Main entry point."""
+    args = parse_args()
+
+    # Build configuration
+    config = {
+        "max_line_length": args.max_line_length,
+        "codespell": args.codespell,
+        "show_types": args.show_types,
+    }
+
+    if args.codespellfile:
+        config["codespell_file"] = args.codespellfile
+
+    checker = CheckPatch(config)
+
+    total = 0
+    failed = 0
+
+    if args.patches:
+        # Check specified patch files
+        for patch in args.patches:
+            try:
+                with open(patch, "r", encoding="utf-8", errors="replace") as f:
+                    content = f.read()
+            except IOError as e:
+                print(f"Error reading {patch}: {e}", file=sys.stderr)
+                total += 1
+                failed += 1
+                continue
+
+            # Check if this is an mbox with multiple patches
+            messages = split_mbox(content)
+            if len(messages) > 1:
+                for msg in messages:
+                    # Only process messages that contain diffs
+                    if 'diff --git' in msg or '---' in msg:
+                        total += 1
+                        if not check_single_patch(checker, None, None, args.verbose, args.quiet, msg):
+                            failed += 1
+            else:
+                total += 1
+                if not check_single_patch(checker, patch, None, args.verbose, args.quiet):
+                    failed += 1
+
+    elif args.n or args.range:
+        # Check git commits
+        if args.n:
+            result = subprocess.run(
+                ["git", "rev-list", "--reverse", f"--max-count={args.n}", "HEAD"],
+                capture_output=True,
+                text=True
+            )
+        else:
+            git_range = args.range if args.range else "origin/main.."
+            result = subprocess.run(
+                ["git", "rev-list", "--reverse", git_range],
+                capture_output=True,
+                text=True
+            )
+
+        if result.returncode != 0:
+            print("Error getting git commits", file=sys.stderr)
+            sys.exit(1)
+
+        commits = result.stdout.strip().split("\n")
+        for commit in commits:
+            if commit:
+                total += 1
+                if not check_single_patch(checker, None, commit, args.verbose, args.quiet):
+                    failed += 1
+
+    elif not sys.stdin.isatty():
+        # Read from stdin
+        total = 1
+        if not check_single_patch(checker, None, None, args.verbose, args.quiet):
+            failed += 1
+
+    else:
+        # Default to checking commits since origin/main
+        result = subprocess.run(
+            ["git", "rev-list", "--reverse", "origin/main.."],
+            capture_output=True,
+            text=True
+        )
+
+        commits = result.stdout.strip().split("\n") if result.stdout.strip() else []
+        for commit in commits:
+            if commit:
+                total += 1
+                if not check_single_patch(checker, None, commit, args.verbose, args.quiet):
+                    failed += 1
+
+    # Print summary
+    passed = total - failed
+    if not args.quiet:
+        print(f"\n{passed}/{total} valid patch{'es' if passed != 1 else ''}")
+
+    sys.exit(0 if failed == 0 else 1)
+
+
+if __name__ == "__main__":
+    main()
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH v4] devtools: replace checkpatches shell wrapper with Python script
  2026-02-04 16:59 ` [PATCH v4] " Stephen Hemminger
@ 2026-02-04 17:29   ` Bruce Richardson
  2026-02-04 17:32   ` Bruce Richardson
  1 sibling, 0 replies; 13+ messages in thread
From: Bruce Richardson @ 2026-02-04 17:29 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev

On Wed, Feb 04, 2026 at 08:59:17AM -0800, Stephen Hemminger wrote:
> Alternative to the checkpatches.sh shell script that wraps the Linux kernel's
> checkpatch.pl with a standalone Python implementation. The new
> dpdk-checkpatch.py script provides equivalent functionality without
> requiring the kernel source tree or Perl.
> 
> Performance is significantly improved: checking a recent flow parser
> patch series dropped from 2m23s to 0.43s (approximately 300x faster).
> 
> Supports checking individual patch files, mbox bundles containing
> multiple patches, git commit ranges (-r), last N commits (-n), or
> reading from stdin. Exit code is 0 if all patches are clean,
> 1 if any have issues.
> 
> Kernel checkpatch.pl compatible checks:
>   - AVOID_EXTERNS, UNNECESSARY_BREAK, COMPLEX_MACRO
>   - STRNCPY/STRCPY, RETURN_PARENTHESES, POINTER_LOCATION
>   - INITIALISED_STATIC, GLOBAL_INITIALISERS
>   - TRAILING_STATEMENTS, LINE_CONTINUATIONS, ONE_SEMICOLON
>   - REPEATED_WORD, CONSTANT_COMPARISON, SELF_ASSIGNMENT
>   - INLINE_LOCATION, STORAGE_CLASS, FUNCTION_WITHOUT_ARGS
>   - MACRO_WITH_FLOW_CONTROL, MULTISTATEMENT_MACRO_USE_DO_WHILE
>   - PREFER_DEFINED_ATTRIBUTE_MACRO (using DPDK __rte_* macros)
>   - TYPO_SPELLING via codespell dictionary
> 
> DPDK-specific forbidden token checks:
>   - RTE_LOG usage (prefer RTE_LOG_LINE)
>   - printf/fprintf to stdout/stderr in libs/drivers
>   - rte_panic/rte_exit, direct __attribute__ usage
>   - Deprecated atomics (rte_atomicNN_xxx, rte_smp_[rw]mb)
>   - Compiler builtins (__sync_xxx, __atomic_xxx, __builtin_xxx)
>   - pthread functions (prefer rte_thread)
>   - Reserved keywords, pragma, variadic macros
> 
> DPDK tag validation:
>   - __rte_experimental and __rte_internal placement
>   - __rte_packed_begin/end pairing
>   - __rte_aligned attribute checking
> 
> Expected differences from the shell/Perl version:
> 
> The following checks from checkpatch.pl are intentionally not
> implemented because they produced false positives on valid DPDK code:
> 
>   - DEEP_INDENTATION: tab counting without full brace-nesting
>     tracking flags legitimate patterns in switch/case blocks and
>     driver transmit paths.
>   - CAMELCASE: flags standard C macros like PRIx64 and DPDK
>     naming conventions.
>   - SPACING after cast: flags RTE_ATOMIC() macro as a cast and
>     flags unary minus in (type)-1 expressions.
>   - CONCATENATED_STRING: flags valid C format string macros
>     like "%"PRIx64.
> 
> The following checks have improved behavior:
> 
>   - AVOID_EXTERNS only flags function declarations, not data.
>     Using extern on data declarations is sometimes necessary
>     and does not need a warning.
>   - COMPLEX_MACRO does not flag C99 compound literal macros
>     like (const type[]){ ... } which are commonly used in DPDK.
>   - RETURN_PARENTHESES does not flag return with casts such
>     as return (uint16_t)(expr).
>   - TYPO_SPELLING excludes common networking abbreviations
>     (ND, NS, etc.) and Doxygen tags, and handles contractions
>     (don't, couldn't) without false positives.
>   - LINE_CONTINUATIONS properly tracks macro context across
>     patch context lines.
> 
> Known limitations:
>   - BRACES check not implemented (requires multi-line analysis)
>   - COMMIT_LOG_LONG_LINE not implemented
> 
> This script was developed with assistance from the Claude AI assistant.
> 
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> ---
>  devtools/dpdk-checkpatch.py | 1381 +++++++++++++++++++++++++++++++++++
>  1 file changed, 1381 insertions(+)
>  create mode 100755 devtools/dpdk-checkpatch.py
> 
Acked-by: Bruce Richardson <bruce.richardson@intel.com>

A minor nit, but this could do with running black on it to reformat it
according to standard rules, and to limit line lengths to 100 chars.
(Running the script on its own commit throws up long line errors as well as
a bunch of false positives)


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v4] devtools: replace checkpatches shell wrapper with Python script
  2026-02-04 16:59 ` [PATCH v4] " Stephen Hemminger
  2026-02-04 17:29   ` Bruce Richardson
@ 2026-02-04 17:32   ` Bruce Richardson
  2026-02-05  1:43     ` Stephen Hemminger
  1 sibling, 1 reply; 13+ messages in thread
From: Bruce Richardson @ 2026-02-04 17:32 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: dev

On Wed, Feb 04, 2026 at 08:59:17AM -0800, Stephen Hemminger wrote:
> Alternative to the checkpatches.sh shell script that wraps the Linux kernel's
> checkpatch.pl with a standalone Python implementation. The new
> dpdk-checkpatch.py script provides equivalent functionality without
> requiring the kernel source tree or Perl.
> 
> Performance is significantly improved: checking a recent flow parser
> patch series dropped from 2m23s to 0.43s (approximately 300x faster).
> 
Yep, it is definitely a LOT faster. Running on top 10 commits on main tree
with old and new script:

$ time ./devtools/checkpatches.sh -n10

10/10 valid patches

real	0m32.570s
user	0m30.517s
sys	0m2.256s


$ time ./devtools/dpdk-checkpatch.py -n10

10/10 valid patches

real	0m0.255s
user	0m0.214s
sys	0m0.040s

32.5 seconds down to a quarter of a second is a massive gain. Looking
forward to switching script for checking patches in future!

/Bruce

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v4] devtools: replace checkpatches shell wrapper with Python script
  2026-02-04 17:32   ` Bruce Richardson
@ 2026-02-05  1:43     ` Stephen Hemminger
  0 siblings, 0 replies; 13+ messages in thread
From: Stephen Hemminger @ 2026-02-05  1:43 UTC (permalink / raw)
  To: Bruce Richardson; +Cc: dev

On Wed, 4 Feb 2026 17:32:31 +0000
Bruce Richardson <bruce.richardson@intel.com> wrote:

> On Wed, Feb 04, 2026 at 08:59:17AM -0800, Stephen Hemminger wrote:
> > Alternative to the checkpatches.sh shell script that wraps the Linux kernel's
> > checkpatch.pl with a standalone Python implementation. The new
> > dpdk-checkpatch.py script provides equivalent functionality without
> > requiring the kernel source tree or Perl.
> > 
> > Performance is significantly improved: checking a recent flow parser
> > patch series dropped from 2m23s to 0.43s (approximately 300x faster).
> >   
> Yep, it is definitely a LOT faster. Running on top 10 commits on main tree
> with old and new script:
> 
> $ time ./devtools/checkpatches.sh -n10
> 
> 10/10 valid patches
> 
> real	0m32.570s
> user	0m30.517s
> sys	0m2.256s
> 
> 
> $ time ./devtools/dpdk-checkpatch.py -n10
> 
> 10/10 valid patches
> 
> real	0m0.255s
> user	0m0.214s
> sys	0m0.040s
> 
> 32.5 seconds down to a quarter of a second is a massive gain. Looking
> forward to switching script for checking patches in future!
> 
> /Bruce

I still am finding some unintentional differences but it much
easier to fix false positives than in old code.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH v5] devtools: add Python-based patch style checker
  2026-01-31 20:48 [RFC] devtools: replace get-maintainer shell wrapper with Python script Stephen Hemminger
                   ` (3 preceding siblings ...)
  2026-02-04 16:59 ` [PATCH v4] " Stephen Hemminger
@ 2026-02-26 17:15 ` Stephen Hemminger
  2026-03-24 14:48 ` [PATCH v6] " Stephen Hemminger
  5 siblings, 0 replies; 13+ messages in thread
From: Stephen Hemminger @ 2026-02-26 17:15 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Bruce Richardson

Add dpdk-checkpatch.py as a standalone alternative to checkpatches.sh
for checking patches against DPDK coding style. Unlike the existing
shell script, it does not require the Linux kernel checkpatch.pl or
Perl, and is significantly faster (e.g. ~0.4s vs ~2m23s on a recent
patch series).

The script supports the same usage patterns: checking patch files,
mbox bundles, git commit ranges (-r), last N commits (-n), or
reading from stdin. It implements the most common checks from
checkpatch.pl (coding style, spacing, line length, spelling via
codespell) along with DPDK-specific forbidden token and tag checks.

Some checks from checkpatch.pl that produced frequent false positives
on valid DPDK code (DEEP_INDENTATION, CAMELCASE, SPACING after cast,
CONCATENATED_STRING) are intentionally omitted. A few checks are not
yet implemented (BRACES, COMMIT_LOG_LONG_LINE).

This script was developed with assistance from the Claude AI assistant.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Acked-by: Bruce Richardson <bruce.richardson@intel.com>
---
 devtools/dpdk-checkpatch.py | 1414 +++++++++++++++++++++++++++++++++++
 1 file changed, 1414 insertions(+)
 create mode 100755 devtools/dpdk-checkpatch.py

diff --git a/devtools/dpdk-checkpatch.py b/devtools/dpdk-checkpatch.py
new file mode 100755
index 0000000000..ed797fd5b6
--- /dev/null
+++ b/devtools/dpdk-checkpatch.py
@@ -0,0 +1,1414 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2026 Stephen Hemminger
+#
+# dpdk-checkpatch.py - Check patches for common style issues
+#
+# This is a standalone Python replacement for the DPDK checkpatches.sh
+# script that previously wrapped the Linux kernel's checkpatch.pl.
+#
+# Usage examples:
+#   # Check patch files
+#   dpdk-checkpatch.py *.patch
+#
+#   # Check patches before applying
+#   dpdk-checkpatch.py *.patch && git am *.patch
+#
+#   # Check commits since origin/main
+#   dpdk-checkpatch.py
+#
+#   # Quiet mode for scripting
+#   if dpdk-checkpatch.py -q "$patch"; then
+#       echo "Clean, applying..."
+#       git am "$patch"
+#   else
+#       echo "Issues found, skipping"
+#   fi
+#
+#   # Verbose output with context
+#   dpdk-checkpatch.py -v my-feature.patch
+
+import argparse
+import os
+import re
+import subprocess
+import sys
+import tempfile
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Optional
+
+VERSION = "1.0"
+
+# Default configuration
+DEFAULT_LINE_LENGTH = 100
+DEFAULT_CODESPELL_DICT = "/usr/share/codespell/dictionary.txt"
+
+
+@dataclass
+class CheckResult:
+    """Result of a single check."""
+    level: str  # ERROR, WARNING, CHECK
+    type_name: str
+    message: str
+    filename: str = ""
+    line_num: int = 0
+    line_content: str = ""
+
+
+@dataclass
+class PatchInfo:
+    """Information extracted from a patch."""
+    subject: str = ""
+    author: str = ""
+    author_email: str = ""
+    signoffs: list = field(default_factory=list)
+    files: list = field(default_factory=list)
+    added_lines: dict = field(default_factory=dict)  # filename -> [(line_num, content)]
+    context_before: dict = field(default_factory=dict)  # filename -> {line_num: context_line}
+    has_fixes_tag: bool = False
+    fixes_commits: list = field(default_factory=list)
+
+
+class CheckPatch:
+    """Main class for checking patches."""
+
+    def __init__(self, config: dict):
+        self.config = config
+        self.results: list[CheckResult] = []
+        self.errors = 0
+        self.warnings = 0
+        self.checks = 0
+        self.lines_checked = 0
+
+        # Load codespell dictionary if enabled
+        self.spelling_dict = {}
+        if config.get("codespell"):
+            self._load_codespell_dict()
+
+        # Forbidden token rules for DPDK
+        self.forbidden_rules = self._init_forbidden_rules()
+
+    def _load_codespell_dict(self) -> None:
+        """Load the codespell dictionary."""
+        dict_path = self.config.get("codespell_file")
+
+        if not dict_path:
+            # Search common locations for the dictionary
+            search_paths = [
+                DEFAULT_CODESPELL_DICT,
+                "/usr/local/lib/python3.12/dist-packages/codespell_lib/data/dictionary.txt",
+                "/usr/local/lib/python3.11/dist-packages/codespell_lib/data/dictionary.txt",
+                "/usr/local/lib/python3.10/dist-packages/codespell_lib/data/dictionary.txt",
+                "/usr/lib/python3/dist-packages/codespell_lib/data/dictionary.txt",
+            ]
+
+            # Also try to find it via codespell module
+            try:
+                import codespell_lib
+                module_path = os.path.join(
+                    os.path.dirname(codespell_lib.__file__),
+                    'data', 'dictionary.txt'
+                )
+                search_paths.insert(0, module_path)
+            except ImportError:
+                pass
+
+            for path in search_paths:
+                if os.path.exists(path):
+                    dict_path = path
+                    break
+
+        if not dict_path or not os.path.exists(dict_path):
+            return
+
+        try:
+            with open(dict_path, "r", encoding="utf-8", errors="ignore") as f:
+                for line in f:
+                    line = line.strip()
+                    if not line or line.startswith("#"):
+                        continue
+                    parts = line.split("->")
+                    if len(parts) >= 2:
+                        wrong = parts[0].strip().lower()
+                        correct = parts[1].strip().split(",")[0].strip()
+                        self.spelling_dict[wrong] = correct
+        except IOError:
+            pass
+
+    def _init_forbidden_rules(self) -> list:
+        """Initialize DPDK-specific forbidden token rules."""
+        return [
+            # Refrain from new calls to RTE_LOG in libraries
+            {
+                "folders": ["lib"],
+                "patterns": [r"RTE_LOG\("],
+                "message": "Prefer RTE_LOG_LINE",
+            },
+            # Refrain from new calls to RTE_LOG in drivers
+            {
+                "folders": ["drivers"],
+                "skip_files": [r".*osdep\.h$"],
+                "patterns": [r"RTE_LOG\(", r"RTE_LOG_DP\(", r"rte_log\("],
+                "message": "Prefer RTE_LOG_LINE/RTE_LOG_DP_LINE",
+            },
+            # No output on stdout or stderr
+            {
+                "folders": ["lib", "drivers"],
+                "patterns": [r"\bprintf\b", r"fprintf\(stdout,", r"fprintf\(stderr,"],
+                "message": "Writing to stdout or stderr",
+            },
+            # Refrain from rte_panic() and rte_exit()
+            {
+                "folders": ["lib", "drivers"],
+                "patterns": [r"rte_panic\(", r"rte_exit\("],
+                "message": "Using rte_panic/rte_exit",
+            },
+            # Don't call directly install_headers()
+            {
+                "folders": ["lib", "drivers"],
+                "patterns": [r"\binstall_headers\b"],
+                "message": "Using install_headers()",
+            },
+            # Refrain from using compiler attribute without common macro
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "skip_files": [r"lib/eal/include/rte_common\.h"],
+                "patterns": [r"__attribute__"],
+                "message": "Using compiler attribute directly",
+            },
+            # Check %l or %ll format specifier
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"%ll*[xud]"],
+                "message": "Using %l format, prefer %PRI*64 if type is [u]int64_t",
+            },
+            # Refrain from 16/32/64 bits rte_atomicNN_xxx()
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"rte_atomic[0-9][0-9]_.*\("],
+                "message": "Using rte_atomicNN_xxx",
+            },
+            # Refrain from rte_smp_[r/w]mb()
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"rte_smp_(r|w)?mb\("],
+                "message": "Using rte_smp_[r/w]mb",
+            },
+            # Refrain from __sync_xxx builtins
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"__sync_.*\("],
+                "message": "Using __sync_xxx builtins",
+            },
+            # Refrain from __rte_atomic_thread_fence()
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"__rte_atomic_thread_fence\("],
+                "message": "Using __rte_atomic_thread_fence, prefer rte_atomic_thread_fence",
+            },
+            # Refrain from __atomic_xxx builtins
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "skip_files": [r"drivers/common/cnxk/"],
+                "patterns": [r"__atomic_.*\(", r"__ATOMIC_(RELAXED|CONSUME|ACQUIRE|RELEASE|ACQ_REL|SEQ_CST)"],
+                "message": "Using __atomic_xxx/__ATOMIC_XXX built-ins, prefer rte_atomic_xxx/rte_memory_order_xxx",
+            },
+            # Refrain from some pthread functions
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"pthread_(create|join|detach|set(_?name_np|affinity_np)|attr_set(inheritsched|schedpolicy))\("],
+                "message": "Using pthread functions, prefer rte_thread",
+            },
+            # Forbid use of __reserved
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"\b__reserved\b"],
+                "message": "Using __reserved",
+            },
+            # Forbid use of __alignof__
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"\b__alignof__\b"],
+                "message": "Using __alignof__, prefer C11 alignof",
+            },
+            # Forbid use of __typeof__
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"\b__typeof__\b"],
+                "message": "Using __typeof__, prefer typeof",
+            },
+            # Forbid use of __builtin_*
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "skip_files": [r"lib/eal/", r"drivers/.*/base/", r"drivers/.*osdep\.h$"],
+                "patterns": [r"\b__builtin_"],
+                "message": "Using __builtin helpers, prefer EAL macros",
+            },
+            # Forbid inclusion of linux/pci_regs.h
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"include.*linux/pci_regs\.h"],
+                "message": "Using linux/pci_regs.h, prefer rte_pci.h",
+            },
+            # Forbid variadic argument pack extension in macros
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"#\s*define.*[^(,\s]\.\.\.[\s]*\)"],
+                "message": "Do not use variadic argument pack in macros",
+            },
+            # Forbid __rte_packed_begin with enums
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"enum.*__rte_packed_begin"],
+                "message": "Using __rte_packed_begin with enum is not allowed",
+            },
+            # Forbid use of #pragma
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "skip_files": [r"lib/eal/include/rte_common\.h"],
+                "patterns": [r"(#pragma|_Pragma)"],
+                "message": "Using compilers pragma is not allowed",
+            },
+            # Forbid experimental build flag except in examples
+            {
+                "folders": ["lib", "drivers", "app"],
+                "patterns": [r"-DALLOW_EXPERIMENTAL_API", r"allow_experimental_apis"],
+                "message": "Using experimental build flag for in-tree compilation",
+            },
+            # Refrain from using RTE_LOG_REGISTER for drivers and libs
+            {
+                "folders": ["lib", "drivers"],
+                "patterns": [r"\bRTE_LOG_REGISTER\b"],
+                "message": "Using RTE_LOG_REGISTER, prefer RTE_LOG_REGISTER_(DEFAULT|SUFFIX)",
+            },
+            # Forbid non-internal thread in drivers and libs
+            {
+                "folders": ["lib", "drivers"],
+                "patterns": [r"rte_thread_(set_name|create_control)\("],
+                "message": "Prefer rte_thread_(set_prefixed_name|create_internal_control)",
+            },
+        ]
+
+    def add_result(self, level: str, type_name: str, message: str,
+                   filename: str = "", line_num: int = 0, line_content: str = "") -> None:
+        """Add a check result."""
+        result = CheckResult(
+            level=level,
+            type_name=type_name,
+            message=message,
+            filename=filename,
+            line_num=line_num,
+            line_content=line_content
+        )
+        self.results.append(result)
+
+        if level == "ERROR":
+            self.errors += 1
+        elif level == "WARNING":
+            self.warnings += 1
+        else:
+            self.checks += 1
+
+    def parse_patch(self, content: str) -> PatchInfo:
+        """Parse a patch and extract information."""
+        info = PatchInfo()
+        current_file = ""
+        in_diff = False
+        line_num_in_new = 0
+
+        lines = content.split("\n")
+        for i, line in enumerate(lines):
+            # Extract subject
+            if line.startswith("Subject:"):
+                subject = line[8:].strip()
+                # Handle multi-line subjects
+                j = i + 1
+                while j < len(lines) and lines[j].startswith(" "):
+                    subject += " " + lines[j].strip()
+                    j += 1
+                info.subject = subject
+
+            # Extract author
+            if line.startswith("From:"):
+                info.author = line[5:].strip()
+                match = re.search(r"<([^>]+)>", info.author)
+                if match:
+                    info.author_email = match.group(1)
+
+            # Extract Signed-off-by
+            match = re.match(r"^Signed-off-by:\s*(.+)$", line, re.IGNORECASE)
+            if match:
+                info.signoffs.append(match.group(1).strip())
+
+            # Extract Fixes tag
+            match = re.match(r"^Fixes:\s*([0-9a-fA-F]+)", line)
+            if match:
+                info.has_fixes_tag = True
+                info.fixes_commits.append(match.group(1))
+
+            # Track files in diff
+            if line.startswith("diff --git"):
+                match = re.match(r"diff --git a/(\S+) b/(\S+)", line)
+                if match:
+                    current_file = match.group(2)
+                    if current_file not in info.files:
+                        info.files.append(current_file)
+                    info.added_lines[current_file] = []
+                in_diff = True
+
+            # Track hunks
+            if line.startswith("@@"):
+                match = re.match(r"@@ -\d+(?:,\d+)? \+(\d+)", line)
+                if match:
+                    line_num_in_new = int(match.group(1))
+                continue
+
+            # Track added lines
+            if in_diff and current_file:
+                if line.startswith("+") and not line.startswith("+++"):
+                    info.added_lines[current_file].append((line_num_in_new, line[1:]))
+                    line_num_in_new += 1
+                elif line.startswith("-"):
+                    pass  # Deleted line, don't increment
+                elif not line.startswith("\\"):
+                    # Context line - store it for reference by line number
+                    if current_file not in info.context_before:
+                        info.context_before[current_file] = {}
+                    info.context_before[current_file][line_num_in_new] = line[1:] if line.startswith(" ") else line
+                    line_num_in_new += 1
+
+        return info
+
+    def check_line_length(self, patch_info: PatchInfo) -> None:
+        """Check for lines exceeding maximum length."""
+        max_len = self.config.get("max_line_length", DEFAULT_LINE_LENGTH)
+
+        for filename, lines in patch_info.added_lines.items():
+            # Skip documentation files - they have tables and other content
+            # where long lines are acceptable
+            if filename.endswith(('.rst', '.md', '.txt')) or '/doc/' in filename:
+                continue
+
+            for line_num, content in lines:
+                # Skip strings that span multiple lines
+                if len(content) > max_len:
+                    # Don't warn about long strings or URLs
+                    if '\"' in content and content.count('\"') >= 2:
+                        continue
+                    if "http://" in content or "https://" in content:
+                        continue
+                    # Check if it's a comment line
+                    if content.strip().startswith("/*") or content.strip().startswith("*") or content.strip().startswith("//"):
+                        self.add_result(
+                            "WARNING", "LONG_LINE_COMMENT",
+                            f"line length of {len(content)} exceeds {max_len} columns",
+                            filename, line_num, content
+                        )
+                    else:
+                        self.add_result(
+                            "WARNING", "LONG_LINE",
+                            f"line length of {len(content)} exceeds {max_len} columns",
+                            filename, line_num, content
+                        )
+
+    def check_trailing_whitespace(self, patch_info: PatchInfo) -> None:
+        """Check for trailing whitespace."""
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                if content != content.rstrip():
+                    self.add_result(
+                        "WARNING", "TRAILING_WHITESPACE",
+                        "trailing whitespace",
+                        filename, line_num, content
+                    )
+
+    def check_tabs_spaces(self, patch_info: PatchInfo) -> None:
+        """Check for space before tab and mixed indentation."""
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                if " \t" in content:
+                    self.add_result(
+                        "WARNING", "SPACE_BEFORE_TAB",
+                        "space before tab in indent",
+                        filename, line_num, content
+                    )
+
+    def check_signoff(self, patch_info: PatchInfo) -> None:
+        """Check for Signed-off-by line."""
+        if not patch_info.signoffs:
+            self.add_result(
+                "ERROR", "MISSING_SIGN_OFF",
+                "Missing Signed-off-by: line(s)"
+            )
+
+    def check_coding_style(self, patch_info: PatchInfo) -> None:
+        """Check various coding style issues."""
+        for filename, lines in patch_info.added_lines.items():
+            # Skip non-C files for most checks
+            is_c_file = filename.endswith((".c", ".h"))
+            is_c_source = filename.endswith(".c")
+            is_header = filename.endswith(".h")
+
+            prev_line = ""
+            indent_stack = []
+            context_before = patch_info.context_before.get(filename, {})
+            for line_num, content in lines:
+                self.lines_checked += 1
+
+                # Check if the line immediately before this one (which may be
+                # a context line from the patch) ended with backslash continuation
+                prev_context = context_before.get(line_num - 1, "")
+                in_macro_continuation = prev_context.rstrip().endswith("\\")
+
+                if is_c_file:
+                    # Check for extern function declarations in .c files
+                    # Only flag functions (have parentheses), not data
+                    if is_c_source and re.match(r"^\s*extern\b", content):
+                        if re.search(r'\(', content):
+                            self.add_result(
+                                "WARNING", "AVOID_EXTERNS",
+                                "extern is not needed for function declarations",
+                                filename, line_num, content
+                            )
+
+                    # Check for unnecessary break after goto/return/continue
+                    # Only flag if the previous statement is unconditional (not inside an if)
+                    if re.match(r"^\s*break\s*;", content):
+                        # Check if previous line is an unconditional return/goto/continue
+                        # It's unconditional if it starts at the same or lower indentation as break
+                        # or if it's a plain return/goto not inside an if block
+                        prev_stripped = prev_line.strip() if prev_line else ""
+                        if re.match(r"^(goto\s+\w+|return\b|continue)\s*[^;]*;\s*$", prev_stripped):
+                            # Check indentation - if prev line has same or less indentation, it's unconditional
+                            break_indent = len(content) - len(content.lstrip())
+                            prev_indent = len(prev_line) - len(prev_line.lstrip()) if prev_line else 0
+                            # Only flag if the return/goto is at the same indentation level
+                            # (meaning it's not inside a nested if block)
+                            if prev_indent <= break_indent:
+                                self.add_result(
+                                    "WARNING", "UNNECESSARY_BREAK",
+                                    "break is not useful after a goto or return",
+                                    filename, line_num, content
+                                )
+
+                    # STRNCPY: should use strlcpy
+                    if re.search(r"\bstrncpy\s*\(", content):
+                        self.add_result(
+                            "WARNING", "STRNCPY",
+                            "Prefer strlcpy over strncpy - see: https://lore.kernel.org/r/CAHk-=wgfRnXz0W3D37d01q3JFkr_i_uTL=V6A6G1oUZcprmknw@mail.gmail.com/",
+                            filename, line_num, content
+                        )
+
+                    # STRCPY: unsafe string copy
+                    if re.search(r"\bstrcpy\s*\(", content):
+                        self.add_result(
+                            "ERROR", "STRCPY",
+                            "strcpy is unsafe - use strlcpy or snprintf",
+                            filename, line_num, content
+                        )
+
+                    # Check for complex macros without proper enclosure
+                    # Note: Compound literal macros like (type[]){...} are valid C99
+                    # and commonly used in DPDK, so we don't flag those.
+                    # Only flag macros with multiple statements without do-while wrapping.
+                    if re.match(r"^\s*#\s*define\s+\w+\s*\([^)]*\)\s+\{", content):
+                        # Macro body starts with { but is not a compound literal
+                        # Check if it's missing do { } while(0)
+                        if not re.search(r"\bdo\s*\{", content):
+                            self.add_result(
+                                "ERROR", "COMPLEX_MACRO",
+                                "Macros with complex values should be enclosed in parentheses or do { } while(0)",
+                                filename, line_num, content
+                            )
+
+                    # SPACING: missing space before ( in control statements
+                    if re.search(r"\b(if|while|for|switch)\(", content):
+                        self.add_result(
+                            "WARNING", "SPACING",
+                            "space required before the open parenthesis '('",
+                            filename, line_num, content
+                        )
+
+                    # SPACING: space prohibited after open square bracket
+                    # Remove string contents first to avoid false positives
+                    code_only = re.sub(r'"[^"]*"', '""', content)
+                    if re.search(r"\[\s+[^\]]", code_only) and not re.search(r"\[\s*\]", code_only):
+                        self.add_result(
+                            "WARNING", "SPACING",
+                            "space prohibited after that open square bracket '['",
+                            filename, line_num, content
+                        )
+
+                    # SPACING: space prohibited before close square bracket
+                    if re.search(r"[^\[]\s+\]", code_only):
+                        self.add_result(
+                            "WARNING", "SPACING",
+                            "space prohibited before that close square bracket ']'",
+                            filename, line_num, content
+                        )
+
+                    # RETURN_PARENTHESES: return with parentheses
+                    if re.search(r"\breturn\s*\([^;]+\)\s*;", content):
+                        # Avoid false positives for:
+                        # - function calls: return (func())
+                        # - casts: return (type)expr or return (type)(expr)
+                        if not re.search(r"\breturn\s*\(\s*\w+\s*\([^)]*\)\s*\)\s*;", content) and \
+                           not re.search(r"\breturn\s+\([a-zA-Z_][\w\s\*]*\)", content):
+                            self.add_result(
+                                "WARNING", "RETURN_PARENTHESES",
+                                "return is not a function, parentheses are not required",
+                                filename, line_num, content
+                            )
+
+                    # BRACES: single statement blocks that need braces
+                    # Check for if/else/while/for without braces on multiline
+                    if re.match(r"^\s*(if|else\s+if|while|for)\s*\([^{]*$", content):
+                        # Control statement without opening brace - check next line
+                        pass  # Would need lookahead
+
+                    # INITIALISED_STATIC: static initialized to 0/NULL
+                    if re.match(r"^\s*static\s+.*=\s*(0|NULL|0L|0UL|0ULL|0LL)\s*;", content):
+                        self.add_result(
+                            "WARNING", "INITIALISED_STATIC",
+                            "do not initialise statics to 0 or NULL",
+                            filename, line_num, content
+                        )
+
+                    # GLOBAL_INITIALISERS: global initialized to 0/NULL
+                    if re.match(r"^[a-zA-Z_][a-zA-Z0-9_\s\*]*=\s*(0|NULL|0L|0UL|0ULL|0LL)\s*;", content):
+                        if not re.match(r"^\s*static\s+", content):
+                            self.add_result(
+                                "WARNING", "GLOBAL_INITIALISERS",
+                                "do not initialise globals to 0 or NULL",
+                                filename, line_num, content
+                            )
+
+                    # Note: DEEP_INDENTATION check removed - without full brace
+                    # nesting tracking (as in checkpatch.pl), tab counting produces
+                    # too many false positives in legitimate code like switch/case
+                    # blocks and nested loops in driver transmit paths.
+
+                    # TRAILING_STATEMENTS: code on same line as } OR control statement
+                    # But allow struct/union member declarations: } name; or } name; /* comment */
+                    # Skip macro definitions - they often have intentional one-line constructs
+                    is_macro_line = content.rstrip().endswith("\\") or re.match(r"^\s*#\s*define", content)
+                    if not is_macro_line:
+                        if re.search(r"\}\s*[a-zA-Z_]", content) and not re.search(r"\}\s*(else|while)\b", content):
+                            # Check if this is a struct/union variable declaration
+                            # Pattern: } name; or } name[]; or } name = {...}; or with comment
+                            if not re.search(r"\}\s*\w+\s*(\[\d*\])?\s*(=\s*\{[^}]*\}\s*)?;\s*(/\*.*\*/|//.*)?\s*$", content):
+                                self.add_result(
+                                    "ERROR", "TRAILING_STATEMENTS",
+                                    "trailing statements should be on next line",
+                                    filename, line_num, content
+                                )
+                        # Also check for if/while/for with statement on same line (not opening brace)
+                        # Pattern: if (cond) statement; or if (cond) statement; /* comment */
+                        if re.search(r"\b(if|while|for)\s*\([^)]+\)\s+(?![\s{])[^;]*;", content):
+                            self.add_result(
+                                "ERROR", "TRAILING_STATEMENTS",
+                                "trailing statements should be on next line",
+                                filename, line_num, content
+                            )
+
+                    # CONSTANT_COMPARISON: Yoda conditions (constant on left)
+                    # Check for constants on left side of any comparison operator
+                    # Skip comments
+                    cmp_content = content.strip()
+                    if not cmp_content.startswith(('/*', '*', '//')):
+                        if re.search(r'\b(NULL|true|false)\s*[!=<>]=?\s*[&*\w]', cmp_content) or \
+                           re.search(r'[\s(]\s*0\s*[!=<>]=?\s*[&*\w]', cmp_content):
+                            # Exclude static_assert - operand order doesn't matter
+                            if not re.match(r'^\s*static_assert\s*\(', cmp_content):
+                                self.add_result(
+                                    "WARNING", "CONSTANT_COMPARISON",
+                                    "Comparisons should place the constant on the right side",
+                                    filename, line_num, content
+                                )
+
+                    # BRACES: single statement block should not have braces (or vice versa)
+                    # Check for if/else/while/for with single statement in braces
+                    if re.match(r"^\s*(if|while|for)\s*\([^)]+\)\s*\{\s*$", prev_line):
+                        if re.match(r"^\s*\w.*;\s*$", content) and not re.search(r"^\s*(if|else|while|for|switch|case|default|return\s*;)", content):
+                            # Check if next line is just closing brace - would need lookahead
+                            pass
+
+                    # ONE_SEMICOLON: double semicolon
+                    if re.search(r";;", content) and not re.search(r"for\s*\([^)]*;;", content):
+                        self.add_result(
+                            "WARNING", "ONE_SEMICOLON",
+                            "Statements terminations use 1 semicolon",
+                            filename, line_num, content
+                        )
+
+                    # CODE_INDENT/LEADING_SPACE: spaces used for indentation instead of tabs
+                    if re.match(r"^    +[^\s]", content) and not content.strip().startswith("*"):
+                        # Line starts with spaces (not tabs) - but allow for alignment in comments
+                        self.add_result(
+                            "WARNING", "CODE_INDENT",
+                            "code indent should use tabs where possible",
+                            filename, line_num, content
+                        )
+
+                    # LEADING_SPACE: spaces at start of line (more general)
+                    if re.match(r"^ +\t", content):
+                        self.add_result(
+                            "WARNING", "LEADING_SPACE",
+                            "please, no spaces at the start of a line",
+                            filename, line_num, content
+                        )
+
+                    # LINE_CONTINUATIONS: backslash continuation outside macros
+                    # Check if this line has a backslash continuation
+                    if content.rstrip().endswith("\\"):
+                        # Only flag if not inside a macro definition
+                        # A macro context means either:
+                        # - This line starts a #define
+                        # - The previous line (added or context) was a continuation
+                        # - This line is a preprocessor directive
+                        is_in_macro = (
+                            re.match(r"^\s*#", content) or
+                            (prev_line and prev_line.rstrip().endswith("\\")) or
+                            in_macro_continuation
+                        )
+                        if not is_in_macro:
+                            self.add_result(
+                                "WARNING", "LINE_CONTINUATIONS",
+                                "Avoid unnecessary line continuations",
+                                filename, line_num, content
+                            )
+
+                    # FUNCTION_WITHOUT_ARGS: empty parens instead of (void)
+                    if is_header and re.search(r"\b\w+\s*\(\s*\)\s*;", content):
+                        if not re.search(r"\b(while|if|for|switch|return)\s*\(\s*\)", content):
+                            self.add_result(
+                                "ERROR", "FUNCTION_WITHOUT_ARGS",
+                                "Bad function definition - use (void) instead of ()",
+                                filename, line_num, content
+                            )
+
+                    # INLINE_LOCATION: inline should come after storage class
+                    if re.match(r"^\s*inline\s+(static|extern)", content):
+                        self.add_result(
+                            "ERROR", "INLINE_LOCATION",
+                            "inline keyword should sit between storage class and type",
+                            filename, line_num, content
+                        )
+
+                    # STATIC_CONST: const should come after static
+                    if re.match(r"^\s*const\s+static\b", content):
+                        self.add_result(
+                            "WARNING", "STATIC_CONST",
+                            "Move const after static - use 'static const'",
+                            filename, line_num, content
+                        )
+                        self.add_result(
+                            "WARNING", "STORAGE_CLASS",
+                            "storage class should be at the beginning of the declaration",
+                            filename, line_num, content
+                        )
+
+                    # CONST_CONST: const used twice
+                    if re.search(r"\bconst\s+\w+\s+const\b", content):
+                        self.add_result(
+                            "WARNING", "CONST_CONST",
+                            "const used twice - remove duplicate const",
+                            filename, line_num, content
+                        )
+
+                    # SELF_ASSIGNMENT: x = x (simple variable, not struct members)
+                    # Match only simple identifiers, not struct/pointer member access
+                    match = re.search(r"^\s*(\w+)\s*=\s*(\w+)\s*;", content)
+                    if match and match.group(1) == match.group(2):
+                        self.add_result(
+                            "WARNING", "SELF_ASSIGNMENT",
+                            "Do not use self-assignments to avoid compiler warnings",
+                            filename, line_num, content
+                        )
+
+                    # PREFER_DEFINED_ATTRIBUTE_MACRO: prefer DPDK/kernel macros over __attribute__
+                    attr_macros = {
+                        'cold': '__rte_cold',
+                        'hot': '__rte_hot', 
+                        'noinline': '__rte_noinline',
+                        'always_inline': '__rte_always_inline',
+                        'unused': '__rte_unused',
+                        'packed': '__rte_packed',
+                        'aligned': '__rte_aligned',
+                        'weak': '__rte_weak',
+                        'pure': '__rte_pure',
+                    }
+                    for attr, replacement in attr_macros.items():
+                        if re.search(rf'__attribute__\s*\(\s*\(\s*{attr}\b', content):
+                            self.add_result(
+                                "WARNING", "PREFER_DEFINED_ATTRIBUTE_MACRO",
+                                f"Prefer {replacement} over __attribute__(({attr}))",
+                                filename, line_num, content
+                            )
+
+                    # POINTER_LOCATION: "type*" should be "type *"
+                    # Catches both declarations (uint8_t* ptr) and
+                    # cast expressions ((uint8_t* )val).
+                    # Skip comments and strings.
+                    stripped = content.strip()
+                    if not stripped.startswith(('/*', '*', '//')):
+                        # Match type* followed by space+identifier or space+)
+                        pointer_match = re.search(r"\b(\w+)\*\s+(\w|\))", content)
+                        if pointer_match:
+                            type_name = pointer_match.group(1)
+                            is_type = (
+                                type_name in ('char', 'int', 'void', 'short', 'long',
+                                             'float', 'double', 'unsigned', 'signed',
+                                             'const', 'volatile', 'struct', 'union',
+                                             'enum') or
+                                type_name.endswith('_t') or
+                                type_name.startswith('rte_')
+                            )
+                            if is_type:
+                                self.add_result(
+                                    "ERROR", "POINTER_LOCATION",
+                                    "\"foo* bar\" should be \"foo *bar\"",
+                                    filename, line_num, content
+                                )
+
+                    # MACRO_WITH_FLOW_CONTROL: macros with return/goto/break
+                    if re.match(r"^\s*#\s*define\s+\w+.*\b(return|goto|break|continue)\b", content):
+                        self.add_result(
+                            "WARNING", "MACRO_WITH_FLOW_CONTROL",
+                            "Macros with flow control statements should be avoided",
+                            filename, line_num, content
+                        )
+
+                    # MULTISTATEMENT_MACRO_USE_DO_WHILE: macros with multiple statements
+                    if re.match(r"^\s*#\s*define\s+\w+\([^)]*\)\s+.*;\s*[^\\]", content):
+                        if not re.search(r"do\s*\{", content):
+                            self.add_result(
+                                "WARNING", "MULTISTATEMENT_MACRO_USE_DO_WHILE",
+                                "Macros with multiple statements should use do {} while(0)",
+                                filename, line_num, content
+                            )
+
+                    # MULTISTATEMENT_MACRO_USE_DO_WHILE: macros starting with if
+                    if re.match(r"^\s*#\s*define\s+\w+\([^)]*\)\s+if\s*\(", content):
+                        self.add_result(
+                            "ERROR", "MULTISTATEMENT_MACRO_USE_DO_WHILE",
+                            "Macros starting with if should be enclosed by a do - while loop",
+                            filename, line_num, content
+                        )
+
+                    # Multiple statements on one line (skip comments and strings)
+                    stripped_content = content.strip()
+                    if re.search(r";\s*[a-zA-Z_]", content) and "for" not in content:
+                        # Skip if line is a comment
+                        if not (stripped_content.startswith("/*") or 
+                                stripped_content.startswith("*") or 
+                                stripped_content.startswith("//")):
+                            # Skip if the semicolon is inside a string or comment
+                            # Remove strings and comments before checking
+                            code_only = re.sub(r'"[^"]*"', '""', content)  # Remove string contents
+                            code_only = re.sub(r'/\*.*?\*/', '', code_only)  # Remove /* */ comments
+                            code_only = re.sub(r'//.*$', '', code_only)  # Remove // comments
+                            if re.search(r";\s*[a-zA-Z_]", code_only):
+                                self.add_result(
+                                    "CHECK", "MULTIPLE_STATEMENTS",
+                                    "multiple statements on one line",
+                                    filename, line_num, content
+                                )
+
+                    # Check for C99 comments in headers that should use C89
+                    if is_header and "//" in content:
+                        # Only flag if not in a string
+                        stripped = re.sub(r'"[^"]*"', '', content)
+                        if "//" in stripped:
+                            self.add_result(
+                                "CHECK", "C99_COMMENTS",
+                                "C99 // comments are acceptable but /* */ is preferred in headers",
+                                filename, line_num, content
+                            )
+
+                    # BLOCK_COMMENT_STYLE: block comments style issues
+                    # Leading /* on its own line (but allow Doxygen /** style)
+                    if re.match(r"^\s*/\*\*+\s*$", content):
+                        # Allow /** (Doxygen) but not /*** or more
+                        if not re.match(r"^\s*/\*\*\s*$", content):
+                            self.add_result(
+                                "WARNING", "BLOCK_COMMENT_STYLE",
+                                "Block comments should not use a leading /* on a line by itself",
+                                filename, line_num, content
+                            )
+                    # Trailing */ on separate line after block comment
+                    if re.match(r"^\s*\*+/\s*$", content) and prev_line.strip().startswith("*"):
+                        pass  # This is actually acceptable
+                    # Block with trailing */ but content before it (like === */)
+                    if re.search(r"\S\s*=+\s*\*/\s*$", content):
+                        self.add_result(
+                            "WARNING", "BLOCK_COMMENT_STYLE",
+                            "Block comments use a trailing */ on a separate line",
+                            filename, line_num, content
+                        )
+
+                    # REPEATED_WORD: check for repeated words
+                    words = re.findall(r'\b(\w+)\s+\1\b', content, re.IGNORECASE)
+                    for word in words:
+                        word_lower = word.lower()
+                        # Skip common valid repeated patterns
+                        if word_lower not in ('that', 'had', 'long', 'int', 'short'):
+                            self.add_result(
+                                "WARNING", "REPEATED_WORD",
+                                f"Possible repeated word: '{word}'",
+                                filename, line_num, content
+                            )
+
+                    # STRING_FRAGMENTS: unnecessary string concatenation like "foo" "bar"
+                    # Must have closing quote, whitespace, opening quote pattern
+                    if re.search(r'"\s*"\s*[^)]', content) and not re.search(r'#\s*define', content):
+                        # Verify it's actually two separate strings being concatenated
+                        # by checking for the pattern: "..." "..."
+                        if re.search(r'"[^"]*"\s+"[^"]*"', content):
+                            self.add_result(
+                                "CHECK", "STRING_FRAGMENTS",
+                                "Consecutive strings are generally better as a single string",
+                                filename, line_num, content
+                            )
+
+                prev_line = content
+
+    def check_spelling(self, patch_info: PatchInfo) -> None:
+        """Check for spelling errors using codespell dictionary."""
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                # REPEATED_WORD check for non-C files (C files handled in check_coding_style)
+                if not filename.endswith((".c", ".h")):
+                    words = re.findall(r'\b(\w+)\s+\1\b', content, re.IGNORECASE)
+                    for word in words:
+                        word_lower = word.lower()
+                        if word_lower not in ('that', 'had', 'long', 'int', 'short'):
+                            self.add_result(
+                                "WARNING", "REPEATED_WORD",
+                                f"Possible repeated word: '{word}'",
+                                filename, line_num, content
+                            )
+
+                # Spelling check
+                if self.spelling_dict:
+                    # Common abbreviations that should not be flagged as typos
+                    abbreviations = {
+                        'nd', 'ns', 'na', 'ra', 'rs',  # IPv6 Neighbor Discovery
+                        'tx', 'rx', 'id', 'io', 'ip',  # Common networking
+                        'tcp', 'udp', 'arp', 'dns',    # Protocols  
+                        'hw', 'sw', 'fw',              # Hardware/Software/Firmware
+                        'src', 'dst', 'ptr', 'buf',    # Common code abbreviations
+                        'cfg', 'ctx', 'idx', 'cnt',    # Config/Context/Index/Count
+                        'len', 'num', 'max', 'min',    # Length/Number/Max/Min
+                        'prev', 'next', 'curr',        # Previous/Next/Current
+                        'init', 'fini', 'deinit',      # Initialize/Finish
+                        'alloc', 'dealloc', 'realloc', # Memory
+                        'endcode',                      # Doxygen tag
+                    }
+                    # Extract words, but skip contractions (don't, couldn't, etc.)
+                    # by removing them before word extraction
+                    spell_content = re.sub(r"[a-zA-Z]+n't\b", '', content)
+                    spell_content = re.sub(r"[a-zA-Z]+'[a-zA-Z]+", '', spell_content)
+                    words = re.findall(r'\b[a-zA-Z]+\b', spell_content)
+                    for word in words:
+                        lower_word = word.lower()
+                        if lower_word in self.spelling_dict and lower_word not in abbreviations:
+                            self.add_result(
+                                "WARNING", "TYPO_SPELLING",
+                                f"'{word}' may be misspelled - perhaps '{self.spelling_dict[lower_word]}'?",
+                                filename, line_num, content
+                            )
+
+    def check_forbidden_tokens(self, patch_info: PatchInfo) -> None:
+        """Check for DPDK-specific forbidden tokens."""
+        for filename, lines in patch_info.added_lines.items():
+            for rule in self.forbidden_rules:
+                # Check if file is in one of the target folders
+                in_folder = False
+                for folder in rule["folders"]:
+                    if filename.startswith(folder + "/") or filename.startswith("b/" + folder + "/"):
+                        in_folder = True
+                        break
+
+                if not in_folder:
+                    continue
+
+                # Check if file should be skipped
+                skip = False
+                for skip_pattern in rule.get("skip_files", []):
+                    if re.search(skip_pattern, filename):
+                        skip = True
+                        break
+
+                if skip:
+                    continue
+
+                # Check each line for forbidden patterns
+                for line_num, content in lines:
+                    for pattern in rule["patterns"]:
+                        if re.search(pattern, content):
+                            self.add_result(
+                                "WARNING", "FORBIDDEN_TOKEN",
+                                rule["message"],
+                                filename, line_num, content
+                            )
+                            break
+
+    def check_experimental_tags(self, patch_info: PatchInfo) -> None:
+        """Check __rte_experimental tag placement."""
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                if "__rte_experimental" in content:
+                    # Should only be in headers
+                    if filename.endswith(".c"):
+                        self.add_result(
+                            "WARNING", "EXPERIMENTAL_TAG",
+                            f"Please only put __rte_experimental tags in headers ({filename})",
+                            filename, line_num, content
+                        )
+                    # Should appear alone on the line
+                    stripped = content.strip()
+                    if stripped != "__rte_experimental":
+                        self.add_result(
+                            "WARNING", "EXPERIMENTAL_TAG",
+                            "__rte_experimental must appear alone on the line immediately preceding the return type of a function",
+                            filename, line_num, content
+                        )
+
+    def check_internal_tags(self, patch_info: PatchInfo) -> None:
+        """Check __rte_internal tag placement."""
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                if "__rte_internal" in content:
+                    # Should only be in headers
+                    if filename.endswith(".c"):
+                        self.add_result(
+                            "WARNING", "INTERNAL_TAG",
+                            f"Please only put __rte_internal tags in headers ({filename})",
+                            filename, line_num, content
+                        )
+                    # Should appear alone on the line
+                    stripped = content.strip()
+                    if stripped != "__rte_internal":
+                        self.add_result(
+                            "WARNING", "INTERNAL_TAG",
+                            "__rte_internal must appear alone on the line immediately preceding the return type of a function",
+                            filename, line_num, content
+                        )
+
+    def check_aligned_attributes(self, patch_info: PatchInfo) -> None:
+        """Check alignment attribute usage."""
+        align_tokens = ["__rte_aligned", "__rte_cache_aligned", "__rte_cache_min_aligned"]
+
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                for token in align_tokens:
+                    if re.search(rf"\b{token}\b", content):
+                        # Should only be used with struct or union
+                        if not re.search(rf"\b(struct|union)\s*{token}\b", content):
+                            self.add_result(
+                                "WARNING", "ALIGNED_ATTRIBUTE",
+                                f"Please use {token} only for struct or union types alignment",
+                                filename, line_num, content
+                            )
+
+    def check_packed_attributes(self, patch_info: PatchInfo) -> None:
+        """Check packed attribute usage."""
+        begin_count = 0
+        end_count = 0
+
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                if "__rte_packed_begin" in content:
+                    begin_count += 1
+                    # Should be after struct, union, or alignment attributes
+                    if not re.search(r"\b(struct|union)\s*__rte_packed_begin\b", content) and \
+                       not re.search(r"__rte_cache_aligned\s*__rte_packed_begin", content) and \
+                       not re.search(r"__rte_cache_min_aligned\s*__rte_packed_begin", content) and \
+                       not re.search(r"__rte_aligned\(.*\)\s*__rte_packed_begin", content):
+                        self.add_result(
+                            "WARNING", "PACKED_ATTRIBUTE",
+                            "Use __rte_packed_begin only after struct, union or alignment attributes",
+                            filename, line_num, content
+                        )
+
+                if "__rte_packed_end" in content:
+                    end_count += 1
+
+        if begin_count != end_count:
+            self.add_result(
+                "WARNING", "PACKED_ATTRIBUTE",
+                "__rte_packed_begin and __rte_packed_end should always be used in pairs"
+            )
+
+    def check_patch(self, content: str, patch_file: str = None) -> bool:
+        """Run all checks on a patch."""
+        self.results = []
+        self.errors = 0
+        self.warnings = 0
+        self.checks = 0
+        self.lines_checked = 0
+
+        # Check patch format first
+        self.check_patch_format(content, patch_file)
+
+        patch_info = self.parse_patch(content)
+
+        # Run all checks
+        self.check_signoff(patch_info)
+        self.check_line_length(patch_info)
+        self.check_trailing_whitespace(patch_info)
+        self.check_tabs_spaces(patch_info)
+        self.check_coding_style(patch_info)
+        self.check_spelling(patch_info)
+        self.check_forbidden_tokens(patch_info)
+        self.check_experimental_tags(patch_info)
+        self.check_internal_tags(patch_info)
+        self.check_aligned_attributes(patch_info)
+        self.check_packed_attributes(patch_info)
+        self.check_commit_message(patch_info, content)
+
+        return self.errors == 0 and self.warnings == 0
+
+    def check_patch_format(self, content: str, patch_file: str = None) -> None:
+        """Check basic patch format for corruption."""
+        lines = content.split("\n")
+
+        # Track patch structure
+        has_diff = False
+        has_hunk = False
+        in_hunk = False
+        hunk_line = 0
+
+        for i, line in enumerate(lines, 1):
+            # Track diff headers
+            if line.startswith("diff --git"):
+                has_diff = True
+                in_hunk = False
+
+            # Parse hunk header
+            if line.startswith("@@"):
+                has_hunk = True
+                in_hunk = True
+                hunk_line = i
+                # Validate hunk header format
+                if not re.match(r"@@ -\d+(?:,\d+)? \+\d+(?:,\d+)? @@", line):
+                    self.add_result(
+                        "ERROR", "CORRUPTED_PATCH",
+                        f"patch seems to be corrupt (malformed hunk header) at line {i}"
+                    )
+
+            # End of patch content (signature separator)
+            elif line == "-- ":
+                in_hunk = False
+
+            # Check for lines that look like they should be in a hunk but aren't prefixed
+            elif in_hunk and line and not line.startswith(("+", "-", " ", "\\", "diff ", "@@", "index ", "--- ", "+++ ", "new file", "deleted file", "old mode", "new mode", "rename ", "similarity", "copy ")):
+                # This could be a wrapped line or corruption
+                # But be careful - empty lines and commit message lines are OK
+                if not line.startswith(("From ", "Subject:", "Date:", "Signed-off-by:",
+                                       "Acked-by:", "Reviewed-by:", "Tested-by:",
+                                       "Fixes:", "Cc:", "---", "Message-Id:")):
+                    # Likely a corrupted/wrapped line in the diff
+                    self.add_result(
+                        "ERROR", "CORRUPTED_PATCH",
+                        f"patch seems to be corrupt (line wrapped?) at line {i}"
+                    )
+                    in_hunk = False  # Stop checking this hunk
+
+        if has_diff and not has_hunk:
+            self.add_result(
+                "ERROR", "CORRUPTED_PATCH",
+                "Patch appears to be corrupted (has diff but no hunks)"
+            )
+
+        # Check for DOS line endings
+        if "\r\n" in content:
+            self.add_result(
+                "ERROR", "DOS_LINE_ENDINGS",
+                "Patch has DOS line endings, should be UNIX line endings"
+            )
+
+    def check_commit_message(self, patch_info: PatchInfo, content: str) -> None:
+        """Check commit message for issues."""
+        lines = content.split("\n")
+
+        in_commit_msg = False
+        commit_msg_lines = []
+
+        for i, line in enumerate(lines):
+            if line.startswith("Subject:"):
+                in_commit_msg = True
+                continue
+            if line.startswith("---") or line.startswith("diff --git"):
+                in_commit_msg = False
+                continue
+            if in_commit_msg:
+                commit_msg_lines.append((i + 1, line))
+
+        for line_num, line in commit_msg_lines:
+            # UNKNOWN_COMMIT_ID: Fixes tag with short or invalid commit ID
+            match = re.match(r"^Fixes:\s*([0-9a-fA-F]+)", line)
+            if match:
+                commit_id = match.group(1)
+                if len(commit_id) < 12:
+                    self.add_result(
+                        "WARNING", "UNKNOWN_COMMIT_ID",
+                        f"Commit id '{commit_id}' is too short, use at least 12 characters",
+                        line_num=line_num, line_content=line
+                    )
+                # Check Fixes format: should be Fixes: <hash> ("commit subject")
+                if not re.match(r'^Fixes:\s+[0-9a-fA-F]{12,}\s+\("[^"]+"\)\s*$', line):
+                    self.add_result(
+                        "WARNING", "BAD_FIXES_TAG",
+                        "Fixes: tag format should be: Fixes: <12+ char hash> (\"commit subject\")",
+                        line_num=line_num, line_content=line
+                    )
+
+    def format_results(self, show_types: bool = True) -> str:
+        """Format the results for output."""
+        output = []
+
+        for result in self.results:
+            if result.filename and result.line_num:
+                prefix = f"{result.filename}:{result.line_num}:"
+            elif result.filename:
+                prefix = f"{result.filename}:"
+            else:
+                prefix = ""
+
+            type_str = f" [{result.type_name}]" if show_types else ""
+            output.append(f"{result.level}:{type_str} {result.message}")
+
+            if prefix:
+                output.append(f"#  {prefix}")
+            if result.line_content:
+                output.append(f"+  {result.line_content}")
+            output.append("")
+
+        return "\n".join(output)
+
+    def get_summary(self) -> str:
+        """Get a summary of the check results."""
+        return f"total: {self.errors} errors, {self.warnings} warnings, {self.checks} checks, {self.lines_checked} lines checked"
+
+
+def split_mbox(content: str) -> list[str]:
+    """Split an mbox file into individual messages.
+    
+    Mbox format uses 'From ' at the start of a line as message separator.
+    """
+    messages = []
+    current = []
+    
+    for line in content.split('\n'):
+        # Standard mbox separator: line starting with "From " followed by
+        # an address or identifier and a date
+        if line.startswith('From ') and current:
+            messages.append('\n'.join(current))
+            current = [line]
+        else:
+            current.append(line)
+    
+    if current:
+        messages.append('\n'.join(current))
+    
+    return messages
+
+
+def check_single_patch(checker: CheckPatch, patch_path: Optional[str],
+                       commit: Optional[str], verbose: bool, quiet: bool,
+                       pre_content: Optional[str] = None) -> bool:
+    """Check a single patch file or commit."""
+    subject = ""
+    content = ""
+
+    if pre_content:
+        content = pre_content
+    elif patch_path:
+        try:
+            with open(patch_path, "r", encoding="utf-8", errors="replace") as f:
+                content = f.read()
+        except IOError as e:
+            print(f"Error reading {patch_path}: {e}", file=sys.stderr)
+            return False
+    elif commit:
+        try:
+            result = subprocess.run(
+                ["git", "format-patch", "--find-renames", "--no-stat", "--stdout", "-1", commit],
+                capture_output=True,
+                text=True
+            )
+            if result.returncode != 0:
+                print(f"Error getting commit {commit}", file=sys.stderr)
+                return False
+            content = result.stdout
+        except (subprocess.CalledProcessError, FileNotFoundError) as e:
+            print(f"Error running git: {e}", file=sys.stderr)
+            return False
+    else:
+        content = sys.stdin.read()
+
+    # Extract subject
+    match = re.search(r"^Subject:\s*(.+?)(?:\n(?=\S)|\n\n)", content, re.MULTILINE | re.DOTALL)
+    if match:
+        subject = match.group(1).replace("\n ", " ").strip()
+
+    if verbose:
+        print(f"\n### {subject}\n")
+
+    is_clean = checker.check_patch(content, patch_path)
+    has_issues = checker.errors > 0 or checker.warnings > 0
+
+    if has_issues or verbose:
+        if not verbose and subject:
+            print(f"\n### {subject}\n")
+        print(checker.format_results(show_types=True))
+        print(checker.get_summary())
+
+    return is_clean
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse command line arguments."""
+    parser = argparse.ArgumentParser(
+        description="Check patches for DPDK coding style and common issues",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  %(prog)s patch.diff                Check a patch file
+  %(prog)s -n 3                      Check last 3 commits
+  %(prog)s -r origin/main..HEAD      Check commits in range
+  cat patch.diff | %(prog)s          Check patch from stdin
+"""
+    )
+
+    parser.add_argument("patches", nargs="*", help="Patch files to check")
+    parser.add_argument("-n", type=int, metavar="NUM",
+                       help="Check last NUM commits")
+    parser.add_argument("-r", "--range", metavar="RANGE",
+                       help="Check commits in git range (default: origin/main..)")
+    parser.add_argument("-q", "--quiet", action="store_true",
+                       help="Quiet mode - only show summary")
+    parser.add_argument("-v", "--verbose", action="store_true",
+                       help="Verbose mode - show all checks")
+    parser.add_argument("--max-line-length", type=int, default=DEFAULT_LINE_LENGTH,
+                       help=f"Maximum line length (default: {DEFAULT_LINE_LENGTH})")
+    parser.add_argument("--codespell", action="store_true", default=True,
+                       help="Enable spell checking (default: enabled)")
+    parser.add_argument("--no-codespell", dest="codespell", action="store_false",
+                       help="Disable spell checking")
+    parser.add_argument("--codespellfile", metavar="FILE",
+                       help="Path to codespell dictionary")
+    parser.add_argument("--show-types", action="store_true", default=True,
+                       help="Show message types (default: enabled)")
+    parser.add_argument("--no-show-types", dest="show_types", action="store_false",
+                       help="Hide message types")
+
+    return parser.parse_args()
+
+
+def main():
+    """Main entry point."""
+    args = parse_args()
+
+    # Build configuration
+    config = {
+        "max_line_length": args.max_line_length,
+        "codespell": args.codespell,
+        "show_types": args.show_types,
+    }
+
+    if args.codespellfile:
+        config["codespell_file"] = args.codespellfile
+
+    checker = CheckPatch(config)
+
+    total = 0
+    failed = 0
+
+    if args.patches:
+        # Check specified patch files
+        for patch in args.patches:
+            try:
+                with open(patch, "r", encoding="utf-8", errors="replace") as f:
+                    content = f.read()
+            except IOError as e:
+                print(f"Error reading {patch}: {e}", file=sys.stderr)
+                total += 1
+                failed += 1
+                continue
+
+            # Check if this is an mbox with multiple patches
+            messages = split_mbox(content)
+            if len(messages) > 1:
+                for msg in messages:
+                    # Only process messages that contain diffs
+                    if 'diff --git' in msg or '---' in msg:
+                        total += 1
+                        if not check_single_patch(checker, None, None, args.verbose, args.quiet, msg):
+                            failed += 1
+            else:
+                total += 1
+                if not check_single_patch(checker, patch, None, args.verbose, args.quiet):
+                    failed += 1
+
+    elif args.n or args.range:
+        # Check git commits
+        if args.n:
+            result = subprocess.run(
+                ["git", "rev-list", "--reverse", f"--max-count={args.n}", "HEAD"],
+                capture_output=True,
+                text=True
+            )
+        else:
+            git_range = args.range if args.range else "origin/main.."
+            result = subprocess.run(
+                ["git", "rev-list", "--reverse", git_range],
+                capture_output=True,
+                text=True
+            )
+
+        if result.returncode != 0:
+            print("Error getting git commits", file=sys.stderr)
+            sys.exit(1)
+
+        commits = result.stdout.strip().split("\n")
+        for commit in commits:
+            if commit:
+                total += 1
+                if not check_single_patch(checker, None, commit, args.verbose, args.quiet):
+                    failed += 1
+
+    elif not sys.stdin.isatty():
+        # Read from stdin
+        total = 1
+        if not check_single_patch(checker, None, None, args.verbose, args.quiet):
+            failed += 1
+
+    else:
+        # Default to checking commits since origin/main
+        result = subprocess.run(
+            ["git", "rev-list", "--reverse", "origin/main.."],
+            capture_output=True,
+            text=True
+        )
+
+        commits = result.stdout.strip().split("\n") if result.stdout.strip() else []
+        for commit in commits:
+            if commit:
+                total += 1
+                if not check_single_patch(checker, None, commit, args.verbose, args.quiet):
+                    failed += 1
+
+    # Print summary
+    passed = total - failed
+    if not args.quiet:
+        print(f"\n{passed}/{total} valid patch{'es' if passed != 1 else ''}")
+
+    sys.exit(0 if failed == 0 else 1)
+
+
+if __name__ == "__main__":
+    main()
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v6] devtools: add Python-based patch style checker
  2026-01-31 20:48 [RFC] devtools: replace get-maintainer shell wrapper with Python script Stephen Hemminger
                   ` (4 preceding siblings ...)
  2026-02-26 17:15 ` [PATCH v5] devtools: add Python-based patch style checker Stephen Hemminger
@ 2026-03-24 14:48 ` Stephen Hemminger
  5 siblings, 0 replies; 13+ messages in thread
From: Stephen Hemminger @ 2026-03-24 14:48 UTC (permalink / raw)
  To: dev; +Cc: Stephen Hemminger, Bruce Richardson

Add dpdk-checkpatch.py as a standalone alternative to checkpatches.sh.
Unlike the existing shell script wrapper around checkpatch.pl, this
requires no Linux kernel source tree or Perl installation and is
significantly faster (~0.4s vs ~2m23s on a recent patch series).

Supports the same usage patterns: patch files, mbox bundles, git commit
ranges (-r), last N commits (-n), and stdin. Implements common style
checks (spacing, line length, spelling via codespell) plus
DPDK-specific forbidden token and tag checks.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Acked-by: Bruce Richardson <bruce.richardson@intel.com>
---
v6 - cleanup several false positives

 devtools/dpdk-checkpatch.py | 1829 +++++++++++++++++++++++++++++++++++
 1 file changed, 1829 insertions(+)
 create mode 100755 devtools/dpdk-checkpatch.py

diff --git a/devtools/dpdk-checkpatch.py b/devtools/dpdk-checkpatch.py
new file mode 100755
index 0000000000..64111c4eb7
--- /dev/null
+++ b/devtools/dpdk-checkpatch.py
@@ -0,0 +1,1829 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2026 Stephen Hemminger
+#
+# dpdk-checkpatch.py - Check patches for common style issues
+#
+# This is a standalone Python replacement for the DPDK checkpatches.sh
+# script that previously wrapped the Linux kernel's checkpatch.pl.
+#
+# Usage examples:
+#   # Check patch files
+#   dpdk-checkpatch.py *.patch
+#
+#   # Check patches before applying
+#   dpdk-checkpatch.py *.patch && git am *.patch
+#
+#   # Check commits since origin/main
+#   dpdk-checkpatch.py
+#
+#   # Quiet mode for scripting
+#   if dpdk-checkpatch.py -q "$patch"; then
+#       echo "Clean, applying..."
+#       git am "$patch"
+#   else
+#       echo "Issues found, skipping"
+#   fi
+#
+#   # Verbose output with context
+#   dpdk-checkpatch.py -v my-feature.patch
+
+import argparse
+import os
+import re
+import subprocess
+import sys
+import tempfile
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Optional
+
+VERSION = "1.0"
+
+# Default configuration
+DEFAULT_LINE_LENGTH = 100
+DEFAULT_CODESPELL_DICT = "/usr/share/codespell/dictionary.txt"
+
+
+@dataclass
+class CheckResult:
+    """Result of a single check."""
+
+    level: str  # ERROR, WARNING, CHECK
+    type_name: str
+    message: str
+    filename: str = ""
+    line_num: int = 0
+    line_content: str = ""
+
+
+@dataclass
+class PatchInfo:
+    """Information extracted from a patch."""
+
+    subject: str = ""
+    author: str = ""
+    author_email: str = ""
+    signoffs: list = field(default_factory=list)
+    files: list = field(default_factory=list)
+    added_lines: dict = field(default_factory=dict)  # filename -> [(line_num, content)]
+    context_before: dict = field(
+        default_factory=dict
+    )  # filename -> {line_num: context_line}
+    has_fixes_tag: bool = False
+    fixes_commits: list = field(default_factory=list)
+
+
+class CheckPatch:
+    """Main class for checking patches."""
+
+    def __init__(self, config: dict):
+        self.config = config
+        self.results: list[CheckResult] = []
+        self.errors = 0
+        self.warnings = 0
+        self.checks = 0
+        self.lines_checked = 0
+
+        # Load codespell dictionary if enabled
+        self.spelling_dict = {}
+        if config.get("codespell"):
+            self._load_codespell_dict()
+
+        # Forbidden token rules for DPDK
+        self.forbidden_rules = self._init_forbidden_rules()
+
+    def _load_codespell_dict(self) -> None:
+        """Load the codespell dictionary."""
+        dict_path = self.config.get("codespell_file")
+
+        if not dict_path:
+            # Search common locations for the dictionary
+            search_paths = [
+                DEFAULT_CODESPELL_DICT,
+                "/usr/local/lib/python3.12/dist-packages/codespell_lib/data/dictionary.txt",
+                "/usr/local/lib/python3.11/dist-packages/codespell_lib/data/dictionary.txt",
+                "/usr/local/lib/python3.10/dist-packages/codespell_lib/data/dictionary.txt",
+                "/usr/lib/python3/dist-packages/codespell_lib/data/dictionary.txt",
+            ]
+
+            # Also try to find it via codespell module
+            try:
+                import codespell_lib
+
+                module_path = os.path.join(
+                    os.path.dirname(codespell_lib.__file__), "data", "dictionary.txt"
+                )
+                search_paths.insert(0, module_path)
+            except ImportError:
+                pass
+
+            for path in search_paths:
+                if os.path.exists(path):
+                    dict_path = path
+                    break
+
+        if not dict_path or not os.path.exists(dict_path):
+            return
+
+        try:
+            with open(dict_path, "r", encoding="utf-8", errors="ignore") as f:
+                for line in f:
+                    line = line.strip()
+                    if not line or line.startswith("#"):
+                        continue
+                    parts = line.split("->")
+                    if len(parts) >= 2:
+                        wrong = parts[0].strip().lower()
+                        correct = parts[1].strip().split(",")[0].strip()
+                        self.spelling_dict[wrong] = correct
+        except IOError:
+            pass
+
+    def _init_forbidden_rules(self) -> list:
+        """Initialize DPDK-specific forbidden token rules."""
+        return [
+            # Refrain from new calls to RTE_LOG in libraries
+            {
+                "folders": ["lib"],
+                "patterns": [r"RTE_LOG\("],
+                "message": "Prefer RTE_LOG_LINE",
+            },
+            # Refrain from new calls to RTE_LOG in drivers
+            {
+                "folders": ["drivers"],
+                "skip_files": [r".*osdep\.h$"],
+                "patterns": [r"RTE_LOG\(", r"RTE_LOG_DP\(", r"rte_log\("],
+                "message": "Prefer RTE_LOG_LINE/RTE_LOG_DP_LINE",
+            },
+            # No output on stdout or stderr
+            {
+                "folders": ["lib", "drivers"],
+                "patterns": [r"\bprintf\b", r"fprintf\(stdout,", r"fprintf\(stderr,"],
+                "message": "Writing to stdout or stderr",
+            },
+            # Refrain from rte_panic() and rte_exit()
+            {
+                "folders": ["lib", "drivers"],
+                "patterns": [r"rte_panic\(", r"rte_exit\("],
+                "message": "Using rte_panic/rte_exit",
+            },
+            # Don't call directly install_headers()
+            {
+                "folders": ["lib", "drivers"],
+                "patterns": [r"\binstall_headers\b"],
+                "message": "Using install_headers()",
+            },
+            # Refrain from using compiler attribute without common macro
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "skip_files": [r"lib/eal/include/rte_common\.h"],
+                "patterns": [r"__attribute__"],
+                "message": "Using compiler attribute directly",
+            },
+            # Check %l or %ll format specifier
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"%ll*[xud]"],
+                "message": "Using %l format, prefer %PRI*64 if type is [u]int64_t",
+            },
+            # Refrain from 16/32/64 bits rte_atomicNN_xxx()
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"rte_atomic[0-9][0-9]_.*\("],
+                "message": "Using rte_atomicNN_xxx",
+            },
+            # Refrain from rte_smp_[r/w]mb()
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"rte_smp_(r|w)?mb\("],
+                "message": "Using rte_smp_[r/w]mb",
+            },
+            # Refrain from __sync_xxx builtins
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"__sync_.*\("],
+                "message": "Using __sync_xxx builtins",
+            },
+            # Refrain from __rte_atomic_thread_fence()
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"__rte_atomic_thread_fence\("],
+                "message": "Using __rte_atomic_thread_fence, prefer rte_atomic_thread_fence",
+            },
+            # Refrain from __atomic_xxx builtins
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "skip_files": [r"drivers/common/cnxk/"],
+                "patterns": [
+                    r"__atomic_.*\(",
+                    r"__ATOMIC_(RELAXED|CONSUME|ACQUIRE|RELEASE|ACQ_REL|SEQ_CST)",
+                ],
+                "message": "Using __atomic_xxx/__ATOMIC_XXX built-ins, prefer rte_atomic_xxx/rte_memory_order_xxx",
+            },
+            # Refrain from some pthread functions
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [
+                    r"pthread_(create|join|detach|set(_?name_np|affinity_np)|attr_set(inheritsched|schedpolicy))\("
+                ],
+                "message": "Using pthread functions, prefer rte_thread",
+            },
+            # Forbid use of __reserved
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"\b__reserved\b"],
+                "message": "Using __reserved",
+            },
+            # Forbid use of __alignof__
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"\b__alignof__\b"],
+                "message": "Using __alignof__, prefer C11 alignof",
+            },
+            # Forbid use of __typeof__
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"\b__typeof__\b"],
+                "message": "Using __typeof__, prefer typeof",
+            },
+            # Forbid use of __builtin_*
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "skip_files": [
+                    r"lib/eal/",
+                    r"drivers/.*/base/",
+                    r"drivers/.*osdep\.h$",
+                ],
+                "patterns": [r"\b__builtin_"],
+                "message": "Using __builtin helpers, prefer EAL macros",
+            },
+            # Forbid inclusion of linux/pci_regs.h
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"include.*linux/pci_regs\.h"],
+                "message": "Using linux/pci_regs.h, prefer rte_pci.h",
+            },
+            # Forbid variadic argument pack extension in macros
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"#\s*define.*[^(,\s]\.\.\.[\s]*\)"],
+                "message": "Do not use variadic argument pack in macros",
+            },
+            # Forbid __rte_packed_begin with enums
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "patterns": [r"enum.*__rte_packed_begin"],
+                "message": "Using __rte_packed_begin with enum is not allowed",
+            },
+            # Forbid use of #pragma
+            {
+                "folders": ["lib", "drivers", "app", "examples"],
+                "skip_files": [r"lib/eal/include/rte_common\.h"],
+                "patterns": [r"(#pragma|_Pragma)"],
+                "message": "Using compilers pragma is not allowed",
+            },
+            # Forbid experimental build flag except in examples
+            {
+                "folders": ["lib", "drivers", "app"],
+                "patterns": [r"-DALLOW_EXPERIMENTAL_API", r"allow_experimental_apis"],
+                "message": "Using experimental build flag for in-tree compilation",
+            },
+            # Refrain from using RTE_LOG_REGISTER for drivers and libs
+            {
+                "folders": ["lib", "drivers"],
+                "patterns": [r"\bRTE_LOG_REGISTER\b"],
+                "message": "Using RTE_LOG_REGISTER, prefer RTE_LOG_REGISTER_(DEFAULT|SUFFIX)",
+            },
+            # Forbid non-internal thread in drivers and libs
+            {
+                "folders": ["lib", "drivers"],
+                "patterns": [r"rte_thread_(set_name|create_control)\("],
+                "message": "Prefer rte_thread_(set_prefixed_name|create_internal_control)",
+            },
+        ]
+
+    def add_result(
+        self,
+        level: str,
+        type_name: str,
+        message: str,
+        filename: str = "",
+        line_num: int = 0,
+        line_content: str = "",
+    ) -> None:
+        """Add a check result."""
+        result = CheckResult(
+            level=level,
+            type_name=type_name,
+            message=message,
+            filename=filename,
+            line_num=line_num,
+            line_content=line_content,
+        )
+        self.results.append(result)
+
+        if level == "ERROR":
+            self.errors += 1
+        elif level == "WARNING":
+            self.warnings += 1
+        else:
+            self.checks += 1
+
+    def parse_patch(self, content: str) -> PatchInfo:
+        """Parse a patch and extract information."""
+        info = PatchInfo()
+        current_file = ""
+        in_diff = False
+        line_num_in_new = 0
+
+        lines = content.split("\n")
+        for i, line in enumerate(lines):
+            # Extract subject
+            if line.startswith("Subject:"):
+                subject = line[8:].strip()
+                # Handle multi-line subjects
+                j = i + 1
+                while j < len(lines) and lines[j].startswith(" "):
+                    subject += " " + lines[j].strip()
+                    j += 1
+                info.subject = subject
+
+            # Extract author
+            if line.startswith("From:"):
+                info.author = line[5:].strip()
+                match = re.search(r"<([^>]+)>", info.author)
+                if match:
+                    info.author_email = match.group(1)
+
+            # Extract Signed-off-by
+            match = re.match(r"^Signed-off-by:\s*(.+)$", line, re.IGNORECASE)
+            if match:
+                info.signoffs.append(match.group(1).strip())
+
+            # Extract Fixes tag
+            match = re.match(r"^Fixes:\s*([0-9a-fA-F]+)", line)
+            if match:
+                info.has_fixes_tag = True
+                info.fixes_commits.append(match.group(1))
+
+            # Track files in diff
+            if line.startswith("diff --git"):
+                match = re.match(r"diff --git a/(\S+) b/(\S+)", line)
+                if match:
+                    current_file = match.group(2)
+                    if current_file not in info.files:
+                        info.files.append(current_file)
+                    info.added_lines[current_file] = []
+                in_diff = True
+
+            # Track hunks
+            if line.startswith("@@"):
+                match = re.match(r"@@ -\d+(?:,\d+)? \+(\d+)", line)
+                if match:
+                    line_num_in_new = int(match.group(1))
+                continue
+
+            # Track added lines
+            if in_diff and current_file:
+                if line.startswith("+") and not line.startswith("+++"):
+                    info.added_lines[current_file].append((line_num_in_new, line[1:]))
+                    line_num_in_new += 1
+                elif line.startswith("-"):
+                    pass  # Deleted line, don't increment
+                elif not line.startswith("\\"):
+                    # Context line - store it for reference by line number
+                    if current_file not in info.context_before:
+                        info.context_before[current_file] = {}
+                    info.context_before[current_file][line_num_in_new] = (
+                        line[1:] if line.startswith(" ") else line
+                    )
+                    line_num_in_new += 1
+
+        return info
+
+    def check_line_length(self, patch_info: PatchInfo) -> None:
+        """Check for lines exceeding maximum length."""
+        max_len = self.config.get("max_line_length", DEFAULT_LINE_LENGTH)
+
+        for filename, lines in patch_info.added_lines.items():
+            # Skip documentation files - they have tables and other content
+            # where long lines are acceptable
+            if filename.endswith((".rst", ".md", ".txt")) or "/doc/" in filename:
+                continue
+
+            for line_num, content in lines:
+                # Skip strings that span multiple lines
+                if len(content) > max_len:
+                    # Don't warn about long strings or URLs
+                    if '"' in content and content.count('"') >= 2:
+                        continue
+                    if "http://" in content or "https://" in content:
+                        continue
+                    # Check if it's a comment line
+                    if (
+                        content.strip().startswith("/*")
+                        or content.strip().startswith("*")
+                        or content.strip().startswith("//")
+                    ):
+                        self.add_result(
+                            "WARNING",
+                            "LONG_LINE_COMMENT",
+                            f"line length of {len(content)} exceeds {max_len} columns",
+                            filename,
+                            line_num,
+                            content,
+                        )
+                    else:
+                        self.add_result(
+                            "WARNING",
+                            "LONG_LINE",
+                            f"line length of {len(content)} exceeds {max_len} columns",
+                            filename,
+                            line_num,
+                            content,
+                        )
+
+    def check_trailing_whitespace(self, patch_info: PatchInfo) -> None:
+        """Check for trailing whitespace."""
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                if content != content.rstrip():
+                    self.add_result(
+                        "WARNING",
+                        "TRAILING_WHITESPACE",
+                        "trailing whitespace",
+                        filename,
+                        line_num,
+                        content,
+                    )
+
+    def check_tabs_spaces(self, patch_info: PatchInfo) -> None:
+        """Check for space before tab and mixed indentation."""
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                if " \t" in content:
+                    self.add_result(
+                        "WARNING",
+                        "SPACE_BEFORE_TAB",
+                        "space before tab in indent",
+                        filename,
+                        line_num,
+                        content,
+                    )
+
+    def check_signoff(self, patch_info: PatchInfo) -> None:
+        """Check for Signed-off-by line."""
+        if not patch_info.signoffs:
+            self.add_result(
+                "ERROR", "MISSING_SIGN_OFF", "Missing Signed-off-by: line(s)"
+            )
+
+    def check_coding_style(self, patch_info: PatchInfo) -> None:
+        """Check various coding style issues."""
+        for filename, lines in patch_info.added_lines.items():
+            # Skip non-C files for most checks
+            is_c_file = filename.endswith((".c", ".h"))
+            is_c_source = filename.endswith(".c")
+            is_header = filename.endswith(".h")
+
+            prev_line = ""
+            indent_stack = []
+            context_before = patch_info.context_before.get(filename, {})
+            for line_num, content in lines:
+                self.lines_checked += 1
+
+                # Check if the line immediately before this one (which may be
+                # a context line from the patch) ended with backslash continuation
+                prev_context = context_before.get(line_num - 1, "")
+                in_macro_continuation = prev_context.rstrip().endswith("\\")
+
+                if is_c_file:
+                    # Check for extern function declarations in .c files
+                    # Only flag functions (have parentheses), not data
+                    if is_c_source and re.match(r"^\s*extern\b", content):
+                        if re.search(r"\(", content):
+                            self.add_result(
+                                "WARNING",
+                                "AVOID_EXTERNS",
+                                "extern is not needed for function declarations",
+                                filename,
+                                line_num,
+                                content,
+                            )
+
+                    # Check for unnecessary break after goto/return/continue
+                    # Only flag if the previous statement is unconditional (not inside an if)
+                    if re.match(r"^\s*break\s*;", content):
+                        # Check if previous line is an unconditional return/goto/continue
+                        # It's unconditional if it starts at the same or lower indentation as break
+                        # or if it's a plain return/goto not inside an if block
+                        prev_stripped = prev_line.strip() if prev_line else ""
+                        if re.match(
+                            r"^(goto\s+\w+|return\b|continue)\s*[^;]*;\s*$",
+                            prev_stripped,
+                        ):
+                            # Check indentation - if prev line has same or less indentation, it's unconditional
+                            break_indent = len(content) - len(content.lstrip())
+                            prev_indent = (
+                                len(prev_line) - len(prev_line.lstrip())
+                                if prev_line
+                                else 0
+                            )
+                            # Only flag if the return/goto is at the same indentation level
+                            # (meaning it's not inside a nested if block)
+                            if prev_indent <= break_indent:
+                                self.add_result(
+                                    "WARNING",
+                                    "UNNECESSARY_BREAK",
+                                    "break is not useful after a goto or return",
+                                    filename,
+                                    line_num,
+                                    content,
+                                )
+
+                    # STRNCPY: should use strlcpy
+                    if re.search(r"\bstrncpy\s*\(", content):
+                        self.add_result(
+                            "WARNING",
+                            "STRNCPY",
+                            "Prefer strlcpy over strncpy - see: https://lore.kernel.org/r/CAHk-=wgfRnXz0W3D37d01q3JFkr_i_uTL=V6A6G1oUZcprmknw@mail.gmail.com/",
+                            filename,
+                            line_num,
+                            content,
+                        )
+
+                    # STRCPY: unsafe string copy
+                    if re.search(r"\bstrcpy\s*\(", content):
+                        self.add_result(
+                            "ERROR",
+                            "STRCPY",
+                            "strcpy is unsafe - use strlcpy or snprintf",
+                            filename,
+                            line_num,
+                            content,
+                        )
+
+                    # Check for complex macros without proper enclosure
+                    # Note: Compound literal macros like (type[]){...} are valid C99
+                    # and commonly used in DPDK, so we don't flag those.
+                    # Only flag macros with multiple statements without do-while wrapping.
+                    if re.match(r"^\s*#\s*define\s+\w+\s*\([^)]*\)\s+\{", content):
+                        # Macro body starts with { but is not a compound literal
+                        # Check if it's missing do { } while(0)
+                        if not re.search(r"\bdo\s*\{", content):
+                            self.add_result(
+                                "ERROR",
+                                "COMPLEX_MACRO",
+                                "Macros with complex values should be enclosed in parentheses or do { } while(0)",
+                                filename,
+                                line_num,
+                                content,
+                            )
+
+                    # SPACING: missing space before ( in control statements
+                    if re.search(r"\b(if|while|for|switch)\(", content):
+                        self.add_result(
+                            "WARNING",
+                            "SPACING",
+                            "space required before the open parenthesis '('",
+                            filename,
+                            line_num,
+                            content,
+                        )
+
+                    # SPACING: space prohibited after open square bracket
+                    # Remove string contents first to avoid false positives
+                    code_only = re.sub(r'"[^"]*"', '""', content)
+                    if re.search(r"\[\s+[^\]]", code_only) and not re.search(
+                        r"\[\s*\]", code_only
+                    ):
+                        self.add_result(
+                            "WARNING",
+                            "SPACING",
+                            "space prohibited after that open square bracket '['",
+                            filename,
+                            line_num,
+                            content,
+                        )
+
+                    # SPACING: space prohibited before close square bracket
+                    if re.search(r"[^\[]\s+\]", code_only):
+                        self.add_result(
+                            "WARNING",
+                            "SPACING",
+                            "space prohibited before that close square bracket ']'",
+                            filename,
+                            line_num,
+                            content,
+                        )
+
+                    # RETURN_PARENTHESES: return with parentheses wrapping the
+                    # entire expression, e.g. "return (x + y);"
+                    # Only flag when the outermost parens span the whole value,
+                    # so "return (a) | (b);" is NOT flagged (parens are for
+                    # sub-expression grouping, not wrapping the return value).
+                    if re.search(r"\breturn\s*\([^;]+\)\s*;", content):
+                        m = re.search(r"\breturn\s+(.*?)\s*;", content)
+                        if m:
+                            expr = m.group(1).strip()
+                            if expr.startswith("(") and expr.endswith(")"):
+                                # Walk to find the matching close paren for the first '('
+                                depth = 0
+                                match_pos = -1
+                                for ci, ch in enumerate(expr):
+                                    if ch == "(":
+                                        depth += 1
+                                    elif ch == ")":
+                                        depth -= 1
+                                        if depth == 0:
+                                            match_pos = ci
+                                            break
+                                # Only flag if the matching ')' is the last character,
+                                # meaning the outer parens wrap the entire expression
+                                if match_pos == len(expr) - 1:
+                                    inner = expr[1:-1].strip()
+                                    # Exclude casts: (type)expr and function calls
+                                    is_cast = re.match(
+                                        r"^[a-zA-Z_][\w\s\*]*\b\s*[\w(]", inner
+                                    )
+                                    is_func = re.search(r"\w\s*\(", inner)
+                                    if not is_cast and not is_func:
+                                        self.add_result(
+                                            "WARNING",
+                                            "RETURN_PARENTHESES",
+                                            "return is not a function, parentheses are not required",
+                                            filename,
+                                            line_num,
+                                            content,
+                                        )
+
+                    # BRACES: single statement blocks that need braces
+                    # Check for if/else/while/for without braces on multiline
+                    if re.match(r"^\s*(if|else\s+if|while|for)\s*\([^{]*$", content):
+                        # Control statement without opening brace - check next line
+                        pass  # Would need lookahead
+
+                    # INITIALISED_STATIC: static initialized to 0/NULL
+                    if re.match(
+                        r"^\s*static\s+.*=\s*(0|NULL|0L|0UL|0ULL|0LL)\s*;", content
+                    ):
+                        self.add_result(
+                            "WARNING",
+                            "INITIALISED_STATIC",
+                            "do not initialise statics to 0 or NULL",
+                            filename,
+                            line_num,
+                            content,
+                        )
+
+                    # GLOBAL_INITIALISERS: global initialized to 0/NULL
+                    if re.match(
+                        r"^[a-zA-Z_][a-zA-Z0-9_\s\*]*=\s*(0|NULL|0L|0UL|0ULL|0LL)\s*;",
+                        content,
+                    ):
+                        if not re.match(r"^\s*static\s+", content):
+                            self.add_result(
+                                "WARNING",
+                                "GLOBAL_INITIALISERS",
+                                "do not initialise globals to 0 or NULL",
+                                filename,
+                                line_num,
+                                content,
+                            )
+
+                    # Note: DEEP_INDENTATION check removed - without full brace
+                    # nesting tracking (as in checkpatch.pl), tab counting produces
+                    # too many false positives in legitimate code like switch/case
+                    # blocks and nested loops in driver transmit paths.
+
+                    # TRAILING_STATEMENTS: code on same line as } OR control statement
+                    # But allow struct/union member declarations: } name; or } name; /* comment */
+                    # Skip macro definitions - they often have intentional one-line constructs
+                    is_macro_line = content.rstrip().endswith("\\") or re.match(
+                        r"^\s*#\s*define", content
+                    )
+                    if not is_macro_line:
+                        if re.search(r"\}\s*[a-zA-Z_]", content) and not re.search(
+                            r"\}\s*(else|while)\b", content
+                        ):
+                            # Check if this is a struct/union member declaration or
+                            # named initializer list (e.g. } errata_vals[] = {)
+                            # Pattern: } identifier; or } identifier[]; or } identifier[] = {
+                            if not re.search(
+                                r"\}\s*\w+\s*(\[\d*\])?\s*;\s*(/\*.*\*/|//.*)?\s*$",
+                                content,
+                            ) and not re.search(
+                                r"\}\s*\w+\s*(\[\d*\])?\s*=\s*\{", content
+                            ):
+                                self.add_result(
+                                    "ERROR",
+                                    "TRAILING_STATEMENTS",
+                                    "trailing statements should be on next line",
+                                    filename,
+                                    line_num,
+                                    content,
+                                )
+                        # Also check for if/while with statement on same line (not opening brace)
+                        # Pattern: if (cond) statement; or while (cond) statement;
+                        # Note: 'for' is excluded because its header contains semicolons
+                        # and nested parentheses (e.g. sizeof()) break simple regex matching
+                        if re.search(
+                            r"\b(if|while)\s*\([^)]+\)\s+(?![\s{])[^;]*;", content
+                        ):
+                            self.add_result(
+                                "ERROR",
+                                "TRAILING_STATEMENTS",
+                                "trailing statements should be on next line",
+                                filename,
+                                line_num,
+                                content,
+                            )
+
+                    # CONSTANT_COMPARISON: Yoda conditions (constant on left)
+                    # Check for constants on left side of any comparison operator
+                    # Strip comments and strings to avoid false positives
+                    cmp_content = content.strip()
+                    cmp_content = re.sub(r"/\*.*?\*/", "", cmp_content)  # inline /* */
+                    cmp_content = re.sub(r"//.*$", "", cmp_content)  # trailing //
+                    cmp_content = re.sub(r'"[^"]*"', '""', cmp_content)  # strings
+                    if cmp_content and not cmp_content.startswith("*"):
+                        if re.search(
+                            r"\b(NULL|true|false)\s*[!=<>]=?\s*[&*\w]", cmp_content
+                        ) or re.search(r"[\s(]\s*0\s*[!=<>]=?\s*[&*\w]", cmp_content):
+                            # Exclude static_assert - operand order doesn't matter
+                            if not re.match(r"^\s*static_assert\s*\(", cmp_content):
+                                self.add_result(
+                                    "WARNING",
+                                    "CONSTANT_COMPARISON",
+                                    "Comparisons should place the constant on the right side",
+                                    filename,
+                                    line_num,
+                                    content,
+                                )
+
+                    # BRACES: single statement block should not have braces (or vice versa)
+                    # Check for if/else/while/for with single statement in braces
+                    if re.match(r"^\s*(if|while|for)\s*\([^)]+\)\s*\{\s*$", prev_line):
+                        if re.match(r"^\s*\w.*;\s*$", content) and not re.search(
+                            r"^\s*(if|else|while|for|switch|case|default|return\s*;)",
+                            content,
+                        ):
+                            # Check if next line is just closing brace - would need lookahead
+                            pass
+
+                    # ONE_SEMICOLON: double semicolon
+                    if re.search(r";;", content) and not re.search(
+                        r"for\s*\([^)]*;;", content
+                    ):
+                        self.add_result(
+                            "WARNING",
+                            "ONE_SEMICOLON",
+                            "Statements terminations use 1 semicolon",
+                            filename,
+                            line_num,
+                            content,
+                        )
+
+                    # CODE_INDENT/LEADING_SPACE: spaces used for indentation instead of tabs
+                    if re.match(
+                        r"^    +[^\s]", content
+                    ) and not content.strip().startswith("*"):
+                        # Line starts with spaces (not tabs) - but allow for alignment in comments
+                        self.add_result(
+                            "WARNING",
+                            "CODE_INDENT",
+                            "code indent should use tabs where possible",
+                            filename,
+                            line_num,
+                            content,
+                        )
+
+                    # LEADING_SPACE: spaces at start of line (more general)
+                    if re.match(r"^ +\t", content):
+                        self.add_result(
+                            "WARNING",
+                            "LEADING_SPACE",
+                            "please, no spaces at the start of a line",
+                            filename,
+                            line_num,
+                            content,
+                        )
+
+                    # LINE_CONTINUATIONS: backslash continuation outside macros
+                    # Check if this line has a backslash continuation
+                    if content.rstrip().endswith("\\"):
+                        # Only flag if not inside a macro definition
+                        # A macro context means either:
+                        # - This line starts a #define
+                        # - The previous line (added or context) was a continuation
+                        # - This line is a preprocessor directive
+                        is_in_macro = (
+                            re.match(r"^\s*#", content)
+                            or (prev_line and prev_line.rstrip().endswith("\\"))
+                            or in_macro_continuation
+                        )
+                        if not is_in_macro:
+                            self.add_result(
+                                "WARNING",
+                                "LINE_CONTINUATIONS",
+                                "Avoid unnecessary line continuations",
+                                filename,
+                                line_num,
+                                content,
+                            )
+
+                    # OPEN_ENDED_LINE: lines should not end with '('
+                    # This suggests arguments should start on the same line
+                    # as the function name rather than wrapping immediately
+                    # Skip macro continuation lines and preprocessor directives
+                    stripped_end = content.rstrip()
+                    if not is_macro_line and stripped_end.endswith("("):
+                        # Don't flag control flow statements - their parens
+                        # contain conditions, not argument lists
+                        if not re.search(
+                            r"\b(if|while|for|switch)\s*\($", stripped_end
+                        ):
+                            self.add_result(
+                                "CHECK",
+                                "OPEN_ENDED_LINE",
+                                "Lines should not end with a '('",
+                                filename,
+                                line_num,
+                                content,
+                            )
+
+                    # FUNCTION_WITHOUT_ARGS: empty parens instead of (void)
+                    if is_header and re.search(r"\b\w+\s*\(\s*\)\s*;", content):
+                        if not re.search(
+                            r"\b(while|if|for|switch|return)\s*\(\s*\)", content
+                        ):
+                            self.add_result(
+                                "ERROR",
+                                "FUNCTION_WITHOUT_ARGS",
+                                "Bad function definition - use (void) instead of ()",
+                                filename,
+                                line_num,
+                                content,
+                            )
+
+                    # INLINE_LOCATION: inline should come after storage class
+                    if re.match(r"^\s*inline\s+(static|extern)", content):
+                        self.add_result(
+                            "ERROR",
+                            "INLINE_LOCATION",
+                            "inline keyword should sit between storage class and type",
+                            filename,
+                            line_num,
+                            content,
+                        )
+
+                    # STATIC_CONST: const should come after static
+                    if re.match(r"^\s*const\s+static\b", content):
+                        self.add_result(
+                            "WARNING",
+                            "STATIC_CONST",
+                            "Move const after static - use 'static const'",
+                            filename,
+                            line_num,
+                            content,
+                        )
+                        self.add_result(
+                            "WARNING",
+                            "STORAGE_CLASS",
+                            "storage class should be at the beginning of the declaration",
+                            filename,
+                            line_num,
+                            content,
+                        )
+
+                    # CONST_CONST: const used twice
+                    if re.search(r"\bconst\s+\w+\s+const\b", content):
+                        self.add_result(
+                            "WARNING",
+                            "CONST_CONST",
+                            "const used twice - remove duplicate const",
+                            filename,
+                            line_num,
+                            content,
+                        )
+
+                    # SELF_ASSIGNMENT: x = x (simple variable, not struct members)
+                    # Match only simple identifiers, not struct/pointer member access
+                    match = re.search(r"^\s*(\w+)\s*=\s*(\w+)\s*;", content)
+                    if match and match.group(1) == match.group(2):
+                        self.add_result(
+                            "WARNING",
+                            "SELF_ASSIGNMENT",
+                            "Do not use self-assignments to avoid compiler warnings",
+                            filename,
+                            line_num,
+                            content,
+                        )
+
+                    # PREFER_DEFINED_ATTRIBUTE_MACRO: prefer DPDK/kernel macros over __attribute__
+                    attr_macros = {
+                        "cold": "__rte_cold",
+                        "hot": "__rte_hot",
+                        "noinline": "__rte_noinline",
+                        "always_inline": "__rte_always_inline",
+                        "unused": "__rte_unused",
+                        "packed": "__rte_packed",
+                        "aligned": "__rte_aligned",
+                        "weak": "__rte_weak",
+                        "pure": "__rte_pure",
+                    }
+                    for attr, replacement in attr_macros.items():
+                        if re.search(rf"__attribute__\s*\(\s*\(\s*{attr}\b", content):
+                            self.add_result(
+                                "WARNING",
+                                "PREFER_DEFINED_ATTRIBUTE_MACRO",
+                                f"Prefer {replacement} over __attribute__(({attr}))",
+                                filename,
+                                line_num,
+                                content,
+                            )
+
+                    # POINTER_LOCATION: char* instead of char *
+                    if re.search(
+                        r"\b(char|int|void|short|long|float|double|unsigned|signed)\*\s+\w",
+                        content,
+                    ):
+                        self.add_result(
+                            "ERROR",
+                            "POINTER_LOCATION",
+                            '"foo* bar" should be "foo *bar"',
+                            filename,
+                            line_num,
+                            content,
+                        )
+
+                    # MACRO_WITH_FLOW_CONTROL: macros with return/goto/break
+                    if re.match(
+                        r"^\s*#\s*define\s+\w+.*\b(return|goto|break|continue)\b",
+                        content,
+                    ):
+                        self.add_result(
+                            "WARNING",
+                            "MACRO_WITH_FLOW_CONTROL",
+                            "Macros with flow control statements should be avoided",
+                            filename,
+                            line_num,
+                            content,
+                        )
+
+                    # MULTISTATEMENT_MACRO_USE_DO_WHILE: macros with multiple statements
+                    if re.match(
+                        r"^\s*#\s*define\s+\w+\([^)]*\)\s+.*;\s*[^\\]", content
+                    ):
+                        if not re.search(r"do\s*\{", content):
+                            self.add_result(
+                                "WARNING",
+                                "MULTISTATEMENT_MACRO_USE_DO_WHILE",
+                                "Macros with multiple statements should use do {} while(0)",
+                                filename,
+                                line_num,
+                                content,
+                            )
+
+                    # MULTISTATEMENT_MACRO_USE_DO_WHILE: macros starting with if
+                    if re.match(r"^\s*#\s*define\s+\w+\([^)]*\)\s+if\s*\(", content):
+                        self.add_result(
+                            "ERROR",
+                            "MULTISTATEMENT_MACRO_USE_DO_WHILE",
+                            "Macros starting with if should be enclosed by a do - while loop",
+                            filename,
+                            line_num,
+                            content,
+                        )
+
+                    # Multiple statements on one line (skip comments and strings)
+                    stripped_content = content.strip()
+                    if re.search(r";\s*[a-zA-Z_]", content) and "for" not in content:
+                        # Skip if line is a comment
+                        if not (
+                            stripped_content.startswith("/*")
+                            or stripped_content.startswith("*")
+                            or stripped_content.startswith("//")
+                        ):
+                            # Skip if the semicolon is inside a string or comment
+                            # Remove strings and comments before checking
+                            code_only = re.sub(
+                                r'"[^"]*"', '""', content
+                            )  # Remove string contents
+                            code_only = re.sub(
+                                r"/\*.*?\*/", "", code_only
+                            )  # Remove /* */ comments
+                            code_only = re.sub(
+                                r"//.*$", "", code_only
+                            )  # Remove // comments
+                            if re.search(r";\s*[a-zA-Z_]", code_only):
+                                self.add_result(
+                                    "CHECK",
+                                    "MULTIPLE_STATEMENTS",
+                                    "multiple statements on one line",
+                                    filename,
+                                    line_num,
+                                    content,
+                                )
+
+                    # Check for C99 comments in headers that should use C89
+                    if is_header and "//" in content:
+                        # Only flag if not in a string
+                        stripped = re.sub(r'"[^"]*"', "", content)
+                        if "//" in stripped:
+                            self.add_result(
+                                "CHECK",
+                                "C99_COMMENTS",
+                                "C99 // comments are acceptable but /* */ is preferred in headers",
+                                filename,
+                                line_num,
+                                content,
+                            )
+
+                    # BLOCK_COMMENT_STYLE: block comments style issues
+                    # Leading /* on its own line (but allow Doxygen /** style)
+                    if re.match(r"^\s*/\*\*+\s*$", content):
+                        # Allow /** (Doxygen) but not /*** or more
+                        if not re.match(r"^\s*/\*\*\s*$", content):
+                            self.add_result(
+                                "WARNING",
+                                "BLOCK_COMMENT_STYLE",
+                                "Block comments should not use a leading /* on a line by itself",
+                                filename,
+                                line_num,
+                                content,
+                            )
+                    # Trailing */ on separate line after block comment
+                    if re.match(
+                        r"^\s*\*+/\s*$", content
+                    ) and prev_line.strip().startswith("*"):
+                        pass  # This is actually acceptable
+                    # Block with trailing */ but content before it (like === */)
+                    if re.search(r"\S\s*=+\s*\*/\s*$", content):
+                        self.add_result(
+                            "WARNING",
+                            "BLOCK_COMMENT_STYLE",
+                            "Block comments use a trailing */ on a separate line",
+                            filename,
+                            line_num,
+                            content,
+                        )
+
+                    # REPEATED_WORD: check for repeated words (case-sensitive to
+                    # avoid false positives like "--format FORMAT" in help text).
+                    # Also skip "struct foo foo;" / "union foo foo;" where the
+                    # type name and variable name are legitimately identical.
+                    words = re.findall(r"\b(\w+)\s+\1\b", content)
+                    for word in words:
+                        word_lower = word.lower()
+                        # Skip common valid repeated patterns
+                        if word_lower in ("that", "had", "long", "int", "short"):
+                            continue
+                        # Skip struct/union type-name used as variable name:
+                        #   struct foo foo;  or  } foo foo;
+                        if re.search(
+                            r"\b(struct|union)\s+" + re.escape(word) + r"\s+" + re.escape(word) + r"\b",
+                            content,
+                        ):
+                            continue
+                        self.add_result(
+                            "WARNING",
+                            "REPEATED_WORD",
+                            f"Possible repeated word: '{word}'",
+                            filename,
+                            line_num,
+                            content,
+                        )
+
+                    # STRING_FRAGMENTS: unnecessary string concatenation like "foo" "bar"
+                    # Must have closing quote, whitespace, opening quote pattern
+                    if re.search(r'"\s*"\s*[^)]', content) and not re.search(
+                        r"#\s*define", content
+                    ):
+                        # Verify it's actually two separate strings being concatenated
+                        # by checking for the pattern: "..." "..."
+                        if re.search(r'"[^"]*"\s+"[^"]*"', content):
+                            self.add_result(
+                                "CHECK",
+                                "STRING_FRAGMENTS",
+                                "Consecutive strings are generally better as a single string",
+                                filename,
+                                line_num,
+                                content,
+                            )
+
+                prev_line = content
+
+    def check_spelling(self, patch_info: PatchInfo) -> None:
+        """Check for spelling errors using codespell dictionary."""
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                # REPEATED_WORD check for non-C files (C files handled in check_coding_style)
+                if not filename.endswith((".c", ".h")):
+                    words = re.findall(r"\b(\w+)\s+\1\b", content)
+                    for word in words:
+                        word_lower = word.lower()
+                        if word_lower in ("that", "had", "long", "int", "short"):
+                            continue
+                        if re.search(
+                            r"\b(struct|union)\s+" + re.escape(word) + r"\s+" + re.escape(word) + r"\b",
+                            content,
+                        ):
+                            continue
+                        self.add_result(
+                            "WARNING",
+                            "REPEATED_WORD",
+                            f"Possible repeated word: '{word}'",
+                            filename,
+                            line_num,
+                            content,
+                        )
+
+                # Spelling check
+                if self.spelling_dict:
+                    # Common abbreviations that should not be flagged as typos
+                    abbreviations = {
+                        "nd",
+                        "ns",
+                        "na",
+                        "ra",
+                        "rs",  # IPv6 Neighbor Discovery
+                        "tx",
+                        "rx",
+                        "id",
+                        "io",
+                        "ip",  # Common networking
+                        "tcp",
+                        "udp",
+                        "arp",
+                        "dns",  # Protocols
+                        "hw",
+                        "sw",
+                        "fw",  # Hardware/Software/Firmware
+                        "src",
+                        "dst",
+                        "ptr",
+                        "buf",  # Common code abbreviations
+                        "cfg",
+                        "ctx",
+                        "idx",
+                        "cnt",  # Config/Context/Index/Count
+                        "len",
+                        "num",
+                        "max",
+                        "min",  # Length/Number/Max/Min
+                        "prev",
+                        "next",
+                        "curr",  # Previous/Next/Current
+                        "init",
+                        "fini",
+                        "deinit",  # Initialize/Finish
+                        "alloc",
+                        "dealloc",
+                        "realloc",  # Memory
+                        "endcode",  # Doxygen tag
+                    }
+                    # Extract words, but skip contractions (don't, couldn't, etc.)
+                    # by removing them before word extraction
+                    spell_content = re.sub(r"[a-zA-Z]+n't\b", "", content)
+                    spell_content = re.sub(r"[a-zA-Z]+'[a-zA-Z]+", "", spell_content)
+                    words = re.findall(r"\b[a-zA-Z]+\b", spell_content)
+                    for word in words:
+                        lower_word = word.lower()
+                        if (
+                            lower_word in self.spelling_dict
+                            and lower_word not in abbreviations
+                        ):
+                            self.add_result(
+                                "WARNING",
+                                "TYPO_SPELLING",
+                                f"'{word}' may be misspelled - perhaps '{self.spelling_dict[lower_word]}'?",
+                                filename,
+                                line_num,
+                                content,
+                            )
+
+    def check_forbidden_tokens(self, patch_info: PatchInfo) -> None:
+        """Check for DPDK-specific forbidden tokens."""
+        for filename, lines in patch_info.added_lines.items():
+            for rule in self.forbidden_rules:
+                # Check if file is in one of the target folders
+                in_folder = False
+                for folder in rule["folders"]:
+                    if filename.startswith(folder + "/") or filename.startswith(
+                        "b/" + folder + "/"
+                    ):
+                        in_folder = True
+                        break
+
+                if not in_folder:
+                    continue
+
+                # Check if file should be skipped
+                skip = False
+                for skip_pattern in rule.get("skip_files", []):
+                    if re.search(skip_pattern, filename):
+                        skip = True
+                        break
+
+                if skip:
+                    continue
+
+                # Check each line for forbidden patterns
+                for line_num, content in lines:
+                    for pattern in rule["patterns"]:
+                        if re.search(pattern, content):
+                            self.add_result(
+                                "WARNING",
+                                "FORBIDDEN_TOKEN",
+                                rule["message"],
+                                filename,
+                                line_num,
+                                content,
+                            )
+                            break
+
+    def check_experimental_tags(self, patch_info: PatchInfo) -> None:
+        """Check __rte_experimental tag placement."""
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                if "__rte_experimental" in content:
+                    # Should only be in headers
+                    if filename.endswith(".c"):
+                        self.add_result(
+                            "WARNING",
+                            "EXPERIMENTAL_TAG",
+                            f"Please only put __rte_experimental tags in headers ({filename})",
+                            filename,
+                            line_num,
+                            content,
+                        )
+                    # Should appear alone on the line
+                    stripped = content.strip()
+                    if stripped != "__rte_experimental":
+                        self.add_result(
+                            "WARNING",
+                            "EXPERIMENTAL_TAG",
+                            "__rte_experimental must appear alone on the line immediately preceding the return type of a function",
+                            filename,
+                            line_num,
+                            content,
+                        )
+
+    def check_internal_tags(self, patch_info: PatchInfo) -> None:
+        """Check __rte_internal tag placement."""
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                if "__rte_internal" in content:
+                    # Should only be in headers
+                    if filename.endswith(".c"):
+                        self.add_result(
+                            "WARNING",
+                            "INTERNAL_TAG",
+                            f"Please only put __rte_internal tags in headers ({filename})",
+                            filename,
+                            line_num,
+                            content,
+                        )
+                    # Should appear alone on the line
+                    stripped = content.strip()
+                    if stripped != "__rte_internal":
+                        self.add_result(
+                            "WARNING",
+                            "INTERNAL_TAG",
+                            "__rte_internal must appear alone on the line immediately preceding the return type of a function",
+                            filename,
+                            line_num,
+                            content,
+                        )
+
+    def check_aligned_attributes(self, patch_info: PatchInfo) -> None:
+        """Check alignment attribute usage."""
+        align_tokens = [
+            "__rte_aligned",
+            "__rte_cache_aligned",
+            "__rte_cache_min_aligned",
+        ]
+
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                for token in align_tokens:
+                    if re.search(rf"\b{token}\b", content):
+                        # Should only be used with struct or union
+                        if not re.search(rf"\b(struct|union)\s*{token}\b", content):
+                            self.add_result(
+                                "WARNING",
+                                "ALIGNED_ATTRIBUTE",
+                                f"Please use {token} only for struct or union types alignment",
+                                filename,
+                                line_num,
+                                content,
+                            )
+
+    def check_packed_attributes(self, patch_info: PatchInfo) -> None:
+        """Check packed attribute usage."""
+        begin_count = 0
+        end_count = 0
+
+        for filename, lines in patch_info.added_lines.items():
+            for line_num, content in lines:
+                if "__rte_packed_begin" in content:
+                    begin_count += 1
+                    # Should be after struct, union, or alignment attributes
+                    if (
+                        not re.search(
+                            r"\b(struct|union)\s*__rte_packed_begin\b", content
+                        )
+                        and not re.search(
+                            r"__rte_cache_aligned\s*__rte_packed_begin", content
+                        )
+                        and not re.search(
+                            r"__rte_cache_min_aligned\s*__rte_packed_begin", content
+                        )
+                        and not re.search(
+                            r"__rte_aligned\(.*\)\s*__rte_packed_begin", content
+                        )
+                    ):
+                        self.add_result(
+                            "WARNING",
+                            "PACKED_ATTRIBUTE",
+                            "Use __rte_packed_begin only after struct, union or alignment attributes",
+                            filename,
+                            line_num,
+                            content,
+                        )
+
+                if "__rte_packed_end" in content:
+                    end_count += 1
+
+        if begin_count != end_count:
+            self.add_result(
+                "WARNING",
+                "PACKED_ATTRIBUTE",
+                "__rte_packed_begin and __rte_packed_end should always be used in pairs",
+            )
+
+    def check_patch(self, content: str, patch_file: str = None) -> bool:
+        """Run all checks on a patch."""
+        self.results = []
+        self.errors = 0
+        self.warnings = 0
+        self.checks = 0
+        self.lines_checked = 0
+
+        # Check patch format first
+        self.check_patch_format(content, patch_file)
+
+        patch_info = self.parse_patch(content)
+
+        # Run all checks
+        self.check_signoff(patch_info)
+        self.check_line_length(patch_info)
+        self.check_trailing_whitespace(patch_info)
+        self.check_tabs_spaces(patch_info)
+        self.check_coding_style(patch_info)
+        self.check_spelling(patch_info)
+        self.check_forbidden_tokens(patch_info)
+        self.check_experimental_tags(patch_info)
+        self.check_internal_tags(patch_info)
+        self.check_aligned_attributes(patch_info)
+        self.check_packed_attributes(patch_info)
+        self.check_commit_message(patch_info, content)
+
+        return self.errors == 0 and self.warnings == 0
+
+    def check_patch_format(self, content: str, patch_file: str = None) -> None:
+        """Check basic patch format for corruption."""
+        lines = content.split("\n")
+
+        # Track patch structure
+        has_diff = False
+        has_hunk = False
+        in_hunk = False
+        hunk_line = 0
+
+        for i, line in enumerate(lines, 1):
+            # Track diff headers
+            if line.startswith("diff --git"):
+                has_diff = True
+                in_hunk = False
+
+            # Parse hunk header
+            if line.startswith("@@"):
+                has_hunk = True
+                in_hunk = True
+                hunk_line = i
+                # Validate hunk header format
+                if not re.match(r"@@ -\d+(?:,\d+)? \+\d+(?:,\d+)? @@", line):
+                    self.add_result(
+                        "ERROR",
+                        "CORRUPTED_PATCH",
+                        f"patch seems to be corrupt (malformed hunk header) at line {i}",
+                    )
+
+            # End of patch content (signature separator)
+            elif line == "-- ":
+                in_hunk = False
+
+            # Check for lines that look like they should be in a hunk but aren't prefixed
+            elif (
+                in_hunk
+                and line
+                and not line.startswith(
+                    (
+                        "+",
+                        "-",
+                        " ",
+                        "\\",
+                        "diff ",
+                        "@@",
+                        "index ",
+                        "--- ",
+                        "+++ ",
+                        "new file",
+                        "deleted file",
+                        "old mode",
+                        "new mode",
+                        "rename ",
+                        "similarity",
+                        "copy ",
+                    )
+                )
+            ):
+                # This could be a wrapped line or corruption
+                # But be careful - empty lines and commit message lines are OK
+                if not line.startswith(
+                    (
+                        "From ",
+                        "Subject:",
+                        "Date:",
+                        "Signed-off-by:",
+                        "Acked-by:",
+                        "Reviewed-by:",
+                        "Tested-by:",
+                        "Fixes:",
+                        "Cc:",
+                        "---",
+                        "Message-Id:",
+                    )
+                ):
+                    # Likely a corrupted/wrapped line in the diff
+                    self.add_result(
+                        "ERROR",
+                        "CORRUPTED_PATCH",
+                        f"patch seems to be corrupt (line wrapped?) at line {i}",
+                    )
+                    in_hunk = False  # Stop checking this hunk
+
+        if has_diff and not has_hunk:
+            self.add_result(
+                "ERROR",
+                "CORRUPTED_PATCH",
+                "Patch appears to be corrupted (has diff but no hunks)",
+            )
+
+        # Check for DOS line endings
+        if "\r\n" in content:
+            self.add_result(
+                "ERROR",
+                "DOS_LINE_ENDINGS",
+                "Patch has DOS line endings, should be UNIX line endings",
+            )
+
+    def check_commit_message(self, patch_info: PatchInfo, content: str) -> None:
+        """Check commit message for issues."""
+        lines = content.split("\n")
+
+        in_commit_msg = False
+        commit_msg_lines = []
+
+        for i, line in enumerate(lines):
+            if line.startswith("Subject:"):
+                in_commit_msg = True
+                continue
+            if line.startswith("---") or line.startswith("diff --git"):
+                in_commit_msg = False
+                continue
+            if in_commit_msg:
+                commit_msg_lines.append((i + 1, line))
+
+        for line_num, line in commit_msg_lines:
+            # UNKNOWN_COMMIT_ID: Fixes tag with short or invalid commit ID
+            match = re.match(r"^Fixes:\s*([0-9a-fA-F]+)", line)
+            if match:
+                commit_id = match.group(1)
+                if len(commit_id) < 12:
+                    self.add_result(
+                        "WARNING",
+                        "UNKNOWN_COMMIT_ID",
+                        f"Commit id '{commit_id}' is too short, use at least 12 characters",
+                        line_num=line_num,
+                        line_content=line,
+                    )
+                # Check Fixes format: should be Fixes: <hash> ("commit subject")
+                if not re.match(r'^Fixes:\s+[0-9a-fA-F]{12,}\s+\("[^"]+"\)\s*$', line):
+                    self.add_result(
+                        "WARNING",
+                        "BAD_FIXES_TAG",
+                        'Fixes: tag format should be: Fixes: <12+ char hash> ("commit subject")',
+                        line_num=line_num,
+                        line_content=line,
+                    )
+
+    def format_results(self, show_types: bool = True) -> str:
+        """Format the results for output."""
+        output = []
+
+        for result in self.results:
+            if result.filename and result.line_num:
+                prefix = f"{result.filename}:{result.line_num}:"
+            elif result.filename:
+                prefix = f"{result.filename}:"
+            else:
+                prefix = ""
+
+            type_str = f" [{result.type_name}]" if show_types else ""
+            output.append(f"{result.level}:{type_str} {result.message}")
+
+            if prefix:
+                output.append(f"#  {prefix}")
+            if result.line_content:
+                output.append(f"+  {result.line_content}")
+            output.append("")
+
+        return "\n".join(output)
+
+    def get_summary(self) -> str:
+        """Get a summary of the check results."""
+        return f"total: {self.errors} errors, {self.warnings} warnings, {self.checks} checks, {self.lines_checked} lines checked"
+
+
+def split_mbox(content: str) -> list[str]:
+    """Split an mbox file into individual messages.
+
+    Mbox format uses 'From ' at the start of a line as message separator.
+    """
+    messages = []
+    current = []
+
+    for line in content.split("\n"):
+        # Standard mbox separator: line starting with "From " followed by
+        # an address or identifier and a date
+        if line.startswith("From ") and current:
+            messages.append("\n".join(current))
+            current = [line]
+        else:
+            current.append(line)
+
+    if current:
+        messages.append("\n".join(current))
+
+    return messages
+
+
+def check_single_patch(
+    checker: CheckPatch,
+    patch_path: Optional[str],
+    commit: Optional[str],
+    verbose: bool,
+    quiet: bool,
+    pre_content: Optional[str] = None,
+) -> bool:
+    """Check a single patch file or commit."""
+    subject = ""
+    content = ""
+
+    if pre_content:
+        content = pre_content
+    elif patch_path:
+        try:
+            with open(patch_path, "r", encoding="utf-8", errors="replace") as f:
+                content = f.read()
+        except IOError as e:
+            print(f"Error reading {patch_path}: {e}", file=sys.stderr)
+            return False
+    elif commit:
+        try:
+            result = subprocess.run(
+                [
+                    "git",
+                    "format-patch",
+                    "--find-renames",
+                    "--no-stat",
+                    "--stdout",
+                    "-1",
+                    commit,
+                ],
+                capture_output=True,
+                text=True,
+            )
+            if result.returncode != 0:
+                print(f"Error getting commit {commit}", file=sys.stderr)
+                return False
+            content = result.stdout
+        except (subprocess.CalledProcessError, FileNotFoundError) as e:
+            print(f"Error running git: {e}", file=sys.stderr)
+            return False
+    else:
+        content = sys.stdin.read()
+
+    # Extract subject
+    match = re.search(
+        r"^Subject:\s*(.+?)(?:\n(?=\S)|\n\n)", content, re.MULTILINE | re.DOTALL
+    )
+    if match:
+        subject = match.group(1).replace("\n ", " ").strip()
+
+    if verbose:
+        print(f"\n### {subject}\n")
+
+    is_clean = checker.check_patch(content, patch_path)
+    has_issues = checker.errors > 0 or checker.warnings > 0
+    has_any_results = has_issues or checker.checks > 0
+
+    if has_any_results or verbose:
+        if not verbose and subject:
+            print(f"\n### {subject}\n")
+        print(checker.format_results(show_types=True))
+        print(checker.get_summary())
+
+    return is_clean
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse command line arguments."""
+    parser = argparse.ArgumentParser(
+        description="Check patches for DPDK coding style and common issues",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  %(prog)s patch.diff                Check a patch file
+  %(prog)s -n 3                      Check last 3 commits
+  %(prog)s -r origin/main..HEAD      Check commits in range
+  cat patch.diff | %(prog)s          Check patch from stdin
+""",
+    )
+
+    parser.add_argument("patches", nargs="*", help="Patch files to check")
+    parser.add_argument("-n", type=int, metavar="NUM", help="Check last NUM commits")
+    parser.add_argument(
+        "-r",
+        "--range",
+        metavar="RANGE",
+        help="Check commits in git range (default: origin/main..)",
+    )
+    parser.add_argument(
+        "-q", "--quiet", action="store_true", help="Quiet mode - only show summary"
+    )
+    parser.add_argument(
+        "-v", "--verbose", action="store_true", help="Verbose mode - show all checks"
+    )
+    parser.add_argument(
+        "--max-line-length",
+        type=int,
+        default=DEFAULT_LINE_LENGTH,
+        help=f"Maximum line length (default: {DEFAULT_LINE_LENGTH})",
+    )
+    parser.add_argument(
+        "--codespell",
+        action="store_true",
+        default=True,
+        help="Enable spell checking (default: enabled)",
+    )
+    parser.add_argument(
+        "--no-codespell",
+        dest="codespell",
+        action="store_false",
+        help="Disable spell checking",
+    )
+    parser.add_argument(
+        "--codespellfile", metavar="FILE", help="Path to codespell dictionary"
+    )
+    parser.add_argument(
+        "--show-types",
+        action="store_true",
+        default=True,
+        help="Show message types (default: enabled)",
+    )
+    parser.add_argument(
+        "--no-show-types",
+        dest="show_types",
+        action="store_false",
+        help="Hide message types",
+    )
+
+    return parser.parse_args()
+
+
+def main():
+    """Main entry point."""
+    args = parse_args()
+
+    # Build configuration
+    config = {
+        "max_line_length": args.max_line_length,
+        "codespell": args.codespell,
+        "show_types": args.show_types,
+    }
+
+    if args.codespellfile:
+        config["codespell_file"] = args.codespellfile
+
+    checker = CheckPatch(config)
+
+    total = 0
+    failed = 0
+
+    if args.patches:
+        # Check specified patch files
+        for patch in args.patches:
+            try:
+                with open(patch, "r", encoding="utf-8", errors="replace") as f:
+                    content = f.read()
+            except IOError as e:
+                print(f"Error reading {patch}: {e}", file=sys.stderr)
+                total += 1
+                failed += 1
+                continue
+
+            # Check if this is an mbox with multiple patches
+            messages = split_mbox(content)
+            if len(messages) > 1:
+                for msg in messages:
+                    # Only process messages that contain diffs
+                    if "diff --git" in msg or "---" in msg:
+                        total += 1
+                        if not check_single_patch(
+                            checker, None, None, args.verbose, args.quiet, msg
+                        ):
+                            failed += 1
+            else:
+                total += 1
+                if not check_single_patch(
+                    checker, patch, None, args.verbose, args.quiet
+                ):
+                    failed += 1
+
+    elif args.n or args.range:
+        # Check git commits
+        if args.n:
+            result = subprocess.run(
+                ["git", "rev-list", "--reverse", f"--max-count={args.n}", "HEAD"],
+                capture_output=True,
+                text=True,
+            )
+        else:
+            git_range = args.range if args.range else "origin/main.."
+            result = subprocess.run(
+                ["git", "rev-list", "--reverse", git_range],
+                capture_output=True,
+                text=True,
+            )
+
+        if result.returncode != 0:
+            print("Error getting git commits", file=sys.stderr)
+            sys.exit(1)
+
+        commits = result.stdout.strip().split("\n")
+        for commit in commits:
+            if commit:
+                total += 1
+                if not check_single_patch(
+                    checker, None, commit, args.verbose, args.quiet
+                ):
+                    failed += 1
+
+    elif not sys.stdin.isatty():
+        # Read from stdin
+        total = 1
+        if not check_single_patch(checker, None, None, args.verbose, args.quiet):
+            failed += 1
+
+    else:
+        # Default to checking commits since origin/main
+        result = subprocess.run(
+            ["git", "rev-list", "--reverse", "origin/main.."],
+            capture_output=True,
+            text=True,
+        )
+
+        commits = result.stdout.strip().split("\n") if result.stdout.strip() else []
+        for commit in commits:
+            if commit:
+                total += 1
+                if not check_single_patch(
+                    checker, None, commit, args.verbose, args.quiet
+                ):
+                    failed += 1
+
+    # Print summary
+    passed = total - failed
+    if not args.quiet:
+        print(f"\n{passed}/{total} valid patch{'es' if passed != 1 else ''}")
+
+    sys.exit(0 if failed == 0 else 1)
+
+
+if __name__ == "__main__":
+    main()
-- 
2.53.0


^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2026-03-24 14:49 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-01-31 20:48 [RFC] devtools: replace get-maintainer shell wrapper with Python script Stephen Hemminger
2026-02-01 13:51 ` Thomas Monjalon
2026-02-01 19:01   ` Stephen Hemminger
2026-02-01 20:16     ` Thomas Monjalon
2026-02-01 22:23       ` Stephen Hemminger
2026-02-01 19:22 ` [RFC v2] devtools: replace checkpatches " Stephen Hemminger
2026-02-03 14:17 ` [RFC v3] " Stephen Hemminger
2026-02-04 16:59 ` [PATCH v4] " Stephen Hemminger
2026-02-04 17:29   ` Bruce Richardson
2026-02-04 17:32   ` Bruce Richardson
2026-02-05  1:43     ` Stephen Hemminger
2026-02-26 17:15 ` [PATCH v5] devtools: add Python-based patch style checker Stephen Hemminger
2026-03-24 14:48 ` [PATCH v6] " Stephen Hemminger

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox