All of lore.kernel.org
 help / color / mirror / Atom feed
* [LTP] [PATCH v2] metadata: add linter for JSON file
@ 2026-06-25  7:09 Andrea Cervesato
  2026-06-25 10:42 ` Cyril Hrubis
  0 siblings, 1 reply; 4+ messages in thread
From: Andrea Cervesato @ 2026-06-25  7:09 UTC (permalink / raw)
  To: Linux Test Project

From: Andrea Cervesato <andrea.cervesato@suse.com>

Add a linter to verify that metadata contains the correct data. For now
it verifies that:

- groups tag is correct, according to the parent folders
- CVE value is well defined
- CVE number actually exists (optional, only with --check-cve-online)

Signed-off-by: Andrea Cervesato <andrea.cervesato@suse.com>
---
Changes in v2:
- fix --check-cve-online
- Link to v1: https://lore.kernel.org/r/20260624-metadata_linter-v1-1-3d9506169aad@suse.com
---
 .gitignore        |   1 +
 metadata/Makefile |   4 +
 metadata/lint.py  | 224 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 229 insertions(+)

diff --git a/.gitignore b/.gitignore
index f10cd0c80e3655ad720e465f47c12ad0d51e7cd1..3450ded24840547bfc5ce572d6a73d8ce2605f20 100644
--- a/.gitignore
+++ b/.gitignore
@@ -55,6 +55,7 @@ patches/
 *.run-test
 *.test
 logfile.*
+__pycache__
 
 /utils/benchmark/ebizzy-0.3/ebizzy
 
diff --git a/metadata/Makefile b/metadata/Makefile
index 6939b9f76ccc5612e9f6b56e88bc0a2f60a03234..641b02575d10d3af60975e14733a6085317758bc 100644
--- a/metadata/Makefile
+++ b/metadata/Makefile
@@ -15,6 +15,10 @@ INSTALL_DIR		= metadata
 ltp.json: metaparse metaparse-sh
 	$(abs_srcdir)/parse.sh > ltp.json
 
+.PHONY: lint
+lint: ltp.json
+	$(abs_srcdir)/lint.py ltp.json
+
 test:
 	$(MAKE) -C $(abs_srcdir)/tests/ test
 
diff --git a/metadata/lint.py b/metadata/lint.py
new file mode 100755
index 0000000000000000000000000000000000000000..4511ee9bd408af4b10cd8b3331f5f0589684aba1
--- /dev/null
+++ b/metadata/lint.py
@@ -0,0 +1,224 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (c) 2026 Linux Test Project
+"""
+Lint semantic consistency of generated metadata/ltp.json.
+
+This is not a schema validator; metaparse tests cover JSON shape. The linter
+checks metadata rules that depend on the final generated test catalog:
+
+  * Groups derived from the source path (the two nearest parent directories,
+    skipping 'kernel' and 'cve') must be present in test 'groups'.
+
+  * A CVE tag requires the 'cve' group and a linux-git tag requires the
+    'regression' group.
+
+  * CVE tag values must use a valid bare YYYY-NNNN[...] identifier. With
+    --check-cve-online, every CVE is verified against the official CVE
+    Services API (https://cveawg.mitre.org).
+"""
+
+import argparse
+import json
+import os
+import re
+import sys
+from typing import (
+    Any,
+    Dict,
+    List,
+    Pattern,
+    Tuple,
+)
+
+CVE_RE: Pattern[str] = re.compile(r"^[0-9]{4}-[0-9]{4,}$")
+CVE_API: str = "https://cveawg.mitre.org/api/cve/CVE-"
+SKIP_PATH_GROUPS: Tuple[str, ...] = ("kernel", "cve")
+
+
+def path_groups(fname: str) -> List[str]:
+    """
+    Return groups derived from the two nearest parent directories.
+    """
+    prefix = "testcases/"
+    if not fname.startswith(prefix):
+        return []
+
+    dirs = fname[len(prefix) :].split("/")[:-1]
+    return [grp for grp in reversed(dirs[-2:]) if grp not in SKIP_PATH_GROUPS]
+
+
+def tag_values(tags: List[List[str]], name: str) -> List[str]:
+    """
+    Return all values for metadata tags matching name.
+    """
+    return [tag[1] for tag in tags if len(tag) >= 2 and tag[0] == name]
+
+
+def has_tag(tags: List[List[str]], name: str) -> bool:
+    """
+    Return whether a metadata tag exists.
+    """
+    return any(tag and tag[0] == name for tag in tags)
+
+
+def expected_groups(conf: Dict[str, Any]) -> List[str]:
+    """
+    Return groups expected from test path and tags.
+    """
+    groups: List[str] = []
+    fname: str = conf.get("fname", "")
+    tags: List[List[str]] = conf.get("tags", [])
+
+    for group in path_groups(fname):
+        if group not in groups:
+            groups.append(group)
+
+    if has_tag(tags, "CVE") and "cve" not in groups:
+        groups.append("cve")
+
+    if has_tag(tags, "linux-git") and "regression" not in groups:
+        groups.append("regression")
+
+    return groups
+
+
+def lint_groups(name: str, conf: Dict[str, Any]) -> List[str]:
+    """
+    Return group lint errors for a single test.
+    """
+    errors: List[str] = []
+    groups: List[str] = conf.get("groups", [])
+    expected: List[str] = expected_groups(conf)
+    missing: List[str] = [group for group in expected if group not in groups]
+
+    if missing:
+        errors.append(f"{name}: missing groups: {', '.join(missing)}")
+
+    return errors
+
+
+def lint_cve_format(name: str, conf: Dict[str, Any]) -> List[str]:
+    """
+    Return CVE format lint errors for a single test.
+    """
+    errors: List[str] = []
+    tags: List[List[str]] = conf.get("tags", [])
+
+    for cve in tag_values(tags, "CVE"):
+        if cve.upper().startswith("CVE-"):
+            errors.append(
+                f"{name}: CVE tag '{cve}' must not start with 'CVE-' prefix, "
+                "use the bare 'YYYY-NNNN' identifier"
+            )
+        elif not CVE_RE.match(cve):
+            errors.append(f"{name}: malformed CVE identifier '{cve}'")
+
+    return errors
+
+
+def cve_exists(cve: str, cache: Dict[str, bool]) -> bool:
+    """
+    Query the CVE Services API and cache the answer per identifier.
+    """
+    import urllib.error
+    import urllib.request
+
+    if cve in cache:
+        return cache[cve]
+
+    req = urllib.request.Request(CVE_API + cve, method="GET")
+    try:
+        with urllib.request.urlopen(req, timeout=30) as resp:
+            ok = resp.status == 200
+    except urllib.error.HTTPError as err:
+        if err.code == 404:
+            ok = False
+        else:
+            raise
+    except urllib.error.URLError as err:
+        raise RuntimeError(f"cannot reach CVE API: {err}") from err
+
+    cache[cve] = ok
+    return ok
+
+
+def lint_cve_existence(
+    name: str,
+    conf: Dict[str, Any],
+    cache: Dict[str, bool],
+) -> List[str]:
+    """
+    Return CVE existence lint errors for a single test.
+    """
+    errors: List[str] = []
+    tags: List[List[str]] = conf.get("tags", [])
+
+    for cve in tag_values(tags, "CVE"):
+        if CVE_RE.match(cve) and not cve_exists(cve, cache):
+            errors.append(f"{name}: CVE '{cve}' does not exist")
+
+    return errors
+
+
+def lint_tests(tests: Dict[str, Dict[str, Any]], check_cve_exists: bool) -> List[str]:
+    """
+    Return all lint errors for generated test metadata.
+    """
+    errors: List[str] = []
+    cache: Dict[str, bool] = {}
+
+    for name, conf in sorted(tests.items()):
+        errors += lint_groups(name, conf)
+        errors += lint_cve_format(name, conf)
+        if check_cve_exists:
+            errors += lint_cve_existence(name, conf, cache)
+
+    return errors
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser(
+        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
+    )
+    default = os.path.join(os.path.dirname(__file__), "ltp.json")
+    parser.add_argument(
+        "metadata",
+        nargs="?",
+        default=default,
+        help="path to the ltp.json metadata file (default: %(default)s)",
+    )
+    parser.add_argument(
+        "--check-cve-online",
+        action="store_true",
+        help="verify CVE existence against the online CVE database",
+    )
+    args = parser.parse_args()
+
+    try:
+        with open(args.metadata, encoding="utf-8") as data:
+            metadata: Dict[str, Any] = json.load(data)
+    except FileNotFoundError:
+        sys.exit(
+            f"error: metadata file '{args.metadata}' not found "
+            "(run 'make' in metadata/ first)"
+        )
+    except json.JSONDecodeError as err:
+        sys.exit(f"error: failed to parse '{args.metadata}': {err}")
+
+    tests: Dict[str, Dict[str, Any]] = metadata.get("tests", {})
+    errors: List[str] = lint_tests(tests, args.check_cve_online)
+
+    for err in errors:
+        print(err, file=sys.stderr)
+
+    if errors:
+        print(f"\n{len(errors)} error(s) found in {len(tests)} tests", file=sys.stderr)
+        return 1
+
+    print(f"metadata lint: {len(tests)} tests OK")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())

---
base-commit: 534222c4f3908e9642f913399e37a66fdd266bbe
change-id: 20260624-metadata_linter-41c60691bcb2

Best regards,
-- 
Andrea Cervesato <andrea.cervesato@suse.com>


-- 
Mailing list info: https://lists.linux.it/listinfo/ltp

^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2026-06-25 13:17 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2026-06-25  7:09 [LTP] [PATCH v2] metadata: add linter for JSON file Andrea Cervesato
2026-06-25 10:42 ` Cyril Hrubis
2026-06-25 13:13   ` Andrea Cervesato via ltp
2026-06-25 13:17     ` Cyril Hrubis

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.