public inbox for openembedded-core@lists.openembedded.org
 help / color / mirror / Atom feed
From: Joshua Watt <jpewhacker@gmail.com>
To: openembedded-core@lists.openembedded.org
Cc: benjamin.robin@bootlin.com, ross.burton@arm.com,
	Joshua Watt <JPEWhacker@gmail.com>
Subject: [OE-core][PATCH v3 7/8] spdx: De-duplicate CreationInfo
Date: Thu, 26 Feb 2026 10:33:08 -0700	[thread overview]
Message-ID: <20260226173930.2847872-8-JPEWhacker@gmail.com> (raw)
In-Reply-To: <20260226173930.2847872-1-JPEWhacker@gmail.com>

De-duplicates CreationInfo objects that are identical (except for ID)
when writing out an SBoM. This significantly reduces the number of
CreationInfo objects that end up in the final document.

Signed-off-by: Joshua Watt <JPEWhacker@gmail.com>
---
 meta/lib/oe/sbom30.py | 112 ++++++++++++++++++++++++++++++------------
 meta/lib/oe/spdx30.py |   2 +-
 2 files changed, 81 insertions(+), 33 deletions(-)

diff --git a/meta/lib/oe/sbom30.py b/meta/lib/oe/sbom30.py
index 21f084dc16..55a2863d2d 100644
--- a/meta/lib/oe/sbom30.py
+++ b/meta/lib/oe/sbom30.py
@@ -14,6 +14,7 @@ import uuid
 import os
 import oe.spdx_common
 from datetime import datetime, timezone
+from contextlib import contextmanager
 
 OE_SPDX_BASE = "https://rdf.openembedded.org/spdx/3.0/"
 
@@ -191,6 +192,25 @@ def to_list(l):
     return l
 
 
+class Dedup(object):
+    def __init__(self, objset):
+        self.unique = set()
+        self.dedup = {}
+        self.objset = objset
+
+    def find_duplicates(self, cmp, typ, **kwargs):
+        for o in self.objset.foreach_filter(typ, **kwargs):
+            for u in self.unique:
+                if cmp(u, o):
+                    self.dedup[o] = u
+                    break
+            else:
+                self.unique.add(o)
+
+    def get(self, o):
+        return self.dedup.get(o, o)
+
+
 class ObjectSet(oe.spdx30.SHACLObjectSet):
     def __init__(self, d):
         super().__init__()
@@ -895,6 +915,45 @@ class ObjectSet(oe.spdx30.SHACLObjectSet):
         self.missing_ids -= set(imports.keys())
         return self.missing_ids
 
+    @contextmanager
+    def deduplicate(self):
+        d = Dedup(self)
+
+        yield d
+
+        visited = set()
+
+        def visit(o, path):
+            if isinstance(o, oe.spdx30.SHACLObject):
+                if o in visited:
+                    return False
+                visited.add(o)
+
+                for k in o:
+                    v = o[k]
+                    if isinstance(v, oe.spdx30.SHACLObject):
+                        o[k] = d.get(v)
+
+            elif isinstance(o, oe.spdx30.ListProxy):
+                for idx, v in enumerate(o):
+                    if isinstance(v, oe.spdx30.SHACLObject):
+                        o[idx] = d.get(v)
+
+            return True
+
+        if d.dedup:
+            for o in self.objects:
+                o.walk(visit)
+
+            for k, v in d.dedup.items():
+                bb.debug(
+                    1,
+                    f"Removing duplicate {k.__class__.__name__} {k._id or id(k)} -> {v._id or id(v)}",
+                )
+                self.objects.discard(k)
+
+            self.create_index()
+
 
 def load_jsonld(d, path, required=False):
     deserializer = oe.spdx30.JSONLDDeserializer()
@@ -1080,39 +1139,28 @@ def create_sbom(d, name, root_elements, add_objectsets=[]):
     # SBoM should be the only root element of the document
     objset.doc.rootElement = [sbom]
 
-    # De-duplicate licenses
-    unique = set()
-    dedup = {}
-    for lic in objset.foreach_type(oe.spdx30.simplelicensing_LicenseExpression):
-        for u in unique:
-            if (
-                u.simplelicensing_licenseExpression
-                == lic.simplelicensing_licenseExpression
-                and u.simplelicensing_licenseListVersion
-                == lic.simplelicensing_licenseListVersion
-            ):
-                dedup[lic] = u
-                break
-        else:
-            unique.add(lic)
-
-    if dedup:
-        for rel in objset.foreach_filter(
-            oe.spdx30.Relationship,
-            relationshipType=oe.spdx30.RelationshipType.hasDeclaredLicense,
-        ):
-            rel.to = [dedup.get(to, to) for to in rel.to]
-
-        for rel in objset.foreach_filter(
-            oe.spdx30.Relationship,
-            relationshipType=oe.spdx30.RelationshipType.hasConcludedLicense,
-        ):
-            rel.to = [dedup.get(to, to) for to in rel.to]
+    def cmp_license_expression(a, b):
+        return (
+            a.simplelicensing_licenseExpression == b.simplelicensing_licenseExpression
+            and a.simplelicensing_licenseListVersion
+            == b.simplelicensing_licenseListVersion
+        )
 
-        for k, v in dedup.items():
-            bb.debug(1, f"Removing duplicate License {k._id} -> {v._id}")
-            objset.objects.remove(k)
+    def cmp_creation_info(a, b):
+        data_a = {k: a[k] for k in a}
+        data_b = {k: b[k] for k in b}
+        data_a["@id"] = ""
+        data_b["@id"] = ""
+        return data_a == data_b
+
+    with objset.deduplicate() as dedup:
+        # De-duplicate licenses
+        dedup.find_duplicates(
+            cmp_license_expression,
+            oe.spdx30.simplelicensing_LicenseExpression,
+        )
 
-        objset.create_index()
+        # Deduplicate creation info
+        dedup.find_duplicates(cmp_creation_info, oe.spdx30.CreationInfo)
 
     return objset, sbom
diff --git a/meta/lib/oe/spdx30.py b/meta/lib/oe/spdx30.py
index cd97eebd18..1f58402ffc 100644
--- a/meta/lib/oe/spdx30.py
+++ b/meta/lib/oe/spdx30.py
@@ -701,7 +701,7 @@ class SHACLObject(object):
         self.__dict__["_obj_data"][iri] = prop.init()
 
     def __iter__(self):
-        return self._OBJ_PROPERTIES.keys()
+        return iter(self._OBJ_PROPERTIES.keys())
 
     def walk(self, callback, path=None):
         """
-- 
2.53.0



  parent reply	other threads:[~2026-02-26 17:39 UTC|newest]

Thread overview: 113+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-02-20 15:40 [OE-core][PATCH 0/9] Add SPDX 3 Recipe Information Joshua Watt
2026-02-20 15:40 ` [OE-core][PATCH 1/9] llvm-project-source: Use allarch.bbclass Joshua Watt
2026-02-20 15:40 ` [OE-core][PATCH 2/9] gcc-source: " Joshua Watt
2026-02-20 15:40 ` [OE-core][PATCH 3/9] spdx3: Add recipe SPDX data Joshua Watt
2026-02-22  7:59   ` Mathieu Dubois-Briand
2026-02-20 15:40 ` [OE-core][PATCH 4/9] spdx3: Add recipe SBoM task Joshua Watt
2026-02-20 15:40 ` [OE-core][PATCH 5/9] spdx3: Add is-native property Joshua Watt
2026-02-20 15:40 ` [OE-core][PATCH 6/9] spdx30: Include patch file information in VEX Joshua Watt
2026-02-20 15:40 ` [OE-core][PATCH 7/9] spdx: De-duplicate CreationInfo Joshua Watt
2026-02-20 15:40 ` [OE-core][PATCH 8/9] spdx: Ignore ASSUME_PROVIDED recipes Joshua Watt
2026-02-20 15:40 ` [OE-core][PATCH 9/9] spdx_common: Check for dependent task in task flags Joshua Watt
2026-02-24 23:00 ` [OE-core][PATCH v2 0/8] Add SPDX 3 Recipe Information Joshua Watt
2026-02-24 23:00   ` [OE-core][PATCH v2 1/8] llvm-project-source: Use allarch.bbclass Joshua Watt
2026-02-24 23:00   ` [OE-core][PATCH v2 2/8] gcc-source: " Joshua Watt
2026-02-24 23:00   ` [OE-core][PATCH v2 3/8] spdx3: Add recipe SPDX data Joshua Watt
2026-02-24 23:00   ` [OE-core][PATCH v2 4/8] spdx3: Add recipe SBoM task Joshua Watt
2026-02-24 23:00   ` [OE-core][PATCH v2 5/8] spdx3: Add is-native property Joshua Watt
2026-02-24 23:00   ` [OE-core][PATCH v2 6/8] spdx30: Include patch file information in VEX Joshua Watt
2026-02-24 23:00   ` [OE-core][PATCH v2 7/8] spdx: De-duplicate CreationInfo Joshua Watt
2026-02-24 23:00   ` [OE-core][PATCH v2 8/8] spdx_common: Check for dependent task in task flags Joshua Watt
2026-02-26 12:52   ` [OE-core][PATCH v2 0/8] Add SPDX 3 Recipe Information Mathieu Dubois-Briand
2026-02-26 14:27     ` Benjamin Robin
2026-02-26 15:09       ` Benjamin Robin
2026-02-26 15:41         ` Joshua Watt
2026-02-26 17:33   ` [OE-core][PATCH v3 " Joshua Watt
2026-02-26 17:33     ` [OE-core][PATCH v3 1/8] llvm-project-source: Use allarch.bbclass Joshua Watt
2026-02-26 17:33     ` [OE-core][PATCH v3 2/8] gcc-source: " Joshua Watt
2026-02-26 17:33     ` [OE-core][PATCH v3 3/8] spdx3: Add recipe SPDX data Joshua Watt
2026-02-26 17:33     ` [OE-core][PATCH v3 4/8] spdx3: Add recipe SBoM task Joshua Watt
2026-02-26 17:33     ` [OE-core][PATCH v3 5/8] spdx3: Add is-native property Joshua Watt
2026-02-26 17:33     ` [OE-core][PATCH v3 6/8] spdx30: Include patch file information in VEX Joshua Watt
2026-02-26 17:33     ` Joshua Watt [this message]
2026-02-26 17:33     ` [OE-core][PATCH v3 8/8] spdx_common: Check for dependent task in task flags Joshua Watt
2026-02-27  7:32     ` [OE-core][PATCH v3 0/8] Add SPDX 3 Recipe Information Mathieu Dubois-Briand
2026-03-03  0:43     ` [OE-core][PATCH v4 0/9] " Joshua Watt
2026-03-03  0:43       ` [OE-core][PATCH v4 1/9] llvm-project-source: Use allarch.bbclass Joshua Watt
2026-03-03  0:43       ` [OE-core][PATCH v4 2/9] gcc-source: " Joshua Watt
2026-03-03  0:43       ` [OE-core][PATCH v4 3/9] spdx3: Add recipe SPDX data Joshua Watt
2026-03-03  0:43       ` [OE-core][PATCH v4 4/9] spdx3: Add recipe SBoM task Joshua Watt
2026-03-03  0:43       ` [OE-core][PATCH v4 5/9] spdx3: Add is-native property Joshua Watt
2026-03-03  0:43       ` [OE-core][PATCH v4 6/9] spdx30: Include patch file information in VEX Joshua Watt
2026-03-03  0:43       ` [OE-core][PATCH v4 7/9] spdx: De-duplicate CreationInfo Joshua Watt
2026-03-03  0:43       ` [OE-core][PATCH v4 8/9] spdx_common: Check for dependent task in task flags Joshua Watt
2026-03-03  0:43       ` [OE-core][PATCH v4 9/9] spdx30: Skip install package CVE information Joshua Watt
2026-03-03 10:17       ` [OE-core][PATCH v4 0/9] Add SPDX 3 Recipe Information Antonin Godard
2026-03-03 14:08       ` Mathieu Dubois-Briand
2026-03-04 16:44       ` [OE-core][PATCH v5 00/13] " Joshua Watt
2026-03-04 16:44         ` [OE-core][PATCH v5 01/13] llvm-project-source: Use allarch.bbclass Joshua Watt
2026-03-04 16:44         ` [OE-core][PATCH v5 02/13] gcc-source: " Joshua Watt
2026-03-04 16:44         ` [OE-core][PATCH v5 03/13] spdx3: Add recipe SPDX data Joshua Watt
2026-03-04 16:44         ` [OE-core][PATCH v5 04/13] spdx3: Add recipe SBoM task Joshua Watt
2026-03-04 16:44         ` [OE-core][PATCH v5 05/13] spdx3: Add is-native property Joshua Watt
2026-03-04 16:44         ` [OE-core][PATCH v5 06/13] spdx30: Include patch file information in VEX Joshua Watt
2026-03-04 16:44         ` [OE-core][PATCH v5 07/13] spdx: De-duplicate CreationInfo Joshua Watt
2026-03-04 16:44         ` [OE-core][PATCH v5 08/13] spdx_common: Check for dependent task in task flags Joshua Watt
2026-03-04 16:44         ` [OE-core][PATCH v5 09/13] spdx30: Skip install package CVE information Joshua Watt
2026-03-04 16:44         ` [OE-core][PATCH v5 10/13] dummy-sdk-package: Disable SPDX Joshua Watt
2026-03-04 16:44         ` [OE-core][PATCH v5 11/13] spdx: Remove fatal errors for missing providers Joshua Watt
2026-03-04 16:44         ` [OE-core][PATCH v5 12/13] spdx3: Use common variable for vardeps Joshua Watt
2026-03-04 16:44         ` [OE-core][PATCH v5 13/13] glibc-testsuite: Do not generate SPDX Joshua Watt
2026-03-05 19:59         ` [OE-core][PATCH v5 00/13] Add SPDX 3 Recipe Information Mathieu Dubois-Briand
2026-03-10 18:38         ` [OE-core][PATCH v6 00/15] " Joshua Watt
2026-03-10 18:38           ` [OE-core][PATCH v6 01/15] llvm-project-source: Use allarch.bbclass Joshua Watt
2026-03-10 18:38           ` [OE-core][PATCH v6 02/15] gcc-source: " Joshua Watt
2026-03-10 18:38           ` [OE-core][PATCH v6 03/15] spdx3: Add recipe SPDX data Joshua Watt
2026-03-12 11:43             ` Richard Purdie
2026-03-12 14:11               ` Joshua Watt
2026-03-12 17:50                 ` Richard Purdie
2026-03-10 18:38           ` [OE-core][PATCH v6 04/15] spdx3: Add recipe SBoM task Joshua Watt
2026-03-12 11:50             ` Richard Purdie
2026-03-12 14:12               ` Joshua Watt
2026-03-10 18:38           ` [OE-core][PATCH v6 05/15] spdx3: Add is-native property Joshua Watt
2026-03-10 18:38           ` [OE-core][PATCH v6 06/15] spdx30: Include patch file information in VEX Joshua Watt
2026-03-10 18:38           ` [OE-core][PATCH v6 07/15] spdx: De-duplicate CreationInfo Joshua Watt
2026-03-10 18:38           ` [OE-core][PATCH v6 08/15] spdx_common: Check for dependent task in task flags Joshua Watt
2026-03-10 18:38           ` [OE-core][PATCH v6 09/15] spdx30: Skip install package CVE information Joshua Watt
2026-03-12 11:55             ` Richard Purdie
2026-03-12 14:15               ` Joshua Watt
2026-03-12 15:52                 ` Richard Purdie
2026-03-12 16:11                   ` Joshua Watt
2026-03-10 18:38           ` [OE-core][PATCH v6 10/15] dummy-sdk-package: Disable SPDX Joshua Watt
2026-03-12 11:59             ` Richard Purdie
2026-03-12 14:24               ` Joshua Watt
2026-03-12 15:58                 ` Richard Purdie
2026-03-12 16:06                   ` Joshua Watt
2026-03-12 16:43                     ` Joshua Watt
2026-03-12 18:02                       ` Joshua Watt
2026-03-12 20:34                         ` Joshua Watt
2026-03-10 18:38           ` [OE-core][PATCH v6 11/15] spdx: Remove fatal errors for missing providers Joshua Watt
2026-03-10 18:38           ` [OE-core][PATCH v6 12/15] spdx3: Use common variable for vardeps Joshua Watt
2026-03-10 18:38           ` [OE-core][PATCH v6 13/15] glibc-testsuite: Do not generate SPDX Joshua Watt
2026-03-10 18:38           ` [OE-core][PATCH v6 14/15] spdx: Remove do_collect_spdx_deps task Joshua Watt
2026-03-11 13:55           ` [OE-core][PATCH v6 00/15] Add SPDX 3 Recipe Information Mathieu Dubois-Briand
2026-03-11 16:39             ` Joshua Watt
2026-03-11 19:33               ` Mathieu Dubois-Briand
2026-03-11 22:56                 ` Joshua Watt
2026-03-18 13:44           ` [OE-core][PATCH v7 00/12] " Joshua Watt
2026-03-18 13:44             ` [OE-core][PATCH v7 01/12] spdx3: Add recipe SPDX data Joshua Watt
2026-03-18 13:44             ` [OE-core][PATCH v7 02/12] spdx3: Add recipe SBoM task Joshua Watt
2026-03-18 13:44             ` [OE-core][PATCH v7 03/12] spdx3: Add is-native property Joshua Watt
2026-03-18 13:44             ` [OE-core][PATCH v7 04/12] spdx30: Include patch file information in VEX Joshua Watt
2026-03-18 13:44             ` [OE-core][PATCH v7 05/12] spdx: De-duplicate CreationInfo Joshua Watt
2026-03-18 13:44             ` [OE-core][PATCH v7 06/12] spdx_common: Check for dependent task in task flags Joshua Watt
2026-03-18 13:44             ` [OE-core][PATCH v7 07/12] spdx30: Remove package VEX Joshua Watt
2026-03-18 13:44             ` [OE-core][PATCH v7 08/12] spdx: Remove fatal errors for missing providers Joshua Watt
2026-03-18 13:44             ` [OE-core][PATCH v7 09/12] spdx3: Use common variable for vardeps Joshua Watt
2026-03-18 13:44             ` [OE-core][PATCH v7 10/12] glibc-testsuite: Do not generate SPDX Joshua Watt
2026-03-18 13:44             ` [OE-core][PATCH v7 11/12] spdx: Remove do_collect_spdx_deps task Joshua Watt
2026-03-18 13:49             ` [OE-core][PATCH v7 00/12] Add SPDX 3 Recipe Information Joshua Watt
2026-03-19  7:07               ` Mathieu Dubois-Briand
2026-03-19 12:02                 ` Mathieu Dubois-Briand
2026-03-19 21:55                 ` Joshua Watt
2026-03-19 22:14                   ` Richard Purdie

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260226173930.2847872-8-JPEWhacker@gmail.com \
    --to=jpewhacker@gmail.com \
    --cc=benjamin.robin@bootlin.com \
    --cc=openembedded-core@lists.openembedded.org \
    --cc=ross.burton@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox