From: Joshua Watt <jpewhacker@gmail.com>
To: openembedded-core@lists.openembedded.org
Cc: Joshua Watt <JPEWhacker@gmail.com>
Subject: [OE-core][PATCH v7 05/12] spdx: De-duplicate CreationInfo
Date: Wed, 18 Mar 2026 07:44:33 -0600 [thread overview]
Message-ID: <20260318134655.953233-6-JPEWhacker@gmail.com> (raw)
In-Reply-To: <20260318134655.953233-1-JPEWhacker@gmail.com>
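De-duplicates CreationInfo objects that are identical (except for ID)
when writing out an SBoM. This significantly reduces the number of
CreationInfo objects that end up in the final document.

The existing license expression de-duplication is reworked into a
generic Dedup helper, driven by a new ObjectSet.deduplicate() context
manager, so that licenses and CreationInfo share the same code path.

Also fixes SHACLObject.__iter__() to return an actual iterator rather
than the result of .keys() directly; the new de-duplication code relies
on iterating over objects with "for k in obj".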
Signed-off-by: Joshua Watt <JPEWhacker@gmail.com>
---
meta/lib/oe/sbom30.py | 112 ++++++++++++++++++++++++++++++------------
meta/lib/oe/spdx30.py | 2 +-
2 files changed, 81 insertions(+), 33 deletions(-)
diff --git a/meta/lib/oe/sbom30.py b/meta/lib/oe/sbom30.py
index 21f084dc16..55a2863d2d 100644
--- a/meta/lib/oe/sbom30.py
+++ b/meta/lib/oe/sbom30.py
@@ -14,6 +14,7 @@ import uuid
import os
import oe.spdx_common
from datetime import datetime, timezone
+from contextlib import contextmanager
OE_SPDX_BASE = "https://rdf.openembedded.org/spdx/3.0/"
@@ -191,6 +192,25 @@ def to_list(l):
return l
+class Dedup(object):
+ def __init__(self, objset):
+ self.unique = set()
+ self.dedup = {}
+ self.objset = objset
+
+ def find_duplicates(self, cmp, typ, **kwargs):
+ for o in self.objset.foreach_filter(typ, **kwargs):
+ for u in self.unique:
+ if cmp(u, o):
+ self.dedup[o] = u
+ break
+ else:
+ self.unique.add(o)
+
+ def get(self, o):
+ return self.dedup.get(o, o)
+
+
class ObjectSet(oe.spdx30.SHACLObjectSet):
def __init__(self, d):
super().__init__()
@@ -895,6 +915,45 @@ class ObjectSet(oe.spdx30.SHACLObjectSet):
self.missing_ids -= set(imports.keys())
return self.missing_ids
+ @contextmanager
+ def deduplicate(self):
+ d = Dedup(self)
+
+ yield d
+
+ visited = set()
+
+ def visit(o, path):
+ if isinstance(o, oe.spdx30.SHACLObject):
+ if o in visited:
+ return False
+ visited.add(o)
+
+ for k in o:
+ v = o[k]
+ if isinstance(v, oe.spdx30.SHACLObject):
+ o[k] = d.get(v)
+
+ elif isinstance(o, oe.spdx30.ListProxy):
+ for idx, v in enumerate(o):
+ if isinstance(v, oe.spdx30.SHACLObject):
+ o[idx] = d.get(v)
+
+ return True
+
+ if d.dedup:
+ for o in self.objects:
+ o.walk(visit)
+
+ for k, v in d.dedup.items():
+ bb.debug(
+ 1,
+ f"Removing duplicate {k.__class__.__name__} {k._id or id(k)} -> {v._id or id(v)}",
+ )
+ self.objects.discard(k)
+
+ self.create_index()
+
def load_jsonld(d, path, required=False):
deserializer = oe.spdx30.JSONLDDeserializer()
@@ -1080,39 +1139,28 @@ def create_sbom(d, name, root_elements, add_objectsets=[]):
# SBoM should be the only root element of the document
objset.doc.rootElement = [sbom]
- # De-duplicate licenses
- unique = set()
- dedup = {}
- for lic in objset.foreach_type(oe.spdx30.simplelicensing_LicenseExpression):
- for u in unique:
- if (
- u.simplelicensing_licenseExpression
- == lic.simplelicensing_licenseExpression
- and u.simplelicensing_licenseListVersion
- == lic.simplelicensing_licenseListVersion
- ):
- dedup[lic] = u
- break
- else:
- unique.add(lic)
-
- if dedup:
- for rel in objset.foreach_filter(
- oe.spdx30.Relationship,
- relationshipType=oe.spdx30.RelationshipType.hasDeclaredLicense,
- ):
- rel.to = [dedup.get(to, to) for to in rel.to]
-
- for rel in objset.foreach_filter(
- oe.spdx30.Relationship,
- relationshipType=oe.spdx30.RelationshipType.hasConcludedLicense,
- ):
- rel.to = [dedup.get(to, to) for to in rel.to]
+ def cmp_license_expression(a, b):
+ return (
+ a.simplelicensing_licenseExpression == b.simplelicensing_licenseExpression
+ and a.simplelicensing_licenseListVersion
+ == b.simplelicensing_licenseListVersion
+ )
- for k, v in dedup.items():
- bb.debug(1, f"Removing duplicate License {k._id} -> {v._id}")
- objset.objects.remove(k)
+ def cmp_creation_info(a, b):
+ data_a = {k: a[k] for k in a}
+ data_b = {k: b[k] for k in b}
+ data_a["@id"] = ""
+ data_b["@id"] = ""
+ return data_a == data_b
+
+ with objset.deduplicate() as dedup:
+ # De-duplicate licenses
+ dedup.find_duplicates(
+ cmp_license_expression,
+ oe.spdx30.simplelicensing_LicenseExpression,
+ )
- objset.create_index()
+ # Deduplicate creation info
+ dedup.find_duplicates(cmp_creation_info, oe.spdx30.CreationInfo)
return objset, sbom
diff --git a/meta/lib/oe/spdx30.py b/meta/lib/oe/spdx30.py
index cd97eebd18..1f58402ffc 100644
--- a/meta/lib/oe/spdx30.py
+++ b/meta/lib/oe/spdx30.py
@@ -701,7 +701,7 @@ class SHACLObject(object):
self.__dict__["_obj_data"][iri] = prop.init()
def __iter__(self):
- return self._OBJ_PROPERTIES.keys()
+ return iter(self._OBJ_PROPERTIES.keys())
def walk(self, callback, path=None):
"""
--
2.53.0
next prev parent reply other threads:[~2026-03-18 13:47 UTC|newest]
Thread overview: 113+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-20 15:40 [OE-core][PATCH 0/9] Add SPDX 3 Recipe Information Joshua Watt
2026-02-20 15:40 ` [OE-core][PATCH 1/9] llvm-project-source: Use allarch.bbclass Joshua Watt
2026-02-20 15:40 ` [OE-core][PATCH 2/9] gcc-source: " Joshua Watt
2026-02-20 15:40 ` [OE-core][PATCH 3/9] spdx3: Add recipe SPDX data Joshua Watt
2026-02-22 7:59 ` Mathieu Dubois-Briand
2026-02-20 15:40 ` [OE-core][PATCH 4/9] spdx3: Add recipe SBoM task Joshua Watt
2026-02-20 15:40 ` [OE-core][PATCH 5/9] spdx3: Add is-native property Joshua Watt
2026-02-20 15:40 ` [OE-core][PATCH 6/9] spdx30: Include patch file information in VEX Joshua Watt
2026-02-20 15:40 ` [OE-core][PATCH 7/9] spdx: De-duplicate CreationInfo Joshua Watt
2026-02-20 15:40 ` [OE-core][PATCH 8/9] spdx: Ignore ASSUME_PROVIDED recipes Joshua Watt
2026-02-20 15:40 ` [OE-core][PATCH 9/9] spdx_common: Check for dependent task in task flags Joshua Watt
2026-02-24 23:00 ` [OE-core][PATCH v2 0/8] Add SPDX 3 Recipe Information Joshua Watt
2026-02-24 23:00 ` [OE-core][PATCH v2 1/8] llvm-project-source: Use allarch.bbclass Joshua Watt
2026-02-24 23:00 ` [OE-core][PATCH v2 2/8] gcc-source: " Joshua Watt
2026-02-24 23:00 ` [OE-core][PATCH v2 3/8] spdx3: Add recipe SPDX data Joshua Watt
2026-02-24 23:00 ` [OE-core][PATCH v2 4/8] spdx3: Add recipe SBoM task Joshua Watt
2026-02-24 23:00 ` [OE-core][PATCH v2 5/8] spdx3: Add is-native property Joshua Watt
2026-02-24 23:00 ` [OE-core][PATCH v2 6/8] spdx30: Include patch file information in VEX Joshua Watt
2026-02-24 23:00 ` [OE-core][PATCH v2 7/8] spdx: De-duplicate CreationInfo Joshua Watt
2026-02-24 23:00 ` [OE-core][PATCH v2 8/8] spdx_common: Check for dependent task in task flags Joshua Watt
2026-02-26 12:52 ` [OE-core][PATCH v2 0/8] Add SPDX 3 Recipe Information Mathieu Dubois-Briand
2026-02-26 14:27 ` Benjamin Robin
2026-02-26 15:09 ` Benjamin Robin
2026-02-26 15:41 ` Joshua Watt
2026-02-26 17:33 ` [OE-core][PATCH v3 " Joshua Watt
2026-02-26 17:33 ` [OE-core][PATCH v3 1/8] llvm-project-source: Use allarch.bbclass Joshua Watt
2026-02-26 17:33 ` [OE-core][PATCH v3 2/8] gcc-source: " Joshua Watt
2026-02-26 17:33 ` [OE-core][PATCH v3 3/8] spdx3: Add recipe SPDX data Joshua Watt
2026-02-26 17:33 ` [OE-core][PATCH v3 4/8] spdx3: Add recipe SBoM task Joshua Watt
2026-02-26 17:33 ` [OE-core][PATCH v3 5/8] spdx3: Add is-native property Joshua Watt
2026-02-26 17:33 ` [OE-core][PATCH v3 6/8] spdx30: Include patch file information in VEX Joshua Watt
2026-02-26 17:33 ` [OE-core][PATCH v3 7/8] spdx: De-duplicate CreationInfo Joshua Watt
2026-02-26 17:33 ` [OE-core][PATCH v3 8/8] spdx_common: Check for dependent task in task flags Joshua Watt
2026-02-27 7:32 ` [OE-core][PATCH v3 0/8] Add SPDX 3 Recipe Information Mathieu Dubois-Briand
2026-03-03 0:43 ` [OE-core][PATCH v4 0/9] " Joshua Watt
2026-03-03 0:43 ` [OE-core][PATCH v4 1/9] llvm-project-source: Use allarch.bbclass Joshua Watt
2026-03-03 0:43 ` [OE-core][PATCH v4 2/9] gcc-source: " Joshua Watt
2026-03-03 0:43 ` [OE-core][PATCH v4 3/9] spdx3: Add recipe SPDX data Joshua Watt
2026-03-03 0:43 ` [OE-core][PATCH v4 4/9] spdx3: Add recipe SBoM task Joshua Watt
2026-03-03 0:43 ` [OE-core][PATCH v4 5/9] spdx3: Add is-native property Joshua Watt
2026-03-03 0:43 ` [OE-core][PATCH v4 6/9] spdx30: Include patch file information in VEX Joshua Watt
2026-03-03 0:43 ` [OE-core][PATCH v4 7/9] spdx: De-duplicate CreationInfo Joshua Watt
2026-03-03 0:43 ` [OE-core][PATCH v4 8/9] spdx_common: Check for dependent task in task flags Joshua Watt
2026-03-03 0:43 ` [OE-core][PATCH v4 9/9] spdx30: Skip install package CVE information Joshua Watt
2026-03-03 10:17 ` [OE-core][PATCH v4 0/9] Add SPDX 3 Recipe Information Antonin Godard
2026-03-03 14:08 ` Mathieu Dubois-Briand
2026-03-04 16:44 ` [OE-core][PATCH v5 00/13] " Joshua Watt
2026-03-04 16:44 ` [OE-core][PATCH v5 01/13] llvm-project-source: Use allarch.bbclass Joshua Watt
2026-03-04 16:44 ` [OE-core][PATCH v5 02/13] gcc-source: " Joshua Watt
2026-03-04 16:44 ` [OE-core][PATCH v5 03/13] spdx3: Add recipe SPDX data Joshua Watt
2026-03-04 16:44 ` [OE-core][PATCH v5 04/13] spdx3: Add recipe SBoM task Joshua Watt
2026-03-04 16:44 ` [OE-core][PATCH v5 05/13] spdx3: Add is-native property Joshua Watt
2026-03-04 16:44 ` [OE-core][PATCH v5 06/13] spdx30: Include patch file information in VEX Joshua Watt
2026-03-04 16:44 ` [OE-core][PATCH v5 07/13] spdx: De-duplicate CreationInfo Joshua Watt
2026-03-04 16:44 ` [OE-core][PATCH v5 08/13] spdx_common: Check for dependent task in task flags Joshua Watt
2026-03-04 16:44 ` [OE-core][PATCH v5 09/13] spdx30: Skip install package CVE information Joshua Watt
2026-03-04 16:44 ` [OE-core][PATCH v5 10/13] dummy-sdk-package: Disable SPDX Joshua Watt
2026-03-04 16:44 ` [OE-core][PATCH v5 11/13] spdx: Remove fatal errors for missing providers Joshua Watt
2026-03-04 16:44 ` [OE-core][PATCH v5 12/13] spdx3: Use common variable for vardeps Joshua Watt
2026-03-04 16:44 ` [OE-core][PATCH v5 13/13] glibc-testsuite: Do not generate SPDX Joshua Watt
2026-03-05 19:59 ` [OE-core][PATCH v5 00/13] Add SPDX 3 Recipe Information Mathieu Dubois-Briand
2026-03-10 18:38 ` [OE-core][PATCH v6 00/15] " Joshua Watt
2026-03-10 18:38 ` [OE-core][PATCH v6 01/15] llvm-project-source: Use allarch.bbclass Joshua Watt
2026-03-10 18:38 ` [OE-core][PATCH v6 02/15] gcc-source: " Joshua Watt
2026-03-10 18:38 ` [OE-core][PATCH v6 03/15] spdx3: Add recipe SPDX data Joshua Watt
2026-03-12 11:43 ` Richard Purdie
2026-03-12 14:11 ` Joshua Watt
2026-03-12 17:50 ` Richard Purdie
2026-03-10 18:38 ` [OE-core][PATCH v6 04/15] spdx3: Add recipe SBoM task Joshua Watt
2026-03-12 11:50 ` Richard Purdie
2026-03-12 14:12 ` Joshua Watt
2026-03-10 18:38 ` [OE-core][PATCH v6 05/15] spdx3: Add is-native property Joshua Watt
2026-03-10 18:38 ` [OE-core][PATCH v6 06/15] spdx30: Include patch file information in VEX Joshua Watt
2026-03-10 18:38 ` [OE-core][PATCH v6 07/15] spdx: De-duplicate CreationInfo Joshua Watt
2026-03-10 18:38 ` [OE-core][PATCH v6 08/15] spdx_common: Check for dependent task in task flags Joshua Watt
2026-03-10 18:38 ` [OE-core][PATCH v6 09/15] spdx30: Skip install package CVE information Joshua Watt
2026-03-12 11:55 ` Richard Purdie
2026-03-12 14:15 ` Joshua Watt
2026-03-12 15:52 ` Richard Purdie
2026-03-12 16:11 ` Joshua Watt
2026-03-10 18:38 ` [OE-core][PATCH v6 10/15] dummy-sdk-package: Disable SPDX Joshua Watt
2026-03-12 11:59 ` Richard Purdie
2026-03-12 14:24 ` Joshua Watt
2026-03-12 15:58 ` Richard Purdie
2026-03-12 16:06 ` Joshua Watt
2026-03-12 16:43 ` Joshua Watt
2026-03-12 18:02 ` Joshua Watt
2026-03-12 20:34 ` Joshua Watt
2026-03-10 18:38 ` [OE-core][PATCH v6 11/15] spdx: Remove fatal errors for missing providers Joshua Watt
2026-03-10 18:38 ` [OE-core][PATCH v6 12/15] spdx3: Use common variable for vardeps Joshua Watt
2026-03-10 18:38 ` [OE-core][PATCH v6 13/15] glibc-testsuite: Do not generate SPDX Joshua Watt
2026-03-10 18:38 ` [OE-core][PATCH v6 14/15] spdx: Remove do_collect_spdx_deps task Joshua Watt
2026-03-11 13:55 ` [OE-core][PATCH v6 00/15] Add SPDX 3 Recipe Information Mathieu Dubois-Briand
2026-03-11 16:39 ` Joshua Watt
2026-03-11 19:33 ` Mathieu Dubois-Briand
2026-03-11 22:56 ` Joshua Watt
2026-03-18 13:44 ` [OE-core][PATCH v7 00/12] " Joshua Watt
2026-03-18 13:44 ` [OE-core][PATCH v7 01/12] spdx3: Add recipe SPDX data Joshua Watt
2026-03-18 13:44 ` [OE-core][PATCH v7 02/12] spdx3: Add recipe SBoM task Joshua Watt
2026-03-18 13:44 ` [OE-core][PATCH v7 03/12] spdx3: Add is-native property Joshua Watt
2026-03-18 13:44 ` [OE-core][PATCH v7 04/12] spdx30: Include patch file information in VEX Joshua Watt
2026-03-18 13:44 ` Joshua Watt [this message]
2026-03-18 13:44 ` [OE-core][PATCH v7 06/12] spdx_common: Check for dependent task in task flags Joshua Watt
2026-03-18 13:44 ` [OE-core][PATCH v7 07/12] spdx30: Remove package VEX Joshua Watt
2026-03-18 13:44 ` [OE-core][PATCH v7 08/12] spdx: Remove fatal errors for missing providers Joshua Watt
2026-03-18 13:44 ` [OE-core][PATCH v7 09/12] spdx3: Use common variable for vardeps Joshua Watt
2026-03-18 13:44 ` [OE-core][PATCH v7 10/12] glibc-testsuite: Do not generate SPDX Joshua Watt
2026-03-18 13:44 ` [OE-core][PATCH v7 11/12] spdx: Remove do_collect_spdx_deps task Joshua Watt
2026-03-18 13:49 ` [OE-core][PATCH v7 00/12] Add SPDX 3 Recipe Information Joshua Watt
2026-03-19 7:07 ` Mathieu Dubois-Briand
2026-03-19 12:02 ` Mathieu Dubois-Briand
2026-03-19 21:55 ` Joshua Watt
2026-03-19 22:14 ` Richard Purdie
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260318134655.953233-6-JPEWhacker@gmail.com \
--to=jpewhacker@gmail.com \
--cc=openembedded-core@lists.openembedded.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.