From 0767df7a2aa81065b5743a61d2736f9f16eed4d6 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Thu, 22 Feb 2024 16:28:33 -0600 Subject: [PATCH 01/11] Start of Windows artifact SBOMs --- sbom.py | 109 +++++++++++++++++++++++++++- windows-release/azure-pipelines.yml | 8 +- windows-release/stage-sbom.yml | 45 ++++++++++++ 3 files changed, 159 insertions(+), 3 deletions(-) create mode 100644 windows-release/stage-sbom.yml diff --git a/sbom.py b/sbom.py index ee7da07f..96e22569 100644 --- a/sbom.py +++ b/sbom.py @@ -525,10 +525,115 @@ def create_sbom_for_source_tarball(tarball_path: str): return sbom_data +def create_sbom_for_windows_artifact(exe_path): + exe_name = os.path.basename(exe_path) + cpython_version = re.match(r"^python-([0-9abrc.]+)(?:-|\.exe)", exe_name).group(1) + cpython_version_without_suffix = re.match(r"^([0-9.]+)", cpython_version).group(1) + exe_download_location = f"https://www.python.org/ftp/python/{cpython_version_without_suffix}/{exe_name}" + + with open(exe_path, mode="rb") as f: + exe_checksum_sha256 = hashlib.sha256(f.read()).hexdigest() + + # Start with the CPython source SBOM as a base + with open("Misc/externals.spdx.json") as f: + sbom_data = json.loads(f.read()) + + # Add all the packages from the source SBOM + # We want to skip the file information because + # the files aren't available in Windows artifacts. + with open("Misc/sbom.spdx.json") as f: + source_sbom_data = json.loads(f.read()) + for sbom_package in source_sbom_data["packages"]: + sbom_data["packages"].append(sbom_package) + + sbom_data["relationships"] = [] + sbom_data["files"] = [] + + sbom_data.update({ + "SPDXID": "SPDXRef-DOCUMENT", + "spdxVersion": "SPDX-2.3", + "name": "CPython SBOM", + "dataLicense": "CC0-1.0", + # Naming done according to OpenSSF SBOM WG recommendations. + # See: https://github.com/ossf/sbom-everywhere/blob/main/reference/sbom_naming.md + "documentNamespace": f"{exe_download_location}.spdx.json", + "creationInfo": { + "created": ( + datetime.datetime.now(tz=datetime.timezone.utc) + .strftime("%Y-%m-%dT%H:%M:%SZ") + ), + "creators": [ + "Person: Python Release Managers", + f"Tool: ReleaseTools-{get_release_tools_commit_sha()}", + ], + # Version of the SPDX License ID list. + # This shouldn't need to be updated often, if ever. + "licenseListVersion": "3.22", + }, + }) + + # Create the SBOM entry for the CPython package. We use + # the SPDXID later on for creating relationships to files. + sbom_cpython_package = { + "SPDXID": "SPDXRef-PACKAGE-cpython", + "name": "CPython", + "versionInfo": cpython_version, + "licenseConcluded": "PSF-2.0", + "originator": "Organization: Python Software Foundation", + "supplier": "Organization: Python Software Foundation", + "packageFileName": exe_name, + "externalRefs": [ + { + "referenceCategory": "SECURITY", + "referenceLocator": f"cpe:2.3:a:python:python:{cpython_version}:*:*:*:*:*:*:*", + "referenceType": "cpe23Type", + } + ], + "primaryPackagePurpose": "APPLICATION", + "downloadLocation": exe_download_location, + "checksums": [{"algorithm": "SHA256", "checksumValue": exe_checksum_sha256}], + } + + # The top-level CPython package depends on every vendored sub-package. + for sbom_package in sbom_data["packages"]: + sbom_data["relationships"].append({ + "spdxElementId": sbom_cpython_package["SPDXID"], + "relatedSpdxElement": sbom_package["SPDXID"], + "relationshipType": "DEPENDS_ON", + }) + + sbom_data["packages"].append(sbom_cpython_package) + + # Final relationship, this SBOM describes the CPython package. + sbom_data["relationships"].append( + { + "spdxElementId": "SPDXRef-DOCUMENT", + "relatedSpdxElement": sbom_cpython_package["SPDXID"], + "relationshipType": "DESCRIBES", + } + ) + + # Apply the 'supplier' tag to every package since we're shipping + # the package in the tarball itself. Originator field is used for maintainers. + for sbom_package in sbom_data["packages"]: + sbom_package["supplier"] = "Organization: Python Software Foundation" + # Source packages have been compiled. + if sbom_package["primaryPackagePurpose"] == "SOURCE": + sbom_package["primaryPackagePurpose"] = "LIBRARY" + + normalize_sbom_data(sbom_data) + + return sbom_data + + def main() -> None: - tarball_path = sys.argv[1] - sbom_data = create_sbom_for_source_tarball(tarball_path) + artifact_path = sys.argv[1] + if artifact_path.endswith(".exe"): + sbom_data = create_sbom_for_windows_artifact(artifact_path) + else: + sbom_data = create_sbom_for_source_tarball(artifact_path) print(json.dumps(sbom_data, indent=2, sort_keys=True)) + if __name__ == "__main__": main() diff --git a/windows-release/azure-pipelines.yml b/windows-release/azure-pipelines.yml index 8dde308c..85e3b016 100644 --- a/windows-release/azure-pipelines.yml +++ b/windows-release/azure-pipelines.yml @@ -147,6 +147,12 @@ stages: SigningCertificate: ${{ parameters.SigningCertificate }} DoFreethreaded: ${{ parameters.DoFreethreaded }} + - stage: SBOM + displayName: Create SBOMs + dependsOn: Build + jobs: + - template: stage-sbom.yml + - stage: Layout displayName: Generate layouts dependsOn: Sign @@ -220,7 +226,7 @@ stages: - ${{ if eq(parameters.DoMSI, 'true') }}: - stage: PublishPyDotOrg displayName: Publish to python.org - dependsOn: ['Test_MSI', 'Test'] + dependsOn: ['SBOM', 'Test_MSI', 'Test'] jobs: - template: stage-publish-pythonorg.yml diff --git a/windows-release/stage-sbom.yml b/windows-release/stage-sbom.yml new file mode 100644 index 00000000..257f0305 --- /dev/null +++ b/windows-release/stage-sbom.yml @@ -0,0 +1,45 @@ +jobs: +- job: SBOM_Files + displayName: Create SBOMs for Python binaries + + pool: + vmImage: windows-2022 + + workspace: + clean: all + + strategy: + matrix: + win32: + Name: win32 + amd64: + Name: amd64 + arm64: + Name: arm64 + + steps: + - task: UsePythonVersion@0 + displayName: 'Use Python 3.6 or later' + inputs: + versionSpec: '>=3.6' + + - template: ./checkout.yml + + - task: DownloadPipelineArtifact@1 + displayName: 'Download artifact: bin_$(Name)' + inputs: + artifactName: bin_$(Name) + targetPath: $(Build.BinariesDirectory)\bin + + - powershell: > + python + "$(Build.SourcesDirectory)\sbom.py" + (gci msi\*\python-*.exe | select -First 1) + workingDirectory: $(Build.BinariesDirectory) + displayName: 'Create SBOMs for binaries' + + - task: PublishPipelineArtifact@0 + displayName: 'Publish artifact: sbom' + inputs: + targetPath: '$(Build.BinariesDirectory)\sbom' + artifactName: sbom From 0cc576972d4e82890f2cc622b9caff2dcc6c23f9 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Fri, 23 Feb 2024 10:44:25 -0600 Subject: [PATCH 02/11] Address review feedback --- sbom.py | 16 ++++++++++------ windows-release/azure-pipelines.yml | 2 +- windows-release/stage-sbom.yml | 17 ++++++++++++----- 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/sbom.py b/sbom.py index 96e22569..4029d0fb 100644 --- a/sbom.py +++ b/sbom.py @@ -627,12 +627,16 @@ def create_sbom_for_windows_artifact(exe_path): def main() -> None: - artifact_path = sys.argv[1] - if artifact_path.endswith(".exe"): - sbom_data = create_sbom_for_windows_artifact(artifact_path) - else: - sbom_data = create_sbom_for_source_tarball(artifact_path) - print(json.dumps(sbom_data, indent=2, sort_keys=True)) + artifact_paths = sys.argv[1:] + for artifact_path in artifact_paths: + if artifact_path.endswith(".exe"): + sbom_data = create_sbom_for_windows_artifact(artifact_path) + else: + sbom_data = create_sbom_for_source_tarball(artifact_path) + + with open(artifact_path + ".spdx.json", mode="w") as f: + f.truncate() + f.write(json.dumps(sbom_data, indent=2, sort_keys=True)) if __name__ == "__main__": diff --git a/windows-release/azure-pipelines.yml b/windows-release/azure-pipelines.yml index 85e3b016..6b737bd1 100644 --- a/windows-release/azure-pipelines.yml +++ b/windows-release/azure-pipelines.yml @@ -226,7 +226,7 @@ stages: - ${{ if eq(parameters.DoMSI, 'true') }}: - stage: PublishPyDotOrg displayName: Publish to python.org - dependsOn: ['SBOM', 'Test_MSI', 'Test'] + dependsOn: ['Test_MSI', 'Test'] jobs: - template: stage-publish-pythonorg.yml diff --git a/windows-release/stage-sbom.yml b/windows-release/stage-sbom.yml index 257f0305..37c76866 100644 --- a/windows-release/stage-sbom.yml +++ b/windows-release/stage-sbom.yml @@ -34,12 +34,19 @@ jobs: - powershell: > python "$(Build.SourcesDirectory)\sbom.py" - (gci msi\*\python-*.exe | select -First 1) + (gci msi\*\python-*.exe) workingDirectory: $(Build.BinariesDirectory) displayName: 'Create SBOMs for binaries' - - task: PublishPipelineArtifact@0 - displayName: 'Publish artifact: sbom' + - task: CopyFiles@2 + displayName: 'Layout Artifact: sbom' inputs: - targetPath: '$(Build.BinariesDirectory)\sbom' - artifactName: sbom + sourceFolder: $(Build.BinariesDirectory)\bin + targetFolder: $(Build.ArtifactStagingDirectory)\sbom + flatten: true + contents: | + **\*.spdx.json + + - publish: '$(Build.ArtifactStagingDirectory)\sbom' + artifact: sbom + displayName: 'Publish artifact: sbom' From 2af8c583c07b2f5dddb853627cbfa4fbbe3dabc8 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Fri, 23 Feb 2024 10:57:04 -0600 Subject: [PATCH 03/11] Deduplicate top-level and CPython SBOM generation steps --- sbom.py | 155 ++++++++++++++++++++------------------------------------ 1 file changed, 54 insertions(+), 101 deletions(-) diff --git a/sbom.py b/sbom.py index 4029d0fb..ce5ebfe9 100644 --- a/sbom.py +++ b/sbom.py @@ -316,38 +316,20 @@ def create_pip_sbom_from_wheel( ) -def create_sbom_for_source_tarball(tarball_path: str): - """Stitches together an SBOM for a source tarball""" - tarball_name = os.path.basename(tarball_path) - - # Open the tarball with known compression settings. - if tarball_name.endswith(".tgz"): - tarball = tarfile.open(tarball_path, mode="r:gz") - elif tarball_name.endswith(".tar.xz"): - tarball = tarfile.open(tarball_path, mode="r:xz") - else: - raise ValueError(f"Unknown tarball format: '{tarball_name}'") +def create_cpython_sbom( + sbom_data: dict[str, typing.Any], + cpython_version: str, + artifact_path: str, +): + """Creates the top-level SBOM metadata and the CPython SBOM package.""" - # Parse the CPython version from the tarball. - # Calculate the download locations from the CPython version and tarball name. - cpython_version = re.match(r"^Python-([0-9abrc.]+)\.t", tarball_name).group(1) cpython_version_without_suffix = re.match(r"^([0-9.]+)", cpython_version).group(1) - tarball_download_location = f"https://www.python.org/ftp/python/{cpython_version_without_suffix}/{tarball_name}" - - # Take a hash of the tarball - with open(tarball_path, mode="rb") as f: - tarball_checksum_sha256 = hashlib.sha256(f.read()).hexdigest() + artifact_name = os.path.basename(artifact_path) + artifact_download_location = f"https://www.python.org/ftp/python/{cpython_version_without_suffix}/{artifact_name}" - # There should be an SBOM included in the tarball. - # If there's not we can't create an SBOM. - try: - sbom_tarball_member = tarball.getmember(f"Python-{cpython_version}/Misc/sbom.spdx.json") - except KeyError: - raise ValueError( - "Tarball doesn't contain an SBOM at 'Misc/sbom.spdx.json'" - ) from None - sbom_bytes = tarball.extractfile(sbom_tarball_member).read() - sbom_data = json.loads(sbom_bytes) + # Take a hash of the artifact + with open(artifact_path, mode="rb") as f: + artifact_checksum_sha256 = hashlib.sha256(f.read()).hexdigest() sbom_data.update({ "SPDXID": "SPDXRef-DOCUMENT", @@ -356,7 +338,7 @@ def create_sbom_for_source_tarball(tarball_path: str): "dataLicense": "CC0-1.0", # Naming done according to OpenSSF SBOM WG recommendations. # See: https://github.com/ossf/sbom-everywhere/blob/main/reference/sbom_naming.md - "documentNamespace": f"{tarball_download_location}.spdx.json", + "documentNamespace": f"{artifact_download_location}.spdx.json", "creationInfo": { "created": ( datetime.datetime.now(tz=datetime.timezone.utc) @@ -381,7 +363,7 @@ def create_sbom_for_source_tarball(tarball_path: str): "licenseConcluded": "PSF-2.0", "originator": "Organization: Python Software Foundation", "supplier": "Organization: Python Software Foundation", - "packageFileName": tarball_name, + "packageFileName": artifact_name, "externalRefs": [ { "referenceCategory": "SECURITY", @@ -390,8 +372,8 @@ def create_sbom_for_source_tarball(tarball_path: str): } ], "primaryPackagePurpose": "SOURCE", - "downloadLocation": tarball_download_location, - "checksums": [{"algorithm": "SHA256", "checksumValue": tarball_checksum_sha256}], + "downloadLocation": artifact_download_location, + "checksums": [{"algorithm": "SHA256", "checksumValue": artifact_checksum_sha256}], } # The top-level CPython package depends on every vendored sub-package. @@ -404,6 +386,37 @@ def create_sbom_for_source_tarball(tarball_path: str): sbom_data["packages"].append(sbom_cpython_package) + +def create_sbom_for_source_tarball(tarball_path: str): + """Stitches together an SBOM for a source tarball""" + tarball_name = os.path.basename(tarball_path) + + # Open the tarball with known compression settings. + if tarball_name.endswith(".tgz"): + tarball = tarfile.open(tarball_path, mode="r:gz") + elif tarball_name.endswith(".tar.xz"): + tarball = tarfile.open(tarball_path, mode="r:xz") + else: + raise ValueError(f"Unknown tarball format: '{tarball_name}'") + + # Parse the CPython version from the tarball. + # Calculate the download locations from the CPython version and tarball name. + cpython_version = re.match(r"^Python-([0-9abrc.]+)\.t", tarball_name).group(1) + + # There should be an SBOM included in the tarball. + # If there's not we can't create an SBOM. + try: + sbom_tarball_member = tarball.getmember(f"Python-{cpython_version}/Misc/sbom.spdx.json") + except KeyError: + raise ValueError( + "Tarball doesn't contain an SBOM at 'Misc/sbom.spdx.json'" + ) from None + sbom_bytes = tarball.extractfile(sbom_tarball_member).read() + sbom_data = json.loads(sbom_bytes) + + create_cpython_sbom(sbom_data, cpython_version=cpython_version, artifact_path=tarball_path) + sbom_cpython_package_spdx_id = spdx_id("SPDXRef-PACKAGE-cpython") + # Find the pip wheel in ensurepip in the tarball for member in tarball.getmembers(): match = re.match(rf"^Python-{cpython_version}/Lib/ensurepip/_bundled/(pip-.*\.whl)$", member.name) @@ -487,7 +500,7 @@ def create_sbom_for_source_tarball(tarball_path: str): ) sbom_data["relationships"].append( { - "spdxElementId": sbom_cpython_package["SPDXID"], + "spdxElementId": sbom_cpython_package_spdx_id, "relatedSpdxElement": sbom_file_spdx_id, "relationshipType": "CONTAINS", } @@ -505,7 +518,7 @@ def create_sbom_for_source_tarball(tarball_path: str): sbom_data["relationships"].append( { "spdxElementId": "SPDXRef-DOCUMENT", - "relatedSpdxElement": sbom_cpython_package["SPDXID"], + "relatedSpdxElement": sbom_cpython_package_spdx_id, "relationshipType": "DESCRIBES", } ) @@ -519,20 +532,12 @@ def create_sbom_for_source_tarball(tarball_path: str): # Calculate the 'packageVerificationCode' values for files in packages. calculate_package_verification_codes(sbom_data) - # Normalize SBOM structures for reproducibility. - normalize_sbom_data(sbom_data) - return sbom_data def create_sbom_for_windows_artifact(exe_path): exe_name = os.path.basename(exe_path) cpython_version = re.match(r"^python-([0-9abrc.]+)(?:-|\.exe)", exe_name).group(1) - cpython_version_without_suffix = re.match(r"^([0-9.]+)", cpython_version).group(1) - exe_download_location = f"https://www.python.org/ftp/python/{cpython_version_without_suffix}/{exe_name}" - - with open(exe_path, mode="rb") as f: - exe_checksum_sha256 = hashlib.sha256(f.read()).hexdigest() # Start with the CPython source SBOM as a base with open("Misc/externals.spdx.json") as f: @@ -549,80 +554,26 @@ def create_sbom_for_windows_artifact(exe_path): sbom_data["relationships"] = [] sbom_data["files"] = [] - sbom_data.update({ - "SPDXID": "SPDXRef-DOCUMENT", - "spdxVersion": "SPDX-2.3", - "name": "CPython SBOM", - "dataLicense": "CC0-1.0", - # Naming done according to OpenSSF SBOM WG recommendations. - # See: https://github.com/ossf/sbom-everywhere/blob/main/reference/sbom_naming.md - "documentNamespace": f"{exe_download_location}.spdx.json", - "creationInfo": { - "created": ( - datetime.datetime.now(tz=datetime.timezone.utc) - .strftime("%Y-%m-%dT%H:%M:%SZ") - ), - "creators": [ - "Person: Python Release Managers", - f"Tool: ReleaseTools-{get_release_tools_commit_sha()}", - ], - # Version of the SPDX License ID list. - # This shouldn't need to be updated often, if ever. - "licenseListVersion": "3.22", - }, - }) - - # Create the SBOM entry for the CPython package. We use - # the SPDXID later on for creating relationships to files. - sbom_cpython_package = { - "SPDXID": "SPDXRef-PACKAGE-cpython", - "name": "CPython", - "versionInfo": cpython_version, - "licenseConcluded": "PSF-2.0", - "originator": "Organization: Python Software Foundation", - "supplier": "Organization: Python Software Foundation", - "packageFileName": exe_name, - "externalRefs": [ - { - "referenceCategory": "SECURITY", - "referenceLocator": f"cpe:2.3:a:python:python:{cpython_version}:*:*:*:*:*:*:*", - "referenceType": "cpe23Type", - } - ], - "primaryPackagePurpose": "APPLICATION", - "downloadLocation": exe_download_location, - "checksums": [{"algorithm": "SHA256", "checksumValue": exe_checksum_sha256}], - } - - # The top-level CPython package depends on every vendored sub-package. - for sbom_package in sbom_data["packages"]: - sbom_data["relationships"].append({ - "spdxElementId": sbom_cpython_package["SPDXID"], - "relatedSpdxElement": sbom_package["SPDXID"], - "relationshipType": "DEPENDS_ON", - }) - - sbom_data["packages"].append(sbom_cpython_package) + create_cpython_sbom(sbom_data, cpython_version=cpython_version, artifact_path=exe_path) + sbom_cpython_package_spdx_id = spdx_id("SPDXRef-PACKAGE-cpython") # Final relationship, this SBOM describes the CPython package. sbom_data["relationships"].append( { "spdxElementId": "SPDXRef-DOCUMENT", - "relatedSpdxElement": sbom_cpython_package["SPDXID"], + "relatedSpdxElement": sbom_cpython_package_spdx_id, "relationshipType": "DESCRIBES", } ) # Apply the 'supplier' tag to every package since we're shipping - # the package in the tarball itself. Originator field is used for maintainers. + # the package in the artifact itself. Originator field is used for maintainers. for sbom_package in sbom_data["packages"]: sbom_package["supplier"] = "Organization: Python Software Foundation" # Source packages have been compiled. if sbom_package["primaryPackagePurpose"] == "SOURCE": sbom_package["primaryPackagePurpose"] = "LIBRARY" - normalize_sbom_data(sbom_data) - return sbom_data @@ -634,6 +585,8 @@ def main() -> None: else: sbom_data = create_sbom_for_source_tarball(artifact_path) + # Normalize SBOM data for reproducibility. + normalize_sbom_data(sbom_data) with open(artifact_path + ".spdx.json", mode="w") as f: f.truncate() f.write(json.dumps(sbom_data, indent=2, sort_keys=True)) From 06842ade40d19b7f8cecad491a792f2d4960711b Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Fri, 23 Feb 2024 15:57:06 -0600 Subject: [PATCH 04/11] Add test cases for base CPython SBOM generation --- tests/fake-artifact.txt | 0 tests/test_sbom.py | 81 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 tests/fake-artifact.txt diff --git a/tests/fake-artifact.txt b/tests/fake-artifact.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_sbom.py b/tests/test_sbom.py index 427e895c..7cce88b6 100644 --- a/tests/test_sbom.py +++ b/tests/test_sbom.py @@ -1,6 +1,8 @@ +import pathlib import json import random import hashlib +import re import unittest.mock import pytest @@ -65,7 +67,6 @@ def test_normalization(): def test_fetch_project_metadata_from_pypi(mocker): - mock_urlopen = mocker.patch("sbom.urlopen") mock_urlopen.return_value = unittest.mock.Mock() @@ -116,3 +117,81 @@ def test_fetch_project_metadata_from_pypi(mocker): assert download_url == "https://files.pythonhosted.org/packages/.../pip-24.0.tar.gz" assert checksum_sha256 == "ea9bd1a847e8c5774a5777bb398c19e80bcd4e2aa16a4b301b718fe6f593aba2" + + +def test_create_cpython_sbom(): + sbom_data = {"packages": []} + + artifact_path = str(pathlib.Path(__file__).parent / "fake-artifact.txt") + sbom.create_cpython_sbom( + sbom_data, cpython_version="3.13.0", artifact_path=artifact_path + ) + + assert re.fullmatch( + r"^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$", + sbom_data["creationInfo"].pop("created") + ) + assert re.fullmatch( + r"^Tool: ReleaseTools-[a-f0-9]+$", + sbom_data["creationInfo"]["creators"].pop(1) + ) + + assert sbom_data == { + "packages": [ + { + "SPDXID": "SPDXRef-PACKAGE-cpython", + "name": "CPython", + "versionInfo": "3.13.0", + "licenseConcluded": "PSF-2.0", + "originator": "Organization: Python Software Foundation", + "supplier": "Organization: Python Software Foundation", + "packageFileName": "fake-artifact.txt", + "externalRefs": [ + { + "referenceCategory": "SECURITY", + "referenceLocator": "cpe:2.3:a:python:python:3.13.0:*:*:*:*:*:*:*", + "referenceType": "cpe23Type", + } + ], + "primaryPackagePurpose": "SOURCE", + "downloadLocation": "https://www.python.org/ftp/python/3.13.0/fake-artifact.txt", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + } + ], + } + ], + "SPDXID": "SPDXRef-DOCUMENT", + "spdxVersion": "SPDX-2.3", + "name": "CPython SBOM", + "dataLicense": "CC0-1.0", + "documentNamespace": "https://www.python.org/ftp/python/3.13.0/fake-artifact.txt.spdx.json", + "creationInfo": { + "creators": [ + "Person: Python Release Managers", + ], + "licenseListVersion": "3.22", + }, + } + + +@pytest.mark.parametrize( + ["cpython_version", "download_location"], + [ + ("3.13.0", "https://www.python.org/ftp/python/3.13.0/fake-artifact.txt"), + ("3.11.0a1", "https://www.python.org/ftp/python/3.11.0/fake-artifact.txt"), + ("3.12.0b2", "https://www.python.org/ftp/python/3.12.0/fake-artifact.txt"), + ("3.13.0rc3", "https://www.python.org/ftp/python/3.13.0/fake-artifact.txt"), + ] +) +def test_create_cpython_sbom_pre_release_download_location(cpython_version, download_location): + sbom_data = {"packages": []} + + artifact_path = str(pathlib.Path(__file__).parent / "fake-artifact.txt") + sbom.create_cpython_sbom( + sbom_data, cpython_version=cpython_version, artifact_path=artifact_path + ) + + assert sbom_data["packages"][0]["downloadLocation"] == download_location From 5306100fed6c20281a834ac688c0491dec515cc3 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Mon, 26 Feb 2024 15:24:27 -0600 Subject: [PATCH 05/11] Add --cpython-source-dir option --- sbom.py | 65 +++++++++++++++++++++++++++------- windows-release/stage-sbom.yml | 1 + 2 files changed, 54 insertions(+), 12 deletions(-) diff --git a/sbom.py b/sbom.py index ce5ebfe9..b2c26da4 100644 --- a/sbom.py +++ b/sbom.py @@ -10,11 +10,13 @@ """ +import argparse import datetime import hashlib import io import json import os +import pathlib import re import subprocess import sys @@ -535,28 +537,55 @@ def create_sbom_for_source_tarball(tarball_path: str): return sbom_data -def create_sbom_for_windows_artifact(exe_path): - exe_name = os.path.basename(exe_path) - cpython_version = re.match(r"^python-([0-9abrc.]+)(?:-|\.exe)", exe_name).group(1) +def create_sbom_for_windows_artifact(artifact_path, cpython_source_dir: str): + artifact_name = os.path.basename(artifact_path) + cpython_version = re.match(r"^python-([0-9abrc.]+)(?:-|\.exe|\.zip)", artifact_name).group(1) + + if not cpython_source_dir: + raise ValueError("Must specify --cpython-source-dir for Windows artifacts") + cpython_source_dir = pathlib.Path(cpython_source_dir) # Start with the CPython source SBOM as a base - with open("Misc/externals.spdx.json") as f: + with (cpython_source_dir / "Misc/externals.spdx.json").open() as f: sbom_data = json.loads(f.read()) + sbom_data["relationships"] = [] + sbom_data["files"] = [] + # Add all the packages from the source SBOM # We want to skip the file information because # the files aren't available in Windows artifacts. - with open("Misc/sbom.spdx.json") as f: + with (cpython_source_dir / "Misc/sbom.spdx.json").open() as f: source_sbom_data = json.loads(f.read()) for sbom_package in source_sbom_data["packages"]: sbom_data["packages"].append(sbom_package) - sbom_data["relationships"] = [] - sbom_data["files"] = [] - - create_cpython_sbom(sbom_data, cpython_version=cpython_version, artifact_path=exe_path) + create_cpython_sbom( + sbom_data, + cpython_version=cpython_version, + artifact_path=artifact_path + ) sbom_cpython_package_spdx_id = spdx_id("SPDXRef-PACKAGE-cpython") + # The Windows embed artifacts don't contain pip/ensurepip, + # but the MSI artifacts do. Add pip for MSI installers. + if artifact_name.endswith(".exe"): + + # Find the pip wheel in ensurepip in the source code + for pathname in os.listdir(cpython_source_dir / "Lib/ensurepip/_bundled"): + if pathname.startswith("pip-") and pathname.endswith(".whl"): + pip_wheel_filename = pathname + pip_wheel_bytes = (cpython_source_dir / f"Lib/ensurepip/_bundled/{pathname}").read_bytes() + break + else: + raise ValueError("Could not find pip wheel in 'Lib/ensurepip/_bundled/...'") + + create_pip_sbom_from_wheel( + sbom_data, + pip_wheel_filename=pip_wheel_filename, + pip_wheel_bytes=pip_wheel_bytes, + ) + # Final relationship, this SBOM describes the CPython package. sbom_data["relationships"].append( { @@ -578,10 +607,22 @@ def create_sbom_for_windows_artifact(exe_path): def main() -> None: - artifact_paths = sys.argv[1:] + parser = argparse.ArgumentParser() + parser.add_argument("--cpython-source-dir", default=None) + parser.add_argument("artifacts", nargs="+") + parsed_args = parser.parse_args(sys.argv[1:]) + + artifact_paths = parsed_args.artifacts + cpython_source_dir = parsed_args.cpython_source_dir + for artifact_path in artifact_paths: - if artifact_path.endswith(".exe"): - sbom_data = create_sbom_for_windows_artifact(artifact_path) + # Windows MSI and Embed artifacts + if artifact_path.endswith(".exe") or artifact_path.endswith(".zip"): + sbom_data = create_sbom_for_windows_artifact( + artifact_path, + cpython_source_dir=cpython_source_dir + ) + # Source artifacts else: sbom_data = create_sbom_for_source_tarball(artifact_path) diff --git a/windows-release/stage-sbom.yml b/windows-release/stage-sbom.yml index 37c76866..969d3f40 100644 --- a/windows-release/stage-sbom.yml +++ b/windows-release/stage-sbom.yml @@ -34,6 +34,7 @@ jobs: - powershell: > python "$(Build.SourcesDirectory)\sbom.py" + "--cpython-source-dir=$(Build.SourcesDirectory)" (gci msi\*\python-*.exe) workingDirectory: $(Build.BinariesDirectory) displayName: 'Create SBOMs for binaries' From 1d23d2da532a389877b86a6c88ad5f29838bcb26 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Thu, 4 Apr 2024 15:35:35 -0500 Subject: [PATCH 06/11] Move SBOM steps to MSI and embed stages --- windows-release/azure-pipelines.yml | 6 --- windows-release/msi-steps.yml | 23 +++++++++++ windows-release/stage-layout-embed.yml | 23 +++++++++++ windows-release/stage-sbom.yml | 53 -------------------------- 4 files changed, 46 insertions(+), 59 deletions(-) delete mode 100644 windows-release/stage-sbom.yml diff --git a/windows-release/azure-pipelines.yml b/windows-release/azure-pipelines.yml index 6b737bd1..8dde308c 100644 --- a/windows-release/azure-pipelines.yml +++ b/windows-release/azure-pipelines.yml @@ -147,12 +147,6 @@ stages: SigningCertificate: ${{ parameters.SigningCertificate }} DoFreethreaded: ${{ parameters.DoFreethreaded }} - - stage: SBOM - displayName: Create SBOMs - dependsOn: Build - jobs: - - template: stage-sbom.yml - - stage: Layout displayName: Generate layouts dependsOn: Sign diff --git a/windows-release/msi-steps.yml b/windows-release/msi-steps.yml index 0a18dbfc..b09aa66f 100644 --- a/windows-release/msi-steps.yml +++ b/windows-release/msi-steps.yml @@ -114,3 +114,26 @@ steps: - publish: '$(Build.ArtifactStagingDirectory)\msi' artifact: msi displayName: 'Publish MSI' + + - powershell: > + $(Python) + "$(Build.SourcesDirectory)\sbom.py" + "--cpython-source-dir=$(Build.SourcesDirectory)" + $(gci -r "$(Build.ArtifactStagingDirectory)\msi\**\python-*.exe") + workingDirectory: $(Build.BinariesDirectory) + displayName: 'Create SBOMs for binaries' + + - task: CopyFiles@2 + displayName: 'Layout Artifact: sbom' + inputs: + sourceFolder: $(Build.ArtifactStagingDirectory)\msi + targetFolder: $(Build.ArtifactStagingDirectory)\sbom + flatten: true + contents: | + **\*.spdx.json + + - task: PublishBuildArtifacts@1 + displayName: 'Publish Artifact: sbom_msi_$(Name)' + inputs: + PathtoPublish: '$(Build.ArtifactStagingDirectory)\sbom' + ArtifactName: sbom_msi_$(Name) diff --git a/windows-release/stage-layout-embed.yml b/windows-release/stage-layout-embed.yml index 6563ab5d..2d228b16 100644 --- a/windows-release/stage-layout-embed.yml +++ b/windows-release/stage-layout-embed.yml @@ -55,3 +55,26 @@ jobs: inputs: PathtoPublish: '$(Build.ArtifactStagingDirectory)\embed' ArtifactName: embed + + - powershell: > + $(Python) + "$(Build.SourcesDirectory)\sbom.py" + "--cpython-source-dir=$(Build.SourcesDirectory)" + "$(Build.ArtifactStagingDirectory)\embed\python-$(VersionText)-embed-$(Name).zip" + workingDirectory: $(Build.BinariesDirectory) + displayName: 'Create SBOMs for binaries' + + - task: CopyFiles@2 + displayName: 'Layout Artifact: sbom' + inputs: + sourceFolder: $(Build.ArtifactStagingDirectory)\embed + targetFolder: $(Build.ArtifactStagingDirectory)\sbom + flatten: true + contents: | + **\*.spdx.json + + - task: PublishBuildArtifacts@1 + displayName: 'Publish Artifact: sbom_embed_$(Name)' + inputs: + PathtoPublish: '$(Build.ArtifactStagingDirectory)\sbom' + ArtifactName: sbom_embed_$(Name) diff --git a/windows-release/stage-sbom.yml b/windows-release/stage-sbom.yml deleted file mode 100644 index 969d3f40..00000000 --- a/windows-release/stage-sbom.yml +++ /dev/null @@ -1,53 +0,0 @@ -jobs: -- job: SBOM_Files - displayName: Create SBOMs for Python binaries - - pool: - vmImage: windows-2022 - - workspace: - clean: all - - strategy: - matrix: - win32: - Name: win32 - amd64: - Name: amd64 - arm64: - Name: arm64 - - steps: - - task: UsePythonVersion@0 - displayName: 'Use Python 3.6 or later' - inputs: - versionSpec: '>=3.6' - - - template: ./checkout.yml - - - task: DownloadPipelineArtifact@1 - displayName: 'Download artifact: bin_$(Name)' - inputs: - artifactName: bin_$(Name) - targetPath: $(Build.BinariesDirectory)\bin - - - powershell: > - python - "$(Build.SourcesDirectory)\sbom.py" - "--cpython-source-dir=$(Build.SourcesDirectory)" - (gci msi\*\python-*.exe) - workingDirectory: $(Build.BinariesDirectory) - displayName: 'Create SBOMs for binaries' - - - task: CopyFiles@2 - displayName: 'Layout Artifact: sbom' - inputs: - sourceFolder: $(Build.BinariesDirectory)\bin - targetFolder: $(Build.ArtifactStagingDirectory)\sbom - flatten: true - contents: | - **\*.spdx.json - - - publish: '$(Build.ArtifactStagingDirectory)\sbom' - artifact: sbom - displayName: 'Publish artifact: sbom' From c01765dacd7876de68e712362e2f8015aafae02b Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Fri, 5 Apr 2024 09:00:25 -0500 Subject: [PATCH 07/11] Move all publishing steps to the end of jobs --- windows-release/msi-steps.yml | 10 +++++----- windows-release/stage-layout-embed.yml | 22 +++++++++++----------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/windows-release/msi-steps.yml b/windows-release/msi-steps.yml index b09aa66f..e80ed1aa 100644 --- a/windows-release/msi-steps.yml +++ b/windows-release/msi-steps.yml @@ -111,12 +111,8 @@ steps: *.cab *.exe - - publish: '$(Build.ArtifactStagingDirectory)\msi' - artifact: msi - displayName: 'Publish MSI' - - powershell: > - $(Python) + & "$(Python)" "$(Build.SourcesDirectory)\sbom.py" "--cpython-source-dir=$(Build.SourcesDirectory)" $(gci -r "$(Build.ArtifactStagingDirectory)\msi\**\python-*.exe") @@ -132,6 +128,10 @@ steps: contents: | **\*.spdx.json + - publish: '$(Build.ArtifactStagingDirectory)\msi' + artifact: msi + displayName: 'Publish MSI' + - task: PublishBuildArtifacts@1 displayName: 'Publish Artifact: sbom_msi_$(Name)' inputs: diff --git a/windows-release/stage-layout-embed.yml b/windows-release/stage-layout-embed.yml index 2d228b16..bbbb50ed 100644 --- a/windows-release/stage-layout-embed.yml +++ b/windows-release/stage-layout-embed.yml @@ -46,18 +46,8 @@ jobs: --preset-embed displayName: 'Generate embeddable layout' - - publish: '$(Build.ArtifactStagingDirectory)\layout' - artifact: layout_embed_$(Name) - displayName: 'Publish Artifact: layout_embed_$(Name)' - - - task: PublishBuildArtifacts@1 - displayName: 'Publish Artifact: embed' - inputs: - PathtoPublish: '$(Build.ArtifactStagingDirectory)\embed' - ArtifactName: embed - - powershell: > - $(Python) + & "$(Python)" "$(Build.SourcesDirectory)\sbom.py" "--cpython-source-dir=$(Build.SourcesDirectory)" "$(Build.ArtifactStagingDirectory)\embed\python-$(VersionText)-embed-$(Name).zip" @@ -73,6 +63,16 @@ jobs: contents: | **\*.spdx.json + - publish: '$(Build.ArtifactStagingDirectory)\layout' + artifact: layout_embed_$(Name) + displayName: 'Publish Artifact: layout_embed_$(Name)' + + - task: PublishBuildArtifacts@1 + displayName: 'Publish Artifact: embed' + inputs: + PathtoPublish: '$(Build.ArtifactStagingDirectory)\embed' + ArtifactName: embed + - task: PublishBuildArtifacts@1 displayName: 'Publish Artifact: sbom_embed_$(Name)' inputs: From 04ee7a65197cdb2b6a884c66283f18cea7ea3851 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Mon, 8 Apr 2024 12:41:22 -0500 Subject: [PATCH 08/11] Checkout release-tools manually, remove quotes --- windows-release/msi-steps.yml | 2 +- windows-release/stage-layout-embed.yml | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/windows-release/msi-steps.yml b/windows-release/msi-steps.yml index e80ed1aa..1a7d6803 100644 --- a/windows-release/msi-steps.yml +++ b/windows-release/msi-steps.yml @@ -112,7 +112,7 @@ steps: *.exe - powershell: > - & "$(Python)" + & $(Python) "$(Build.SourcesDirectory)\sbom.py" "--cpython-source-dir=$(Build.SourcesDirectory)" $(gci -r "$(Build.ArtifactStagingDirectory)\msi\**\python-*.exe") diff --git a/windows-release/stage-layout-embed.yml b/windows-release/stage-layout-embed.yml index bbbb50ed..bb572eef 100644 --- a/windows-release/stage-layout-embed.yml +++ b/windows-release/stage-layout-embed.yml @@ -46,9 +46,14 @@ jobs: --preset-embed displayName: 'Generate embeddable layout' + - powershell: > + git clone $(Build.Repository.Uri) -b $(Build.SourceBranch) --single-branch --no-checkout "$(Pipeline.Workspace)\release-tools" + git -C "$(Pipeline.Workspace)\release-tools" checkout $(Build.SourceVersion) + displayName: 'Clone the python/release-tools repository' + - powershell: > & "$(Python)" - "$(Build.SourcesDirectory)\sbom.py" + "$(Pipeline.Workspace)\release-tools\sbom.py" "--cpython-source-dir=$(Build.SourcesDirectory)" "$(Build.ArtifactStagingDirectory)\embed\python-$(VersionText)-embed-$(Name).zip" workingDirectory: $(Build.BinariesDirectory) From e9c9391228f5129c3eb2af6ae81104196e35a2bb Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Mon, 8 Apr 2024 16:08:11 -0500 Subject: [PATCH 09/11] Also clone in msi-steps job, fix multi-command character --- windows-release/msi-steps.yml | 7 ++++++- windows-release/stage-layout-embed.yml | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/windows-release/msi-steps.yml b/windows-release/msi-steps.yml index 1a7d6803..57a803f7 100644 --- a/windows-release/msi-steps.yml +++ b/windows-release/msi-steps.yml @@ -111,9 +111,14 @@ steps: *.cab *.exe + - powershell: | + git clone $(Build.Repository.Uri) -b $(Build.SourceBranch) --single-branch --no-checkout "$(Pipeline.Workspace)\release-tools" + git -C "$(Pipeline.Workspace)\release-tools" checkout $(Build.SourceVersion) + displayName: 'Clone the python/release-tools repository' + - powershell: > & $(Python) - "$(Build.SourcesDirectory)\sbom.py" + "$(Pipeline.Workspace)\release-tools\sbom.py" "--cpython-source-dir=$(Build.SourcesDirectory)" $(gci -r "$(Build.ArtifactStagingDirectory)\msi\**\python-*.exe") workingDirectory: $(Build.BinariesDirectory) diff --git a/windows-release/stage-layout-embed.yml b/windows-release/stage-layout-embed.yml index bb572eef..dd87da7f 100644 --- a/windows-release/stage-layout-embed.yml +++ b/windows-release/stage-layout-embed.yml @@ -46,7 +46,7 @@ jobs: --preset-embed displayName: 'Generate embeddable layout' - - powershell: > + - powershell: | git clone $(Build.Repository.Uri) -b $(Build.SourceBranch) --single-branch --no-checkout "$(Pipeline.Workspace)\release-tools" git -C "$(Pipeline.Workspace)\release-tools" checkout $(Build.SourceVersion) displayName: 'Clone the python/release-tools repository' From 8cb1ff13da6d51ed5a519118d54388ca6ed75ca0 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Mon, 8 Apr 2024 16:27:23 -0500 Subject: [PATCH 10/11] Use '$(Build.SourceBranchName)' --- windows-release/msi-steps.yml | 2 +- windows-release/stage-layout-embed.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/windows-release/msi-steps.yml b/windows-release/msi-steps.yml index 57a803f7..d0b09514 100644 --- a/windows-release/msi-steps.yml +++ b/windows-release/msi-steps.yml @@ -112,7 +112,7 @@ steps: *.exe - powershell: | - git clone $(Build.Repository.Uri) -b $(Build.SourceBranch) --single-branch --no-checkout "$(Pipeline.Workspace)\release-tools" + git clone $(Build.Repository.Uri) -b $(Build.SourceBranchName) --single-branch --no-checkout "$(Pipeline.Workspace)\release-tools" git -C "$(Pipeline.Workspace)\release-tools" checkout $(Build.SourceVersion) displayName: 'Clone the python/release-tools repository' diff --git a/windows-release/stage-layout-embed.yml b/windows-release/stage-layout-embed.yml index dd87da7f..e11242df 100644 --- a/windows-release/stage-layout-embed.yml +++ b/windows-release/stage-layout-embed.yml @@ -47,7 +47,7 @@ jobs: displayName: 'Generate embeddable layout' - powershell: | - git clone $(Build.Repository.Uri) -b $(Build.SourceBranch) --single-branch --no-checkout "$(Pipeline.Workspace)\release-tools" + git clone $(Build.Repository.Uri) -b $(Build.SourceBranchName) --single-branch --no-checkout "$(Pipeline.Workspace)\release-tools" git -C "$(Pipeline.Workspace)\release-tools" checkout $(Build.SourceVersion) displayName: 'Clone the python/release-tools repository' From b436169f6795dbde98a7a5b3584a618c87d55e7b Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Tue, 9 Apr 2024 15:13:34 -0500 Subject: [PATCH 11/11] Attempt to flatten all SBOM artifacts to one bucket --- windows-release/msi-steps.yml | 5 +++-- windows-release/stage-layout-embed.yml | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/windows-release/msi-steps.yml b/windows-release/msi-steps.yml index d0b09514..8c7442a9 100644 --- a/windows-release/msi-steps.yml +++ b/windows-release/msi-steps.yml @@ -130,6 +130,7 @@ steps: sourceFolder: $(Build.ArtifactStagingDirectory)\msi targetFolder: $(Build.ArtifactStagingDirectory)\sbom flatten: true + flattenFolders: true contents: | **\*.spdx.json @@ -138,7 +139,7 @@ steps: displayName: 'Publish MSI' - task: PublishBuildArtifacts@1 - displayName: 'Publish Artifact: sbom_msi_$(Name)' + displayName: 'Publish Artifact: sbom' inputs: PathtoPublish: '$(Build.ArtifactStagingDirectory)\sbom' - ArtifactName: sbom_msi_$(Name) + ArtifactName: sbom diff --git a/windows-release/stage-layout-embed.yml b/windows-release/stage-layout-embed.yml index e11242df..a2888555 100644 --- a/windows-release/stage-layout-embed.yml +++ b/windows-release/stage-layout-embed.yml @@ -79,7 +79,7 @@ jobs: ArtifactName: embed - task: PublishBuildArtifacts@1 - displayName: 'Publish Artifact: sbom_embed_$(Name)' + displayName: 'Publish Artifact: sbom' inputs: PathtoPublish: '$(Build.ArtifactStagingDirectory)\sbom' - ArtifactName: sbom_embed_$(Name) + ArtifactName: sbom