Skip to content

Commit

Permalink
Merge pull request #67 from yonhan3/umbrella-pid-feature
Browse files Browse the repository at this point in the history
new spdx_rpm script to create SPDX docs for RPMs built from src
  • Loading branch information
yonhan3 authored Feb 8, 2024
2 parents 3f02c2f + f0f9a20 commit 105d9e6
Show file tree
Hide file tree
Showing 7 changed files with 881 additions and 17 deletions.
17 changes: 12 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ Multiple Python scripts are developed to work together with these tools.
- bomsh_index_yocto.py script, which creates a blob index database for source packages of OpenEmbedded/Yocto.
- bomsh_index_ws.py script, which creates a blob index database for software build workspace.
- bomsh_sbom.py script, which creates or updates SPDX SBOM documents with OmniBOR info.
- bomsh_spdx_rpm.py script, which creates or updates SPDX SBOM documents for RPMs built from their source RPM.
- bomsh_art_tree.py script, which grafts new subtrees or prunes existing subtrees of OmniBOR artifact trees.
- bomsh_dynlib.py script, which creates raw_logfile of runtime-dependency fragments for ELF executables.
- bomsh_pylib.py script, which creates raw_logfile of runtime-dependency fragments for Python scripts.
Expand All @@ -58,11 +59,15 @@ Quick Start
For a quick start of using the Bomsh tool, run the below command:

$ git clone URL-of-this-git-repo bomsh
$ wget http://vault.centos.org/8-stream/AppStream/Source/SPackages/sysstat-11.7.3-9.el8.src.rpm
$ bomsh/scripts/bomsh_rebuild_rpm.py -c alma+epel-8-x86_64 --docker_image_base almalinux:8 -s sysstat-11.7.3-9.el8.src.rpm -d bomsh/scripts/sample_sysstat_cvedb.json -o outdir --syft_sbom
$ # if mock is >= 5.0 version, then the below "--mock_option=--no-bootstrap-image" command option may be needed
$ bomsh/scripts/bomsh_rebuild_rpm.py -c alma+epel-8-x86_64 --docker_image_base almalinux:8 -s sysstat-11.7.3-9.el8.src.rpm -d bomsh/scripts/sample_sysstat_cvedb.json -o outdir --syft_sbom --mock_option="--no-bootstrap-image --define 'packager BOMSH user $(id -un) at $(hostname)'"
$ wget https://vault.centos.org/8-stream/AppStream/Source/SPackages/sysstat-11.7.3-7.el8.src.rpm
$ bomsh/scripts/bomsh_rebuild_rpm.py -c alma+epel-8-x86_64 --docker_image_base almalinux:8 -s sysstat-11.7.3-7.el8.src.rpm -d bomsh/scripts/sample_sysstat_cvedb.json -o outdir --syft_sbom --bomsh_spdx --mock_option="--no-bootstrap-image --define 'packager BOMSH user $(id -un) at $(hostname)'"
$ grep -B1 -A3 CVElist outdir/bomsher_out/bomsh_logfiles/bomsh_search_jsonfile-details.json
$
$ # if the mock version is < 5.0, then the above "--mock_option=--no-bootstrap-image" command option may not be needed
$ wget https://vault.centos.org/8-stream/AppStream/Source/SPackages/sysstat-11.7.3-9.el8.src.rpm
$ bomsh/scripts/bomsh_rebuild_rpm.py -c alma+epel-8-x86_64 --docker_image_base almalinux:8 -s sysstat-11.7.3-9.el8.src.rpm -d bomsh/scripts/sample_sysstat_cvedb.json -o outdir3 --syft_sbom --bomsh_spdx
$ grep -B1 -A3 CVElist outdir3/bomsher_out/bomsh_logfiles/bomsh_search_jsonfile-details.json
$
$ # the above should take only a few minutes, and the below may take tens of minutes
$ wget https://buildinfos.debian.net/buildinfo-pool/s/sysstat/sysstat_11.7.3-1_all-amd64-source.buildinfo
$ bomsh/scripts/bomsh_rebuild_deb.py -f sysstat_11.7.3-1_all-amd64-source.buildinfo -d bomsh/scripts/sample_sysstat_cvedb.json -o outdir2 --syft_sbom --mmdebstrap_no_cleanup
Expand All @@ -75,7 +80,9 @@ are recorded by the Bomsh tool. The omnibor_dir/metadata/bomsh/* files contain
useful metadata collected by Bomsh. Also the bomsh_logfiles/bomsh_search_jsonfile* files
contain the constructed OmniBOR tree with relevant metadata for the built RPM/DEB packages,
the bomsh_logfiles/bomsh-index-* files contain the relevant package/blobs database,
and the syft_sbom/omnibor* files contain the SPDX SBOM documents with ExternalRef OmniBOR identifier.
the syft_sbom/omnibor* files contain the syft-generated SPDX SBOM documents with the ExternalRef OmniBOR identifier,
and the bomsh_sbom/* files contain the SPDX SBOM documents generated by the bomsh_spdx_rpm.py script,
which also carry the ExternalRef OmniBOR identifier.

Compile Bombash and Bomtrace from Source
----------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion scripts/bomsh_art_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def get_filetype(afile):
cmd = "file " + cmd_quote(afile) + " || true"
#print (cmd)
output = subprocess.check_output(cmd, shell=True, universal_newlines=True)
res = re.split(":\s+", output.strip())
res = re.split(r":\s+", output.strip())
if len(res) > 1:
return ": ".join(res[1:])
return "empty"
Expand Down
2 changes: 1 addition & 1 deletion scripts/bomsh_create_bom.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def get_filetype(afile):
cmd = "file " + cmd_quote(afile) + " || true"
#print (cmd)
output = subprocess.check_output(cmd, shell=True, universal_newlines=True)
res = re.split(":\s+", output.strip())
res = re.split(r":\s+", output.strip())
if len(res) > 1:
return ": ".join(res[1:])
return "empty"
Expand Down
2 changes: 1 addition & 1 deletion scripts/bomsh_dynlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def get_filetype(afile):
cmd = "file " + cmd_quote(afile) + " || true"
#print (cmd)
output = subprocess.check_output(cmd, shell=True, universal_newlines=True)
res = re.split(":\s+", output.strip())
res = re.split(r":\s+", output.strip())
if len(res) > 1:
return ": ".join(res[1:])
return "empty"
Expand Down
35 changes: 29 additions & 6 deletions scripts/bomsh_rebuild_rpm.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,8 @@ def fix_broken_symlinks(bomsher_outdir):
# Set up mock build environment
RUN \\
dnf install -y git wget mock rpm-build python3-pyyaml which automake autoconf ; \\
dnf group install -y "Development Tools" ; \\
dnf install -y git wget mock rpm-build python3-pip python3-pyyaml which automake autoconf ; \\
dnf clean all ;
# Set up bomtrace2/bomsh environment
Expand All @@ -141,11 +141,15 @@ def fix_broken_symlinks(bomsher_outdir):
./bootstrap && ./configure --enable-mpers=check && make ; \\
cp src/strace /tmp/bomtrace2 ;
# Set up SPDX tools-python environment
RUN cd /root ; git clone https://github.com/spdx/tools-python.git ;
# Bomtrace/Bomsh mock build run to generate OmniBOR documents
# if BASELINE_REBUILD is not empty, then it will not use bomtrace2 to run mock, that is, the baseline run.
# if CHROOT_CFG is not empty, then the provided mock chroot_cfg will be used, otherwise, default.cfg is used.
CMD if [ -z "${BASELINE_REBUILD}" ]; then bomtrace_cmd="/tmp/bomtrace2 -w /tmp/bomtrace_watched_programs -c /tmp/bomtrace.conf -o /tmp/bomsh_hook_strace_logfile " ; fi ; \\
if [ -z "${CHROOT_CFG}" ]; then CHROOT_CFG=$(basename $(readlink /etc/mock/default.cfg) .cfg) ; fi ; \\
if [ -z "${CHROOT_CFG}" ]; then CHROOT_CFG=$(basename $(readlink /etc/mock/default.cfg) .cfg) ; \\
elif [ -h /etc/mock/${CHROOT_CFG}.cfg ] ; then CHROOT_CFG=$(basename $(readlink /etc/mock/${CHROOT_CFG}.cfg) .cfg) ; fi ; \\
mkdir -p /out/bomsher_out ; cd /out/bomsher_out ; \\
# Need to put the extra MOCK_OPTION into an array for use by later mock command ; \\
echo $MOCK_OPTION ; eval "mock_opt=($MOCK_OPTION)" ; declare -p mock_opt ; \\
Expand All @@ -168,7 +172,10 @@ def fix_broken_symlinks(bomsher_outdir):
/tmp/bomsh_search_cve.py --derive_sbom -b omnibor_dir $cvedb_file_param -f $rpmfiles -vvv ; cp /tmp/bomsh_search_jsonfile* bomsh_logfiles/ ; \\
# Extra handling of syft generated SPDX SBOM documents ; \\
if [ "${SYFT_SBOM}" ]; then /tmp/bomsh_sbom.py -b omnibor_dir -F $rpmfiles -vv --output_dir syft_sbom --sbom_format spdx ; fi ; \\
if [ "${SYFT_SBOM}" ]; then /tmp/bomsh_sbom.py -b omnibor_dir -F $rpmfiles -vv --output_dir syft_sbom --sbom_format spdx-json ; fi ;
if [ "${SYFT_SBOM}" ]; then /tmp/bomsh_sbom.py -b omnibor_dir -F $rpmfiles -vv --output_dir syft_sbom --sbom_format spdx-json ; fi ; \\
# Extra handling of bomsh-spdx generated SPDX SBOM documents ; \\
export PYTHONPATH=/root/tools-python/src ; \\
if [ "${BOMSH_SPDX}" ]; then /tmp/bomsh_spdx_rpm.py -r $rpmfiles --output_dir bomsh_sbom --sbom_server_url http://your.org ; fi ;
'''

def create_dockerfile(work_dir):
Expand All @@ -185,10 +192,20 @@ def create_dockerfile(work_dir):
bomsh_dockerfile_str = bomsh_dockerfile_str.replace("RUN ", "RUN dnf install -y epel-release ; ")
if "almalinux" in from_str:
bomsh_dockerfile_str = bomsh_dockerfile_str.replace("RUN ", "RUN dnf install -y almalinux-release ; ")
if args.bomsh_spdx:
# bomsh_spdx_rpm.py requires additional python libraries from pip3
bomsh_dockerfile_str = bomsh_dockerfile_str.replace("dnf clean all ;",
"pip3 install requests license-expression beartype uritools rdflib xmltodict pyyaml packageurl-python ; \\\n"
" dnf clean all ;")
if args.bomsh_spdx and "almalinux:8" in from_str:
# almalinux8 has python3.6 version as default, but we need at least python3.8 version for bomsh_spdx_rpm.py and spdx/tools-python library
bomsh_dockerfile_str = bomsh_dockerfile_str.replace("dnf clean all ;",
"dnf install -y python38 python38-pip ; ln -sf /usr/bin/python3.8 /usr/bin/python3 ; ln -sf /usr/bin/pip3.8 /usr/bin/pip3 ; \\\n"
" pip3.8 install requests license-expression beartype uritools rdflib xmltodict pyyaml packageurl-python ; \\\n"
" dnf clean all ;")
if args.chroot_cfg and "mageia" in args.chroot_cfg: # special handling for mageia platform due to file permission check with multiple levels of symlinks
tokens = g_bomsh_dockerfile_str.splitlines()
# insert one line to do sed replacement of bomtrace.conf file to change bomtrace config
bomsh_dockerfile_str = '\n'.join(tokens[:11] + [" sed -i -e 's/#skip_checking_prog_access=1/skip_checking_prog_access=1/' /tmp/bomtrace.conf ; \\",] + tokens[11:]) + '\n'
bomsh_dockerfile_str = bomsh_dockerfile_str.replace("cp bomsh/scripts/*.py bomsh/bin/bomtrace* /tmp ; ",
"cp bomsh/scripts/*.py bomsh/bin/bomtrace* /tmp ; \\\n sed -i -e 's/#skip_checking_prog_access=1/skip_checking_prog_access=1/' /tmp/bomtrace.conf ; ")
dockerfile_str = from_str + bomsh_dockerfile_str
dockerfile = os.path.join(work_dir, "Dockerfile")
write_text_file(dockerfile, dockerfile_str)
Expand Down Expand Up @@ -224,6 +241,9 @@ def run_docker(src_rpm_file, output_dir):
if args.syft_sbom:
# Generate SBOM document with the syft tool
docker_cmd += ' -e SYFT_SBOM=1'
if args.bomsh_spdx:
# Generate SPDX SBOM document with the bomsh_spdx_rpm.py tool
docker_cmd += ' -e BOMSH_SPDX=1'
docker_cmd += ' -v ' + output_dir + ':/out $(docker build -t bomsher-rpm -q ' + bomsher_indir + ')'
verbose("==== Here is the docker run command: " + docker_cmd, LEVEL_1)
os.system(docker_cmd)
Expand Down Expand Up @@ -262,6 +282,9 @@ def rtd_parse_options():
parser.add_argument("--syft_sbom",
action = "store_true",
help = "run syft to generate RPM SBOM in spdx/spdx-json SBOM format")
parser.add_argument("--bomsh_spdx",
action = "store_true",
help = "run bomsh_spdx_rpm.py to generate RPM SBOM in spdx/spdx-json SBOM format")
parser.add_argument("-b", "--baseline_rebuild",
action = "store_true",
help = "baseline rebuild only, do not run bomtrace2 to generate OmniBOR documents")
Expand Down
8 changes: 5 additions & 3 deletions scripts/bomsh_search_cve.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def get_filetype(afile):
cmd = "file " + cmd_quote(afile) + " || true"
#print (cmd)
output = subprocess.check_output(cmd, shell=True, universal_newlines=True)
res = re.split(":\s+", output.strip())
res = re.split(r":\s+", output.strip())
if len(res) > 1:
return ": ".join(res[1:])
return "empty"
Expand Down Expand Up @@ -488,7 +488,7 @@ def get_all_gitbom_doc_files_with_checksum(topdir, checksum):
hexchar_num = 62
topdir_abspath = os.path.abspath(topdir)
# filter out .git/ directory which contains files with similar names.
cmd = 'find ' + topdir_abspath + ' -name "' + hexchar * hexchar_num + '" -path "*/[0-9a-f][0-9a-f]/*" -type f | grep -v "\/\.git\/" | xargs grep -l ' + checksum + ' || true'
cmd = 'find ' + topdir_abspath + ' -name "' + hexchar * hexchar_num + '" -path "*/[0-9a-f][0-9a-f]/*" -type f | grep -v "/\.git/" | xargs grep -l ' + checksum + ' || true'
verbose(cmd, LEVEL_3)
return get_shell_cmd_output(cmd).splitlines()

Expand Down Expand Up @@ -528,7 +528,7 @@ def get_all_gitbom_doc_files_in_dir(topdir, is_topdir=True):
else:
cmd = 'find ' + topdir_abspath + ' -name "' + hexchar * hexchar_num + '" -path "*/objects/[0-9a-f][0-9a-f]/*" -type f || true'
# filter out .git/ directory which contains files with similar names.
cmd = 'find ' + topdir_abspath + ' -name "' + hexchar * hexchar_num + '" -path "*/[0-9a-f][0-9a-f]/*" -type f | grep -v "\/\.git\/" || true'
cmd = 'find ' + topdir_abspath + ' -name "' + hexchar * hexchar_num + '" -path "*/[0-9a-f][0-9a-f]/*" -type f | grep -v "/\.git/" || true'
verbose(cmd, LEVEL_3)
output = get_shell_cmd_output(cmd)
ret = {}
Expand Down Expand Up @@ -704,6 +704,8 @@ def create_gitbom_doc_treedb_for_checksums(bomdir, checksums, use_checksum_line=
print("Warning: No embedded .omnibor section and no recorded bom_id mapping for blob ID: " + checksum)
continue
verbose("Will use mapping of blob_id: " + checksum + " bom_id: " + bom_id)
if checksum not in g_gitbom_doc_db: # save the top-level checksum => bom_id mapping too
g_gitbom_doc_db[checksum] = bom_id
if use_checksum_line:
# Add below blob_id to bom_id mapping for convenience, which has the is_self_hashtree attribute to distinguish from regular nodes.
checksum_line = "blob " + checksum + " bom " + bom_id
Expand Down
Loading

0 comments on commit 105d9e6

Please sign in to comment.