Skip to content

Commit

Permalink
merge: Integrate package data from scancode
Browse files Browse the repository at this point in the history
Currently only file data is included when tern is run with the scancode extension.
This merge brings in changes to incorporate packages found by
scancode into the data model.

The first commit brings in changes to the scancode executor to incorporate
the package data. The second commit merges package data from scancode
and other package analyzing methods.

 Signed-off-by: Nisha K <nishak@vmware.com>
  • Loading branch information
Nisha K authored Aug 26, 2020
2 parents 888fb74 + dfcacca commit a0dd8cd
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 4 deletions.
19 changes: 19 additions & 0 deletions tern/classes/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,3 +202,22 @@ def is_equal(self, other):
if value != other_pkg_dict[key]:
return False
return True

def merge(self, other):
    '''Fill in this Package's missing metadata using another Package.

    Nothing is merged unless other is a Package whose name and version
    both equal this one's. When they match, every property that is an
    empty string in this object's dictionary form, but not in other's, is
    copied over from other. Values that are not empty strings are left
    untouched (presumably this is what keeps files and origins out of the
    merge). Licenses listed in other.pkg_licenses that this object does
    not already have are appended to pkg_licenses. This is meant for
    situations where an external scanner collected package data that we
    didn't find ourselves.

    Returns True when the packages matched and the merge was performed,
    False otherwise'''
    if not isinstance(other, Package):
        return False
    if not (self.name == other.name and self.version == other.version):
        return False
    incoming = other.to_dict()
    for prop, current in self.to_dict().items():
        # only a blank string counts as missing metadata
        if current == '':
            replacement = incoming[prop]
            if replacement != '':
                setattr(self, prop, replacement)
    for other_license in other.pkg_licenses:
        if other_license in self.pkg_licenses:
            continue
        self.pkg_licenses.append(other_license)
    return True
39 changes: 35 additions & 4 deletions tern/extensions/scancode/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from tern.analyze import common
from tern.classes.notice import Notice
from tern.classes.file_data import FileData
from tern.classes.package import Package
from tern.extensions.executor import Executor
from tern.utils import constants
from tern.utils import rootfs
Expand Down Expand Up @@ -74,6 +75,20 @@ def get_scancode_file(file_dict):
return fd


def get_scancode_package(package_dict):
    '''Given a package dictionary from the scancode results, return a Package
    object with the results.

    The keys read from package_dict are: name, version, declared_license,
    copyright, repository_homepage_url, download_url and license_expression.
    A KeyError is raised if any of them is absent.

    The declared license and the license expression are stored in the
    pkg_licenses list, skipping empty values and duplicates'''
    package = Package(package_dict['name'])
    package.version = package_dict['version']
    package.pkg_license = package_dict['declared_license']
    package.copyright = package_dict['copyright']
    package.proj_url = package_dict['repository_homepage_url']
    package.download_url = package_dict['download_url']
    reported_licenses = [package_dict['declared_license'],
                         package_dict['license_expression']]
    # Package.merge() reads 'pkg_licenses', so the list has to be stored
    # under that name for scancode's licenses to reach the layer's packages.
    # Empty values and repeats are dropped so they are not merged as
    # licenses.
    unique_licenses = []
    for lic in reported_licenses:
        if lic and lic not in unique_licenses:
            unique_licenses.append(lic)
    package.pkg_licenses = unique_licenses
    # NOTE(review): 'licenses' was the attribute set before this fix; it is
    # kept unchanged in case anything still reads it - confirm and remove
    package.licenses = reported_licenses
    return package


def add_scancode_headers(layer_obj, headers):
'''Given a list of headers from scancode data, add unique headers to
the list of existing headers in the layer object'''
Expand All @@ -86,10 +101,12 @@ def add_scancode_headers(layer_obj, headers):

def collect_layer_data(layer_obj):
'''Use scancode to collect data from a layer filesystem. This function will
create a FileData object for every file found. After scanning, it will
return a list of FileData objects.
create FileData and Package objects for every File and Package found. After
scanning, it will return a tuple with a list of FileData and a list of
Package objects.
'''
files = []
packages = []
# run scancode against a directory
command = 'scancode -ilpcu --quiet --timeout 300 --json -'
full_cmd = get_filesystem_command(layer_obj, command)
Expand All @@ -107,7 +124,9 @@ def collect_layer_data(layer_obj):
for f in data['files']:
if f['type'] == 'file' and f['size'] != 0:
files.append(get_scancode_file(f))
return files
for package in f['packages']:
packages.append(get_scancode_package(package))
return files, packages


def add_file_data(layer_obj, collected_files):
Expand All @@ -123,6 +142,17 @@ def add_file_data(layer_obj, collected_files):
break


def add_package_data(layer_obj, collected_packages):
    '''Use the package data collected with scancode to fill in the package data
    for an ImageLayer object.

    Each collected package is offered to the packages already in
    layer_obj.packages through their merge() method, stopping at the first
    one that accepts it. A collected package that no existing package
    accepts is appended to layer_obj.packages. Nothing is returned; the
    layer object is modified in place'''
    for collected_package in collected_packages:
        # any() stops at the first successful merge, like a break would
        merged = any(package.merge(collected_package)
                     for package in layer_obj.packages)
        if not merged:
            # If the package wasn't in the layer, add it. Appending only in
            # this case keeps a merged package from being listed twice.
            layer_obj.packages.append(collected_package)


class Scancode(Executor):
'''Execute scancode'''
def execute(self, image_obj, redo=False):
Expand All @@ -134,9 +164,10 @@ def execute(self, image_obj, redo=False):
common.load_from_cache(layer)
if redo or not layer.files_analyzed:
# the layer doesn't have analyzed files, so run analysis
file_list = collect_layer_data(layer)
file_list, package_list = collect_layer_data(layer)
if file_list:
add_file_data(layer, file_list)
layer.files_analyzed = True
add_package_data(layer, package_list)
# save data to the cache
common.save_to_cache(image_obj)
17 changes: 17 additions & 0 deletions tests/test_class_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,23 @@ def testFill(self):
self.assertEqual(p.origins.origins[0].notices[2].message,
"No metadata for key: download_url")

def testMerge(self):
    '''Check Package.merge: it rejects non-Package input, fills in blank
    metadata and new licenses from a package with the same name and
    version, and refuses to merge once the versions differ'''
    target = Package('p1')
    target.version = '1.0'
    target.pkg_licenses = ['license1']

    donor = Package('p1')
    donor.version = '1.0'
    donor.download_url = 'SomeUrl'
    donor.checksum = 'abc'
    donor.pkg_licenses = ['license2']

    # anything that is not a Package is rejected outright
    self.assertFalse(target.merge('astring'))

    # same name and version: blank fields and new licenses come across
    self.assertTrue(target.merge(donor))
    expected_fields = (('download_url', 'SomeUrl'), ('checksum', 'abc'))
    for field, expected in expected_fields:
        self.assertEqual(getattr(target, field), expected)
    self.assertEqual(target.pkg_licenses, ['license1', 'license2'])

    # a differing version means it is no longer the same package
    donor.version = '2.0'
    self.assertFalse(target.merge(donor))


if __name__ == '__main__':
unittest.main()

0 comments on commit a0dd8cd

Please sign in to comment.