Skip to content

Commit

Permalink
merge: Enable analysis for multistage Dockerfiles
Browse files Browse the repository at this point in the history
This merge brings in changes that will enable analysis
of multistage Dockerfiles.

The problem with building multistage Dockerfiles is that Docker
deletes the intermediate stages, leaving only the final container
image, which contains just the bare minimum needed to run whatever
was compiled in the previous stages.

To get around this, Tern will split the Dockerfiles into several
Dockerfiles corresponding to each of the stages. To prevent
errors in copying artifacts from one stage to the next, Tern
creates the intermediate Dockerfiles like so:

original Dockerfile:
FROM x as stage1
...
FROM y as stage2
...
FROM z as stage3

Intermediate Dockerfiles:
1:
FROM x as stage1
...
2:
FROM x as stage1
...
FROM y as stage2
...
Tern will finally build the original Dockerfile to get the last
stage for analysis.

Note that the locking function still doesn't make use of this
feature.

Signed-off-by: Nisha K <nishak@vmware.com>
  • Loading branch information
Nisha K authored Nov 19, 2020
2 parents 453fad6 + 906edac commit bb38e14
Show file tree
Hide file tree
Showing 4 changed files with 115 additions and 63 deletions.
4 changes: 2 additions & 2 deletions ci/test_files_touched.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@
re.compile('tern/classes/template.py'):
['python tests/test_class_template.py',
'tern report -f spdxtagvalue -i photon:3.0',
'tern lock Dockerfile'],
'tern lock docker/Dockerfile'],
# tern/command_lib
re.compile('tern/command_lib'): [
'tern report -i photon:3.0',
Expand All @@ -82,7 +82,7 @@
'tern report -i golang:alpine',
'tern report -d samples/alpine_python/Dockerfile',
'tern report -w photon.tar',
'tern lock Dockerfile'],
'tern lock docker/Dockerfile'],
# tern/load
re.compile('tern/load'): [
'python tests/test_load_docker_api.py'],
Expand Down
56 changes: 17 additions & 39 deletions tern/analyze/default/dockerfile/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from dockerfile_parse import DockerfileParser
import re
import logging
import os

from tern.utils import general
from tern.utils import constants
Expand Down Expand Up @@ -82,6 +81,7 @@ def get_dockerfile_obj(dockerfile_name, prev_env=None):
dfobj.envs = parser.envs
dfobj.prev_env = prev_env
dfobj.parent_images = parser.parent_images
dfobj.is_multistage = parser.is_multistage
return dfobj


Expand Down Expand Up @@ -303,8 +303,8 @@ def expand_add_command(dfobj):
+ ' # ' + comment_line


def check_multistage_dockerfile(dfobj):
"""Given a dockerfile object, return the index(es) of FROM line(s)
def get_from_indices(dfobj):
"""Given a dockerfile object, return the indices of FROM lines
in the dfobj structure."""
from_lines = []
for idx, st in enumerate(dfobj.structure):
Expand All @@ -313,40 +313,18 @@ def check_multistage_dockerfile(dfobj):
return from_lines


def get_multistage_image_dockerfiles(dfobj_multi):
"""Given a multistage dockerfile object, return a list of structures
for building image."""
file_path_list = []
structure = []
file_idx = 0
from_lines = check_multistage_dockerfile(dfobj_multi)
def get_dockerfile_stages(dfobj_multi):
"""Given a multistage dockerfile object, return a list of content for
each stage"""
stages = []
from_lines = get_from_indices(dfobj_multi)
# Pop the first FROM
from_lines.pop(0)
# Get the temp folder path
temp_folder_path = os.path.join(os.path.dirname(dfobj_multi.filepath),
constants.multistage_dir)
if not os.path.isdir(temp_folder_path):
os.mkdir(temp_folder_path)
for idx in range(len(dfobj_multi.structure)):
if idx in from_lines:
if structure:
df_folder_path = temp_folder_path + '/%d' % (file_idx)
# we make a new dir for the dockerfile of each stage.
if not os.path.isdir(df_folder_path):
os.mkdir(df_folder_path)
file_path = df_folder_path + '/Dockerfile'
file_idx += 1
write_dockerfile_by_structure(file_path, structure)
file_path_list.append(file_path)
structure.append(dfobj_multi.structure[idx])
if structure:
file_path_list.append(dfobj_multi.filepath)
return file_path_list


def write_dockerfile_by_structure(file_name, structure):
"""Given a dockerfile name and its structure, write the content into the
dockerfile."""
with open(file_name, 'w') as f:
for st in structure:
f.write(st['content'])
start_line = from_lines.pop(0)
while len(from_lines) >= 1:
stage = ""
end_line = from_lines.pop(0)
for idx in range(start_line, end_line):
if dfobj_multi.structure[idx]['instruction'] != 'COMMENT':
stage = stage + dfobj_multi.structure[idx]['content']
stages.append(stage)
return stages
91 changes: 77 additions & 14 deletions tern/analyze/default/dockerfile/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import docker
import logging
import os
import subprocess # nosec

from tern.utils import constants
Expand Down Expand Up @@ -164,42 +165,104 @@ def base_and_run_analysis(dfile, redo, driver, keep, extension):
return image_list


def analyze_single_dockerfile(dockerfile, redo, driver, keep_wd, extend):
    """Build and analyze one Dockerfile, returning the list of images found.

    Inputs are:
        dockerfile: path to the Dockerfile to analyze
        redo: True when the analysis must be redone from scratch
        driver: the filesystem driver to use
        keep_wd: True when the working directory must be left in place
        extend: the extension used for the analysis"""
    # Try to build the image first: a successful build lets us analyze the
    # complete image; otherwise fall back to analyzing just the base image
    # and the RUN lines of the Dockerfile.
    logger.debug('Building Docker image with Dockerfile: %s', dockerfile)
    if docker_api.build_and_dump(dockerfile):
        logger.debug('Docker image successfully built. Analyzing...')
        return full_image_analysis(dockerfile, redo, driver, keep_wd, extend)
    logger.warning('Cannot build image')
    return base_and_run_analysis(dockerfile, redo, driver, keep_wd, extend)


def execute_dockerfile(args, locking=False):
"""Execution path for Dockerfiles"""
dfile = ''
if locking:
dfile = args.lock
else:
dfile = args.dockerfile
image_list = []
logger.debug("Parsing Dockerfile...")
dfobj = parse.get_dockerfile_obj(dfile)
# expand potential ARG values so base image tag is correct
parse.expand_arg(dfobj)
parse.expand_vars(dfobj)
# Store dockerfile path and commands so we can access it during execution
lock.load_docker_commands(dfobj)
# attempt to build the image
logger.debug('Building Docker image...')
image_info = docker_api.build_and_dump(dfile)
image_list = []
if image_info:
logger.debug('Docker image successfully built. Analyzing...')
# analyze the full image
image_list = full_image_analysis(
dfile, args.redo, args.driver, args.keep_wd, args.extend)
if dfobj.is_multistage:
image_list = analyze_multistage_dockerfile(
dfobj, args.redo, args.driver, args.keep_wd, args.extend)
else:
# cannot build the image
logger.warning('Cannot build image')
# analyze the base image and any RUN lines in the Dockerfile
image_list = base_and_run_analysis(
image_list = analyze_single_dockerfile(
dfile, args.redo, args.driver, args.keep_wd, args.extend)
# generate report based on what images were created
if image_list:
if not locking:
report.report_out(args, *image_list)
else:
logger.debug('Generating locked Dockerfile...')
# we can only lock based on a fully built image for now
# we can only lock one image for now
locked_dfobj = lock.lock_dockerfile(dfobj, image_list[0])
output = lock.create_locked_dockerfile(locked_dfobj)
lock.write_locked_dockerfile(output, args.output_file)


def write_dockerfile_stages(dfobj):
    """Given a Dockerfile object, create a Dockerfile for each of the
    stages for analysis. Return a list of the Dockerfile paths written.

    Each stage file is written next to the original Dockerfile and is
    named '<original name>_<stage number>' (1-based)."""
    stages = parse.get_dockerfile_stages(dfobj)
    dockerfiles = []
    filepath, filename = os.path.split(dfobj.filepath)
    # Use enumerate rather than stages.index(stage): index() is an O(n)
    # scan per stage and returns the *first* match, so two stages with
    # identical content would collide on the same file name (overwriting
    # one file and returning a duplicate path).
    for stage_num, stage in enumerate(stages, start=1):
        stagefile = os.path.join(
            filepath, '{}_{}'.format(filename, stage_num))
        with open(stagefile, 'w') as f:
            f.write(stage)
        dockerfiles.append(stagefile)
    return dockerfiles


def clean_dockerfile_stages(dockerfiles):
    """Delete every intermediate stage Dockerfile in the given list."""
    for path in dockerfiles:
        os.remove(path)


def analyze_multistage_dockerfile(dfobj, redo, driver, keep_wd, extend):
    """Split the multistage dockerfile, and then analyze each stage.
    Return the combined list of images from all stages.

    Inputs:
        dfobj: the Dockerfile object
        redo: True when we want to redo the analysis
        driver: the filesystem driver to use
        keep_wd: keep working directory
        extend: the extension to use for analysis"""
    # split the multistage dockerfile into single stages for analysis
    dockerfiles = write_dockerfile_stages(dfobj)
    image_list = []
    try:
        for dfile in dockerfiles:
            image_list.extend(analyze_single_dockerfile(
                dfile, redo, driver, keep_wd, extend))
    finally:
        # always remove the intermediate Dockerfiles, even if a stage's
        # analysis raises, so we don't litter the Dockerfile's directory
        clean_dockerfile_stages(dockerfiles)
    # finally build the existing Dockerfile to get the last stage
    image_list.extend(analyze_single_dockerfile(
        dfobj.filepath, redo, driver, keep_wd, extend))
    return image_list
27 changes: 19 additions & 8 deletions tern/load/docker_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,20 +42,31 @@ def check_docker_setup():
sys.exit(1)


def build_image(dockerfile, client):
def build_image(dfile, client):
"""Invoke docker build with the given dockerfile. It is assumed that
docker is installed and the docker daemon is running"""
df_path = os.path.abspath(dockerfile)
df_path = os.path.abspath(dfile)
image_tag = '{name}:{tag}'.format(name=constants.image,
tag=str(int(time.time())))
# try to build the image
# TODO: docker's upstream API does not support build
# contexts yet. You are expected to provide that as
# a tarball as of the 4.3.1 release
# This is a hack to get around that
# source:
# https://github.com/docker/docker-py/issues/2105#issuecomment-613685891
dfcontents = ''
dfcontext = os.path.dirname(df_path)
try:
with open(df_path, 'rb') as f:
image_obj, _ = client.images.build(fileobj=f,
tag=image_tag,
nocache=True,
forcerm=True)
return image_obj
with open(df_path) as f:
dfcontents = f.read()
# terrible bypass of the API
docker.api.build.process_dockerfile = lambda dockerfile, path: (
df_path, dockerfile)
image_obj, _ = client.images.build(
tag=image_tag, path=dfcontext, dockerfile=dfcontents, nocache=True,
forcerm=True)
return image_obj
except FileNotFoundError as e:
logger.critical('Dockerfile not found: %s', e)
return None
Expand Down

0 comments on commit bb38e14

Please sign in to comment.