diff --git a/config/internal/apiserver/sample-pipeline/sample-pipeline.yaml.tmpl b/config/internal/apiserver/sample-pipeline/sample-pipeline.yaml.tmpl index 261f364cf..be1dd6c01 100644 --- a/config/internal/apiserver/sample-pipeline/sample-pipeline.yaml.tmpl +++ b/config/internal/apiserver/sample-pipeline/sample-pipeline.yaml.tmpl @@ -8,547 +8,792 @@ metadata: component: data-science-pipelines data: iris-pipeline-compiled.yaml: |- - apiVersion: tekton.dev/v1beta1 - kind: PipelineRun - metadata: - name: iris-pipeline - annotations: - tekton.dev/output_artifacts: '{"data-prep": [{"key": "artifacts/$PIPELINERUN/data-prep/X_test.tgz", - "name": "data-prep-X_test", "path": "/tmp/outputs/X_test/data"}, {"key": "artifacts/$PIPELINERUN/data-prep/X_train.tgz", - "name": "data-prep-X_train", "path": "/tmp/outputs/X_train/data"}, {"key": "artifacts/$PIPELINERUN/data-prep/y_test.tgz", - "name": "data-prep-y_test", "path": "/tmp/outputs/y_test/data"}, {"key": "artifacts/$PIPELINERUN/data-prep/y_train.tgz", - "name": "data-prep-y_train", "path": "/tmp/outputs/y_train/data"}], "evaluate-model": - [{"key": "artifacts/$PIPELINERUN/evaluate-model/mlpipeline-metrics.tgz", "name": - "mlpipeline-metrics", "path": "/tmp/outputs/mlpipeline_metrics/data"}], "train-model": - [{"key": "artifacts/$PIPELINERUN/train-model/model.tgz", "name": "train-model-model", - "path": "/tmp/outputs/model/data"}]}' - tekton.dev/input_artifacts: '{"evaluate-model": [{"name": "data-prep-X_test", - "parent_task": "data-prep"}, {"name": "data-prep-y_test", "parent_task": "data-prep"}, - {"name": "train-model-model", "parent_task": "train-model"}], "train-model": - [{"name": "data-prep-X_train", "parent_task": "data-prep"}, {"name": "data-prep-y_train", - "parent_task": "data-prep"}], "validate-model": [{"name": "train-model-model", - "parent_task": "train-model"}]}' - tekton.dev/artifact_bucket: mlpipeline - tekton.dev/artifact_endpoint: minio-service.kubeflow:9000 - tekton.dev/artifact_endpoint_scheme: http:// - tekton.dev/artifact_items: '{"data-prep": [["X_test", "$(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/X_test"], - ["X_train", "$(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/X_train"], - ["y_test", "$(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/y_test"], - ["y_train", "$(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/y_train"]], - "evaluate-model": [["mlpipeline-metrics", "/tmp/outputs/mlpipeline_metrics/data"]], - "train-model": [["model", "$(workspaces.train-model.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/model"]], - "validate-model": []}' - sidecar.istio.io/inject: "false" - tekton.dev/template: '' - pipelines.kubeflow.org/big_data_passing_format: $(workspaces.$TASK_NAME.path)/artifacts/$ORIG_PR_NAME/$TASKRUN_NAME/$TASK_PARAM_NAME - pipelines.kubeflow.org/pipeline_spec: '{"inputs": [{"default": "iris-model", "name": - "model_obc", "optional": true, "type": "String"}], "name": "Iris Pipeline"}' - labels: - pipelines.kubeflow.org/pipelinename: '' - pipelines.kubeflow.org/generation: '' - spec: +{{ if (eq .DSPVersion "v2") }} + # PIPELINE DEFINITION + # Name: iris-training-pipeline + # Inputs: + # min_max_scaler: bool + # neighbors: int + # standard_scaler: bool + # Outputs: + # train-model-metrics: system.ClassificationMetrics + components: + comp-create-dataset: + executorLabel: exec-create-dataset + outputDefinitions: + artifacts: + iris_dataset: + artifactType: + schemaTitle: system.Dataset + 
schemaVersion: 0.0.1 + comp-normalize-dataset: + executorLabel: exec-normalize-dataset + inputDefinitions: + artifacts: + input_iris_dataset: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + parameters: + min_max_scaler: + parameterType: BOOLEAN + standard_scaler: + parameterType: BOOLEAN + outputDefinitions: + artifacts: + normalized_iris_dataset: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + comp-train-model: + executorLabel: exec-train-model + inputDefinitions: + artifacts: + normalized_iris_dataset: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + parameters: + n_neighbors: + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + metrics: + artifactType: + schemaTitle: system.ClassificationMetrics + schemaVersion: 0.0.1 + model: + artifactType: + schemaTitle: system.Model + schemaVersion: 0.0.1 + deploymentSpec: + executors: + exec-create-dataset: + container: + args: + - --executor_input + - '{{"{{"}}${{"}}"}}' + - --function_to_execute + - create_dataset + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef create_dataset(iris_dataset: Output[Dataset]):\n import pandas\ + \ as pd\n\n csv_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'\n\ + \ col_names = [\n 'Sepal_Length', 'Sepal_Width', 'Petal_Length',\ + \ 'Petal_Width', 'Labels'\n ]\n df = pd.read_csv(csv_url, names=col_names)\n\ + \n with open(iris_dataset.path, 'w') as f:\n df.to_csv(f)\n\n" + image: quay.io/rmartine/data-science:test9 + exec-normalize-dataset: + container: + args: + - --executor_input + - '{{"{{"}}${{"}}"}}' + - --function_to_execute + - normalize_dataset + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef normalize_dataset(\n input_iris_dataset: Input[Dataset],\n\ + \ normalized_iris_dataset: Output[Dataset],\n standard_scaler: bool,\n\ + \ min_max_scaler: bool,\n):\n if standard_scaler is min_max_scaler:\n\ + \ raise ValueError(\n 'Exactly one of standard_scaler\ + \ or min_max_scaler must be True.')\n\n import pandas as pd\n from\ + \ sklearn.preprocessing import MinMaxScaler\n from sklearn.preprocessing\ + \ import StandardScaler\n\n with open(input_iris_dataset.path) as f:\n\ + \ df = pd.read_csv(f)\n labels = df.pop('Labels')\n\n if standard_scaler:\n\ + \ scaler = StandardScaler()\n if min_max_scaler:\n scaler\ + \ = MinMaxScaler()\n\n df = pd.DataFrame(scaler.fit_transform(df))\n\ + \ df['Labels'] = labels\n normalized_iris_dataset.metadata['state']\ + \ = \"Normalized\"\n with open(normalized_iris_dataset.path, 'w') as\ + \ f:\n df.to_csv(f)\n\n" + image: quay.io/rmartine/data-science:test9 + exec-train-model: + container: + args: + - --executor_input + - '{{"{{"}}${{"}}"}}' + - --function_to_execute + - train_model + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef train_model(\n normalized_iris_dataset: Input[Dataset],\n\ + \ model: Output[Model],\n metrics: Output[ClassificationMetrics],\n\ + \ n_neighbors: int,\n):\n import pickle\n\n import pandas as pd\n\ + \ from sklearn.neighbors import KNeighborsClassifier\n\n from sklearn.metrics\ + \ import roc_curve\n from sklearn.model_selection import train_test_split,\ + \ cross_val_predict\n from sklearn.metrics import confusion_matrix\n\n\ + \n with open(normalized_iris_dataset.path) as f:\n df = pd.read_csv(f)\n\ + \n y = df.pop('Labels')\n X = df\n\n X_train, X_test, y_train,\ + \ y_test = train_test_split(X, y, random_state=0)\n\n clf = KNeighborsClassifier(n_neighbors=n_neighbors)\n\ + \ clf.fit(X_train, y_train)\n\n predictions = cross_val_predict(\n\ + \ clf, X_train, y_train, cv=3)\n metrics.log_confusion_matrix(\n\ + \ ['Iris-Setosa', 'Iris-Versicolour', 'Iris-Virginica'],\n \ + \ confusion_matrix(\n y_train,\n predictions).tolist()\ + \ # .tolist() to convert np array to list.\n )\n\n model.metadata['framework']\ + \ = 'scikit-learn'\n with open(model.path, 'wb') as f:\n pickle.dump(clf,\ + \ f)\n\n" + image: quay.io/rmartine/data-science:test9 + pipelineInfo: + name: iris-training-pipeline + root: + dag: + outputs: + artifacts: + train-model-metrics: + artifactSelectors: + - 
outputArtifactKey: metrics + producerSubtask: train-model + tasks: + create-dataset: + cachingOptions: + enableCache: true + componentRef: + name: comp-create-dataset + taskInfo: + name: create-dataset + normalize-dataset: + cachingOptions: + enableCache: true + componentRef: + name: comp-normalize-dataset + dependentTasks: + - create-dataset + inputs: + artifacts: + input_iris_dataset: + taskOutputArtifact: + outputArtifactKey: iris_dataset + producerTask: create-dataset + parameters: + min_max_scaler: + runtimeValue: + constant: false + standard_scaler: + runtimeValue: + constant: true + taskInfo: + name: normalize-dataset + train-model: + cachingOptions: + enableCache: true + componentRef: + name: comp-train-model + dependentTasks: + - normalize-dataset + inputs: + artifacts: + normalized_iris_dataset: + taskOutputArtifact: + outputArtifactKey: normalized_iris_dataset + producerTask: normalize-dataset + parameters: + n_neighbors: + componentInputParameter: neighbors + taskInfo: + name: train-model + inputDefinitions: + parameters: + min_max_scaler: + parameterType: BOOLEAN + neighbors: + parameterType: NUMBER_INTEGER + standard_scaler: + parameterType: BOOLEAN + outputDefinitions: + artifacts: + train-model-metrics: + artifactType: + schemaTitle: system.ClassificationMetrics + schemaVersion: 0.0.1 + schemaVersion: 2.1.0 + sdkVersion: kfp-2.0.1 +{{ else }} + apiVersion: tekton.dev/v1beta1 + kind: PipelineRun + metadata: + name: iris-pipeline + annotations: + tekton.dev/output_artifacts: '{"data-prep": [{"key": "artifacts/$PIPELINERUN/data-prep/X_test.tgz", + "name": "data-prep-X_test", "path": "/tmp/outputs/X_test/data"}, {"key": "artifacts/$PIPELINERUN/data-prep/X_train.tgz", + "name": "data-prep-X_train", "path": "/tmp/outputs/X_train/data"}, {"key": "artifacts/$PIPELINERUN/data-prep/y_test.tgz", + "name": "data-prep-y_test", "path": "/tmp/outputs/y_test/data"}, {"key": "artifacts/$PIPELINERUN/data-prep/y_train.tgz", + "name": "data-prep-y_train", "path": "/tmp/outputs/y_train/data"}], "evaluate-model": + [{"key": "artifacts/$PIPELINERUN/evaluate-model/mlpipeline-metrics.tgz", "name": + "mlpipeline-metrics", "path": "/tmp/outputs/mlpipeline_metrics/data"}], "train-model": + [{"key": "artifacts/$PIPELINERUN/train-model/model.tgz", "name": "train-model-model", + "path": "/tmp/outputs/model/data"}]}' + tekton.dev/input_artifacts: '{"evaluate-model": [{"name": "data-prep-X_test", + "parent_task": "data-prep"}, {"name": "data-prep-y_test", "parent_task": "data-prep"}, + {"name": "train-model-model", "parent_task": "train-model"}], "train-model": + [{"name": "data-prep-X_train", "parent_task": "data-prep"}, {"name": "data-prep-y_train", + "parent_task": "data-prep"}], "validate-model": [{"name": "train-model-model", + "parent_task": "train-model"}]}' + tekton.dev/artifact_bucket: mlpipeline + tekton.dev/artifact_endpoint: minio-service.kubeflow:9000 + tekton.dev/artifact_endpoint_scheme: http:// + tekton.dev/artifact_items: '{"data-prep": [["X_test", "$(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/X_test"], + ["X_train", "$(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/X_train"], + ["y_test", "$(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/y_test"], + ["y_train", "$(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/y_train"]], + "evaluate-model": [["mlpipeline-metrics", "/tmp/outputs/mlpipeline_metrics/data"]], + "train-model": [["model", 
"$(workspaces.train-model.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/model"]], + "validate-model": []}' + sidecar.istio.io/inject: "false" + tekton.dev/template: '' + pipelines.kubeflow.org/big_data_passing_format: $(workspaces.$TASK_NAME.path)/artifacts/$ORIG_PR_NAME/$TASKRUN_NAME/$TASK_PARAM_NAME + pipelines.kubeflow.org/pipeline_spec: '{"inputs": [{"default": "iris-model", "name": + "model_obc", "optional": true, "type": "String"}], "name": "Iris Pipeline"}' + labels: + pipelines.kubeflow.org/pipelinename: '' + pipelines.kubeflow.org/generation: '' + spec: + params: + - name: model_obc + value: iris-model + pipelineSpec: params: - name: model_obc - value: iris-model - pipelineSpec: - params: - - name: model_obc - default: iris-model - tasks: - - name: data-prep - taskSpec: - steps: - - name: main - args: - - --X-train - - $(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/X_train - - --X-test - - $(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/X_test - - --y-train - - $(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/y_train - - --y-test - - $(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/y_test - command: - - sh - - -c - - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location - 'pandas' 'scikit-learn' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m - pip install --quiet --no-warn-script-location 'pandas' 'scikit-learn' - --user) && "$0" "$@" - - sh - - -ec - - | - program_path=$(mktemp) - printf "%s" "$0" > "$program_path" - python3 -u "$program_path" "$@" - - | - def _make_parent_dirs_and_return_path(file_path: str): - import os - os.makedirs(os.path.dirname(file_path), exist_ok=True) - return file_path - - def data_prep( - X_train_file, - X_test_file, - y_train_file, - y_test_file, - ): - import pickle - - import pandas as pd - - from sklearn import datasets - from sklearn.model_selection import train_test_split - - def get_iris_data(): - iris = datasets.load_iris() - data = pd.DataFrame( - { - "sepalLength": iris.data[:, 0], - "sepalWidth": iris.data[:, 1], - "petalLength": iris.data[:, 2], - "petalWidth": iris.data[:, 3], - "species": iris.target, - } - ) - - print("Initial Dataset:") - print(data.head()) - - return data - - def create_training_set(dataset, test_size = 0.3): - # Features - X = dataset[["sepalLength", "sepalWidth", "petalLength", "petalWidth"]] - # Labels - y = dataset["species"] - - # Split dataset into training set and test set - X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=test_size, random_state=11 - ) - - return X_train, X_test, y_train, y_test - - def save_pickle(object_file, target_object): - with open(object_file, "wb") as f: - pickle.dump(target_object, f) - - dataset = get_iris_data() - X_train, X_test, y_train, y_test = create_training_set(dataset) - - save_pickle(X_train_file, X_train) - save_pickle(X_test_file, X_test) - save_pickle(y_train_file, y_train) - save_pickle(y_test_file, y_test) - - import argparse - _parser = argparse.ArgumentParser(prog='Data prep', description='') - _parser.add_argument("--X-train", dest="X_train_file", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS) - _parser.add_argument("--X-test", dest="X_test_file", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS) - _parser.add_argument("--y-train", dest="y_train_file", type=_make_parent_dirs_and_return_path, required=True, 
default=argparse.SUPPRESS) - _parser.add_argument("--y-test", dest="y_test_file", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS) - _parsed_args = vars(_parser.parse_args()) - - _outputs = data_prep(**_parsed_args) - image: registry.access.redhat.com/ubi8/python-38 - env: - - name: ORIG_PR_NAME - valueFrom: - fieldRef: - fieldPath: metadata.labels['custom.tekton.dev/originalPipelineRun'] - - image: registry.access.redhat.com/ubi8/ubi-minimal - name: output-taskrun-name - command: - - sh - - -ec - - echo -n "$(context.taskRun.name)" > "$(results.taskrun-name.path)" - - image: registry.access.redhat.com/ubi8/ubi-minimal - name: copy-results-artifacts - command: - - sh - - -ec - - | - set -exo pipefail - TOTAL_SIZE=0 - copy_artifact() { + default: iris-model + tasks: + - name: data-prep + taskSpec: + steps: + - name: main + args: + - --X-train + - $(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/X_train + - --X-test + - $(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/X_test + - --y-train + - $(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/y_train + - --y-test + - $(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/y_test + command: + - sh + - -c + - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location + 'pandas' 'scikit-learn' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m + pip install --quiet --no-warn-script-location 'pandas' 'scikit-learn' + --user) && "$0" "$@" + - sh + - -ec + - | + program_path=$(mktemp) + printf "%s" "$0" > "$program_path" + python3 -u "$program_path" "$@" + - | + def _make_parent_dirs_and_return_path(file_path: str): + import os + os.makedirs(os.path.dirname(file_path), exist_ok=True) + return file_path + + def data_prep( + X_train_file, + X_test_file, + y_train_file, + y_test_file, + ): + import pickle + + import pandas as pd + + from sklearn import datasets + from sklearn.model_selection import train_test_split + + def get_iris_data(): + iris = datasets.load_iris() + data = pd.DataFrame( + { + "sepalLength": iris.data[:, 0], + "sepalWidth": iris.data[:, 1], + "petalLength": iris.data[:, 2], + "petalWidth": iris.data[:, 3], + "species": iris.target, + } + ) + + print("Initial Dataset:") + print(data.head()) + + return data + + def create_training_set(dataset, test_size = 0.3): + # Features + X = dataset[["sepalLength", "sepalWidth", "petalLength", "petalWidth"]] + # Labels + y = dataset["species"] + + # Split dataset into training set and test set + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=test_size, random_state=11 + ) + + return X_train, X_test, y_train, y_test + + def save_pickle(object_file, target_object): + with open(object_file, "wb") as f: + pickle.dump(target_object, f) + + dataset = get_iris_data() + X_train, X_test, y_train, y_test = create_training_set(dataset) + + save_pickle(X_train_file, X_train) + save_pickle(X_test_file, X_test) + save_pickle(y_train_file, y_train) + save_pickle(y_test_file, y_test) + + import argparse + _parser = argparse.ArgumentParser(prog='Data prep', description='') + _parser.add_argument("--X-train", dest="X_train_file", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS) + _parser.add_argument("--X-test", dest="X_test_file", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS) + _parser.add_argument("--y-train", dest="y_train_file", 
type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS) + _parser.add_argument("--y-test", dest="y_test_file", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS) + _parsed_args = vars(_parser.parse_args()) + + _outputs = data_prep(**_parsed_args) + image: registry.access.redhat.com/ubi8/python-38 + env: + - name: ORIG_PR_NAME + valueFrom: + fieldRef: + fieldPath: metadata.labels['custom.tekton.dev/originalPipelineRun'] + - image: registry.access.redhat.com/ubi8/ubi-minimal + name: output-taskrun-name + command: + - sh + - -ec + - echo -n "$(context.taskRun.name)" > "$(results.taskrun-name.path)" + - image: registry.access.redhat.com/ubi8/ubi-minimal + name: copy-results-artifacts + command: + - sh + - -ec + - | + set -exo pipefail + TOTAL_SIZE=0 + copy_artifact() { + if [ -d "$1" ]; then + tar -czvf "$1".tar.gz "$1" + SUFFIX=".tar.gz" + fi + ARTIFACT_SIZE=`wc -c "$1"${SUFFIX} | awk '{print $1}'` + TOTAL_SIZE=$( expr $TOTAL_SIZE + $ARTIFACT_SIZE) + touch "$2" + if [[ $TOTAL_SIZE -lt 3072 ]]; then if [ -d "$1" ]; then - tar -czvf "$1".tar.gz "$1" - SUFFIX=".tar.gz" - fi - ARTIFACT_SIZE=`wc -c "$1"${SUFFIX} | awk '{print $1}'` - TOTAL_SIZE=$( expr $TOTAL_SIZE + $ARTIFACT_SIZE) - touch "$2" - if [[ $TOTAL_SIZE -lt 3072 ]]; then - if [ -d "$1" ]; then - tar -tzf "$1".tar.gz > "$2" - elif ! awk "/[^[:print:]]/{f=1} END{exit !f}" "$1"; then - cp "$1" "$2" - fi + tar -tzf "$1".tar.gz > "$2" + elif ! awk "/[^[:print:]]/{f=1} END{exit !f}" "$1"; then + cp "$1" "$2" fi - } - copy_artifact $(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/X_train $(results.X-train.path) - copy_artifact $(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/X_test $(results.X-test.path) - copy_artifact $(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/y_train $(results.y-train.path) - copy_artifact $(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/y_test $(results.y-test.path) - onError: continue - env: - - name: ORIG_PR_NAME - valueFrom: - fieldRef: - fieldPath: metadata.labels['custom.tekton.dev/originalPipelineRun'] - results: - - name: X-test - description: /tmp/outputs/X_test/data - - name: X-train - description: /tmp/outputs/X_train/data - - name: taskrun-name - - name: y-test - description: /tmp/outputs/y_test/data - - name: y-train - description: /tmp/outputs/y_train/data - metadata: - labels: - pipelines.kubeflow.org/cache_enabled: "true" - annotations: - pipelines.kubeflow.org/component_spec_digest: '{"name": "Data prep", "outputs": - [{"name": "X_train"}, {"name": "X_test"}, {"name": "y_train"}, {"name": - "y_test"}], "version": "Data prep@sha256=5aeb512900f57983c9f643ec30ddb4ccc66490a443269b51ce0a67d57cb373b0"}' - workspaces: - - name: data-prep + fi + } + copy_artifact $(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/X_train $(results.X-train.path) + copy_artifact $(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/X_test $(results.X-test.path) + copy_artifact $(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/y_train $(results.y-train.path) + copy_artifact $(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/y_test $(results.y-test.path) + onError: continue + env: + - name: ORIG_PR_NAME + valueFrom: + fieldRef: + fieldPath: metadata.labels['custom.tekton.dev/originalPipelineRun'] + results: + - name: X-test + description: /tmp/outputs/X_test/data 
+ - name: X-train + description: /tmp/outputs/X_train/data + - name: taskrun-name + - name: y-test + description: /tmp/outputs/y_test/data + - name: y-train + description: /tmp/outputs/y_train/data + metadata: + labels: + pipelines.kubeflow.org/cache_enabled: "true" + annotations: + pipelines.kubeflow.org/component_spec_digest: '{"name": "Data prep", "outputs": + [{"name": "X_train"}, {"name": "X_test"}, {"name": "y_train"}, {"name": + "y_test"}], "version": "Data prep@sha256=5aeb512900f57983c9f643ec30ddb4ccc66490a443269b51ce0a67d57cb373b0"}' workspaces: - name: data-prep - workspace: iris-pipeline - - name: train-model - params: - - name: data-prep-trname - value: $(tasks.data-prep.results.taskrun-name) - taskSpec: - steps: - - name: main - args: - - --X-train - - $(workspaces.train-model.path)/artifacts/$ORIG_PR_NAME/$(params.data-prep-trname)/X_train - - --y-train - - $(workspaces.train-model.path)/artifacts/$ORIG_PR_NAME/$(params.data-prep-trname)/y_train - - --model - - $(workspaces.train-model.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/model - command: - - sh - - -c - - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location - 'pandas' 'scikit-learn' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m - pip install --quiet --no-warn-script-location 'pandas' 'scikit-learn' - --user) && "$0" "$@" - - sh - - -ec - - | - program_path=$(mktemp) - printf "%s" "$0" > "$program_path" - python3 -u "$program_path" "$@" - - | - def _make_parent_dirs_and_return_path(file_path: str): - import os - os.makedirs(os.path.dirname(file_path), exist_ok=True) - return file_path - - def train_model( - X_train_file, - y_train_file, - model_file, - ): - import pickle - - from sklearn.ensemble import RandomForestClassifier - - def load_pickle(object_file): - with open(object_file, "rb") as f: - target_object = pickle.load(f) - - return target_object - - def save_pickle(object_file, target_object): - with open(object_file, "wb") as f: - pickle.dump(target_object, f) - - def train_iris(X_train, y_train): - model = RandomForestClassifier(n_estimators=100) - model.fit(X_train, y_train) - - return model - - X_train = load_pickle(X_train_file) - y_train = load_pickle(y_train_file) - - model = train_iris(X_train, y_train) - - save_pickle(model_file, model) - - import argparse - _parser = argparse.ArgumentParser(prog='Train model', description='') - _parser.add_argument("--X-train", dest="X_train_file", type=str, required=True, default=argparse.SUPPRESS) - _parser.add_argument("--y-train", dest="y_train_file", type=str, required=True, default=argparse.SUPPRESS) - _parser.add_argument("--model", dest="model_file", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS) - _parsed_args = vars(_parser.parse_args()) - - _outputs = train_model(**_parsed_args) - image: registry.access.redhat.com/ubi8/python-38 - env: - - name: ORIG_PR_NAME - valueFrom: - fieldRef: - fieldPath: metadata.labels['custom.tekton.dev/originalPipelineRun'] - - image: registry.access.redhat.com/ubi8/ubi-minimal - name: output-taskrun-name - command: - - sh - - -ec - - echo -n "$(context.taskRun.name)" > "$(results.taskrun-name.path)" - - image: registry.access.redhat.com/ubi8/ubi-minimal - name: copy-results-artifacts - command: - - sh - - -ec - - | - set -exo pipefail - TOTAL_SIZE=0 - copy_artifact() { + workspaces: + - name: data-prep + workspace: iris-pipeline + - name: train-model + params: + - name: data-prep-trname + value: $(tasks.data-prep.results.taskrun-name) + taskSpec: + 
steps: + - name: main + args: + - --X-train + - $(workspaces.train-model.path)/artifacts/$ORIG_PR_NAME/$(params.data-prep-trname)/X_train + - --y-train + - $(workspaces.train-model.path)/artifacts/$ORIG_PR_NAME/$(params.data-prep-trname)/y_train + - --model + - $(workspaces.train-model.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/model + command: + - sh + - -c + - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location + 'pandas' 'scikit-learn' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m + pip install --quiet --no-warn-script-location 'pandas' 'scikit-learn' + --user) && "$0" "$@" + - sh + - -ec + - | + program_path=$(mktemp) + printf "%s" "$0" > "$program_path" + python3 -u "$program_path" "$@" + - | + def _make_parent_dirs_and_return_path(file_path: str): + import os + os.makedirs(os.path.dirname(file_path), exist_ok=True) + return file_path + + def train_model( + X_train_file, + y_train_file, + model_file, + ): + import pickle + + from sklearn.ensemble import RandomForestClassifier + + def load_pickle(object_file): + with open(object_file, "rb") as f: + target_object = pickle.load(f) + + return target_object + + def save_pickle(object_file, target_object): + with open(object_file, "wb") as f: + pickle.dump(target_object, f) + + def train_iris(X_train, y_train): + model = RandomForestClassifier(n_estimators=100) + model.fit(X_train, y_train) + + return model + + X_train = load_pickle(X_train_file) + y_train = load_pickle(y_train_file) + + model = train_iris(X_train, y_train) + + save_pickle(model_file, model) + + import argparse + _parser = argparse.ArgumentParser(prog='Train model', description='') + _parser.add_argument("--X-train", dest="X_train_file", type=str, required=True, default=argparse.SUPPRESS) + _parser.add_argument("--y-train", dest="y_train_file", type=str, required=True, default=argparse.SUPPRESS) + _parser.add_argument("--model", dest="model_file", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS) + _parsed_args = vars(_parser.parse_args()) + + _outputs = train_model(**_parsed_args) + image: registry.access.redhat.com/ubi8/python-38 + env: + - name: ORIG_PR_NAME + valueFrom: + fieldRef: + fieldPath: metadata.labels['custom.tekton.dev/originalPipelineRun'] + - image: registry.access.redhat.com/ubi8/ubi-minimal + name: output-taskrun-name + command: + - sh + - -ec + - echo -n "$(context.taskRun.name)" > "$(results.taskrun-name.path)" + - image: registry.access.redhat.com/ubi8/ubi-minimal + name: copy-results-artifacts + command: + - sh + - -ec + - | + set -exo pipefail + TOTAL_SIZE=0 + copy_artifact() { + if [ -d "$1" ]; then + tar -czvf "$1".tar.gz "$1" + SUFFIX=".tar.gz" + fi + ARTIFACT_SIZE=`wc -c "$1"${SUFFIX} | awk '{print $1}'` + TOTAL_SIZE=$( expr $TOTAL_SIZE + $ARTIFACT_SIZE) + touch "$2" + if [[ $TOTAL_SIZE -lt 3072 ]]; then if [ -d "$1" ]; then - tar -czvf "$1".tar.gz "$1" - SUFFIX=".tar.gz" + tar -tzf "$1".tar.gz > "$2" + elif ! awk "/[^[:print:]]/{f=1} END{exit !f}" "$1"; then + cp "$1" "$2" fi - ARTIFACT_SIZE=`wc -c "$1"${SUFFIX} | awk '{print $1}'` - TOTAL_SIZE=$( expr $TOTAL_SIZE + $ARTIFACT_SIZE) - touch "$2" - if [[ $TOTAL_SIZE -lt 3072 ]]; then - if [ -d "$1" ]; then - tar -tzf "$1".tar.gz > "$2" - elif ! 
awk "/[^[:print:]]/{f=1} END{exit !f}" "$1"; then - cp "$1" "$2" - fi - fi - } - copy_artifact $(workspaces.train-model.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/model $(results.model.path) - onError: continue - env: - - name: ORIG_PR_NAME - valueFrom: - fieldRef: - fieldPath: metadata.labels['custom.tekton.dev/originalPipelineRun'] - params: - - name: data-prep-trname - results: - - name: model - description: /tmp/outputs/model/data - - name: taskrun-name - metadata: - labels: - pipelines.kubeflow.org/cache_enabled: "true" - annotations: - pipelines.kubeflow.org/component_spec_digest: '{"name": "Train model", - "outputs": [{"name": "model"}], "version": "Train model@sha256=cb1fbd399ee5849dcdfaafced23a0496cae1d5861795062b22512b766ec418ce"}' - workspaces: - - name: train-model + fi + } + copy_artifact $(workspaces.train-model.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/model $(results.model.path) + onError: continue + env: + - name: ORIG_PR_NAME + valueFrom: + fieldRef: + fieldPath: metadata.labels['custom.tekton.dev/originalPipelineRun'] + params: + - name: data-prep-trname + results: + - name: model + description: /tmp/outputs/model/data + - name: taskrun-name + metadata: + labels: + pipelines.kubeflow.org/cache_enabled: "true" + annotations: + pipelines.kubeflow.org/component_spec_digest: '{"name": "Train model", + "outputs": [{"name": "model"}], "version": "Train model@sha256=cb1fbd399ee5849dcdfaafced23a0496cae1d5861795062b22512b766ec418ce"}' workspaces: - name: train-model - workspace: iris-pipeline - runAfter: - - data-prep - - data-prep - - name: evaluate-model + workspaces: + - name: train-model + workspace: iris-pipeline + runAfter: + - data-prep + - data-prep + - name: evaluate-model + params: + - name: data-prep-trname + value: $(tasks.data-prep.results.taskrun-name) + - name: train-model-trname + value: $(tasks.train-model.results.taskrun-name) + taskSpec: + steps: + - name: main + args: + - --X-test + - $(workspaces.evaluate-model.path)/artifacts/$ORIG_PR_NAME/$(params.data-prep-trname)/X_test + - --y-test + - $(workspaces.evaluate-model.path)/artifacts/$ORIG_PR_NAME/$(params.data-prep-trname)/y_test + - --model + - $(workspaces.evaluate-model.path)/artifacts/$ORIG_PR_NAME/$(params.train-model-trname)/model + - --mlpipeline-metrics + - /tmp/outputs/mlpipeline_metrics/data + command: + - sh + - -c + - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location + 'pandas' 'scikit-learn' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m + pip install --quiet --no-warn-script-location 'pandas' 'scikit-learn' + --user) && "$0" "$@" + - sh + - -ec + - | + program_path=$(mktemp) + printf "%s" "$0" > "$program_path" + python3 -u "$program_path" "$@" + - | + def _make_parent_dirs_and_return_path(file_path: str): + import os + os.makedirs(os.path.dirname(file_path), exist_ok=True) + return file_path + + def evaluate_model( + X_test_file, + y_test_file, + model_file, + mlpipeline_metrics_file, + ): + import json + import pickle + + from sklearn.metrics import accuracy_score + + def load_pickle(object_file): + with open(object_file, "rb") as f: + target_object = pickle.load(f) + + return target_object + + X_test = load_pickle(X_test_file) + y_test = load_pickle(y_test_file) + model = load_pickle(model_file) + + y_pred = model.predict(X_test) + + accuracy_score_metric = accuracy_score(y_test, y_pred) + print(f"Accuracy: {accuracy_score_metric}") + + metrics = { + "metrics": [ + { + "name": "accuracy-score", + "numberValue": 
accuracy_score_metric, + "format": "PERCENTAGE", + }, + ] + } + + with open(mlpipeline_metrics_file, "w") as f: + json.dump(metrics, f) + + import argparse + _parser = argparse.ArgumentParser(prog='Evaluate model', description='') + _parser.add_argument("--X-test", dest="X_test_file", type=str, required=True, default=argparse.SUPPRESS) + _parser.add_argument("--y-test", dest="y_test_file", type=str, required=True, default=argparse.SUPPRESS) + _parser.add_argument("--model", dest="model_file", type=str, required=True, default=argparse.SUPPRESS) + _parser.add_argument("--mlpipeline-metrics", dest="mlpipeline_metrics_file", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS) + _parsed_args = vars(_parser.parse_args()) + + _outputs = evaluate_model(**_parsed_args) + image: registry.access.redhat.com/ubi8/python-38 + env: + - name: ORIG_PR_NAME + valueFrom: + fieldRef: + fieldPath: metadata.labels['custom.tekton.dev/originalPipelineRun'] params: - name: data-prep-trname - value: $(tasks.data-prep.results.taskrun-name) - name: train-model-trname - value: $(tasks.train-model.results.taskrun-name) - taskSpec: - steps: - - name: main - args: - - --X-test - - $(workspaces.evaluate-model.path)/artifacts/$ORIG_PR_NAME/$(params.data-prep-trname)/X_test - - --y-test - - $(workspaces.evaluate-model.path)/artifacts/$ORIG_PR_NAME/$(params.data-prep-trname)/y_test - - --model - - $(workspaces.evaluate-model.path)/artifacts/$ORIG_PR_NAME/$(params.train-model-trname)/model - - --mlpipeline-metrics - - /tmp/outputs/mlpipeline_metrics/data - command: - - sh - - -c - - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location - 'pandas' 'scikit-learn' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m - pip install --quiet --no-warn-script-location 'pandas' 'scikit-learn' - --user) && "$0" "$@" - - sh - - -ec - - | - program_path=$(mktemp) - printf "%s" "$0" > "$program_path" - python3 -u "$program_path" "$@" - - | - def _make_parent_dirs_and_return_path(file_path: str): - import os - os.makedirs(os.path.dirname(file_path), exist_ok=True) - return file_path - - def evaluate_model( - X_test_file, - y_test_file, - model_file, - mlpipeline_metrics_file, - ): - import json - import pickle - - from sklearn.metrics import accuracy_score - - def load_pickle(object_file): - with open(object_file, "rb") as f: - target_object = pickle.load(f) - - return target_object - - X_test = load_pickle(X_test_file) - y_test = load_pickle(y_test_file) - model = load_pickle(model_file) - - y_pred = model.predict(X_test) - - accuracy_score_metric = accuracy_score(y_test, y_pred) - print(f"Accuracy: {accuracy_score_metric}") - - metrics = { - "metrics": [ - { - "name": "accuracy-score", - "numberValue": accuracy_score_metric, - "format": "PERCENTAGE", - }, - ] - } - - with open(mlpipeline_metrics_file, "w") as f: - json.dump(metrics, f) - - import argparse - _parser = argparse.ArgumentParser(prog='Evaluate model', description='') - _parser.add_argument("--X-test", dest="X_test_file", type=str, required=True, default=argparse.SUPPRESS) - _parser.add_argument("--y-test", dest="y_test_file", type=str, required=True, default=argparse.SUPPRESS) - _parser.add_argument("--model", dest="model_file", type=str, required=True, default=argparse.SUPPRESS) - _parser.add_argument("--mlpipeline-metrics", dest="mlpipeline_metrics_file", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS) - _parsed_args = vars(_parser.parse_args()) - - _outputs = 
evaluate_model(**_parsed_args) - image: registry.access.redhat.com/ubi8/python-38 - env: - - name: ORIG_PR_NAME - valueFrom: - fieldRef: - fieldPath: metadata.labels['custom.tekton.dev/originalPipelineRun'] - params: - - name: data-prep-trname - - name: train-model-trname - stepTemplate: - volumeMounts: - - name: mlpipeline-metrics - mountPath: /tmp/outputs/mlpipeline_metrics - volumes: + stepTemplate: + volumeMounts: - name: mlpipeline-metrics - emptyDir: {} - metadata: - labels: - pipelines.kubeflow.org/cache_enabled: "true" - annotations: - pipelines.kubeflow.org/component_spec_digest: '{"name": "Evaluate model", - "outputs": [{"name": "mlpipeline_metrics", "type": "Metrics"}], "version": - "Evaluate model@sha256=f398e65faecc6f5a4ba11a2c78d8a2274e3ede205a0e199c8bb615531a3abd4a"}' - workspaces: - - name: evaluate-model + mountPath: /tmp/outputs/mlpipeline_metrics + volumes: + - name: mlpipeline-metrics + emptyDir: {} + metadata: + labels: + pipelines.kubeflow.org/cache_enabled: "true" + annotations: + pipelines.kubeflow.org/component_spec_digest: '{"name": "Evaluate model", + "outputs": [{"name": "mlpipeline_metrics", "type": "Metrics"}], "version": + "Evaluate model@sha256=f398e65faecc6f5a4ba11a2c78d8a2274e3ede205a0e199c8bb615531a3abd4a"}' workspaces: - name: evaluate-model - workspace: iris-pipeline - runAfter: - - data-prep - - data-prep - - train-model - - name: validate-model + workspaces: + - name: evaluate-model + workspace: iris-pipeline + runAfter: + - data-prep + - data-prep + - train-model + - name: validate-model + params: + - name: train-model-trname + value: $(tasks.train-model.results.taskrun-name) + taskSpec: + steps: + - name: main + args: + - --model + - $(workspaces.validate-model.path)/artifacts/$ORIG_PR_NAME/$(params.train-model-trname)/model + command: + - sh + - -c + - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location + 'pandas' 'scikit-learn' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m + pip install --quiet --no-warn-script-location 'pandas' 'scikit-learn' + --user) && "$0" "$@" + - sh + - -ec + - | + program_path=$(mktemp) + printf "%s" "$0" > "$program_path" + python3 -u "$program_path" "$@" + - | + def validate_model(model_file): + import pickle + + def load_pickle(object_file): + with open(object_file, "rb") as f: + target_object = pickle.load(f) + + return target_object + + model = load_pickle(model_file) + + input_values = [[5, 3, 1.6, 0.2]] + + print(f"Performing test prediction on {input_values}") + result = model.predict(input_values) + + print(f"Response: {result}") + + import argparse + _parser = argparse.ArgumentParser(prog='Validate model', description='') + _parser.add_argument("--model", dest="model_file", type=str, required=True, default=argparse.SUPPRESS) + _parsed_args = vars(_parser.parse_args()) + + _outputs = validate_model(**_parsed_args) + image: registry.access.redhat.com/ubi8/python-38 + env: + - name: ORIG_PR_NAME + valueFrom: + fieldRef: + fieldPath: metadata.labels['custom.tekton.dev/originalPipelineRun'] params: - name: train-model-trname - value: $(tasks.train-model.results.taskrun-name) - taskSpec: - steps: - - name: main - args: - - --model - - $(workspaces.validate-model.path)/artifacts/$ORIG_PR_NAME/$(params.train-model-trname)/model - command: - - sh - - -c - - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location - 'pandas' 'scikit-learn' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m - pip install --quiet --no-warn-script-location 'pandas' 
'scikit-learn' - --user) && "$0" "$@" - - sh - - -ec - - | - program_path=$(mktemp) - printf "%s" "$0" > "$program_path" - python3 -u "$program_path" "$@" - - | - def validate_model(model_file): - import pickle - - def load_pickle(object_file): - with open(object_file, "rb") as f: - target_object = pickle.load(f) - - return target_object - - model = load_pickle(model_file) - - input_values = [[5, 3, 1.6, 0.2]] - - print(f"Performing test prediction on {input_values}") - result = model.predict(input_values) - - print(f"Response: {result}") - - import argparse - _parser = argparse.ArgumentParser(prog='Validate model', description='') - _parser.add_argument("--model", dest="model_file", type=str, required=True, default=argparse.SUPPRESS) - _parsed_args = vars(_parser.parse_args()) - - _outputs = validate_model(**_parsed_args) - image: registry.access.redhat.com/ubi8/python-38 - env: - - name: ORIG_PR_NAME - valueFrom: - fieldRef: - fieldPath: metadata.labels['custom.tekton.dev/originalPipelineRun'] - params: - - name: train-model-trname - metadata: - labels: - pipelines.kubeflow.org/cache_enabled: "true" - annotations: - pipelines.kubeflow.org/component_spec_digest: '{"name": "Validate model", - "outputs": [], "version": "Validate model@sha256=53d18ff94fc8f164e7d8455f2c87fa7fdac17e7502502aaa52012e4247d089ee"}' - workspaces: - - name: validate-model + metadata: + labels: + pipelines.kubeflow.org/cache_enabled: "true" + annotations: + pipelines.kubeflow.org/component_spec_digest: '{"name": "Validate model", + "outputs": [], "version": "Validate model@sha256=53d18ff94fc8f164e7d8455f2c87fa7fdac17e7502502aaa52012e4247d089ee"}' workspaces: - name: validate-model - workspace: iris-pipeline - runAfter: - - train-model workspaces: - - name: iris-pipeline + - name: validate-model + workspace: iris-pipeline + runAfter: + - train-model workspaces: - name: iris-pipeline - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 2Gi + workspaces: + - name: iris-pipeline + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 2Gi +{{ end }} \ No newline at end of file diff --git a/controllers/dspipeline_params.go b/controllers/dspipeline_params.go index 75445ca5a..e816b131e 100644 --- a/controllers/dspipeline_params.go +++ b/controllers/dspipeline_params.go @@ -39,6 +39,7 @@ type DSPAParams struct { Name string Namespace string Owner mf.Owner + DSPVersion string APIServer *dspa.APIServer APIServerServiceName string OAuthProxy string @@ -432,6 +433,7 @@ func setResourcesDefault(defaultValue dspa.ResourceRequirements, value **dspa.Re func (p *DSPAParams) ExtractParams(ctx context.Context, dsp *dspa.DataSciencePipelinesApplication, client client.Client, log logr.Logger) error { p.Name = dsp.Name p.Namespace = dsp.Namespace + p.DSPVersion = dsp.Spec.DSPVersion p.Owner = dsp p.APIServer = dsp.Spec.APIServer.DeepCopy() p.APIServerServiceName = fmt.Sprintf("%s-%s", config.DSPServicePrefix, p.Name) diff --git a/controllers/testdata/declarative/case_2/deploy/cr.yaml b/controllers/testdata/declarative/case_2/deploy/cr.yaml index 597f328d6..a799d26a0 100644 --- a/controllers/testdata/declarative/case_2/deploy/cr.yaml +++ b/controllers/testdata/declarative/case_2/deploy/cr.yaml @@ -3,6 +3,7 @@ kind: DataSciencePipelinesApplication metadata: name: testdsp2 spec: + dspVersion: v2 apiServer: deploy: true image: api-server:test2 diff --git a/controllers/testdata/declarative/case_2/expected/created/sample-pipeline.yaml.tmpl 
b/controllers/testdata/declarative/case_2/expected/created/sample-pipeline.yaml.tmpl index 9b1ac3cd8..8d94e5bf6 100644 --- a/controllers/testdata/declarative/case_2/expected/created/sample-pipeline.yaml.tmpl +++ b/controllers/testdata/declarative/case_2/expected/created/sample-pipeline.yaml.tmpl @@ -8,547 +8,245 @@ metadata: component: data-science-pipelines data: iris-pipeline-compiled.yaml: |- - apiVersion: tekton.dev/v1beta1 - kind: PipelineRun - metadata: - name: iris-pipeline - annotations: - tekton.dev/output_artifacts: '{"data-prep": [{"key": "artifacts/$PIPELINERUN/data-prep/X_test.tgz", - "name": "data-prep-X_test", "path": "/tmp/outputs/X_test/data"}, {"key": "artifacts/$PIPELINERUN/data-prep/X_train.tgz", - "name": "data-prep-X_train", "path": "/tmp/outputs/X_train/data"}, {"key": "artifacts/$PIPELINERUN/data-prep/y_test.tgz", - "name": "data-prep-y_test", "path": "/tmp/outputs/y_test/data"}, {"key": "artifacts/$PIPELINERUN/data-prep/y_train.tgz", - "name": "data-prep-y_train", "path": "/tmp/outputs/y_train/data"}], "evaluate-model": - [{"key": "artifacts/$PIPELINERUN/evaluate-model/mlpipeline-metrics.tgz", "name": - "mlpipeline-metrics", "path": "/tmp/outputs/mlpipeline_metrics/data"}], "train-model": - [{"key": "artifacts/$PIPELINERUN/train-model/model.tgz", "name": "train-model-model", - "path": "/tmp/outputs/model/data"}]}' - tekton.dev/input_artifacts: '{"evaluate-model": [{"name": "data-prep-X_test", - "parent_task": "data-prep"}, {"name": "data-prep-y_test", "parent_task": "data-prep"}, - {"name": "train-model-model", "parent_task": "train-model"}], "train-model": - [{"name": "data-prep-X_train", "parent_task": "data-prep"}, {"name": "data-prep-y_train", - "parent_task": "data-prep"}], "validate-model": [{"name": "train-model-model", - "parent_task": "train-model"}]}' - tekton.dev/artifact_bucket: mlpipeline - tekton.dev/artifact_endpoint: minio-service.kubeflow:9000 - tekton.dev/artifact_endpoint_scheme: http:// - tekton.dev/artifact_items: '{"data-prep": [["X_test", "$(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/X_test"], - ["X_train", "$(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/X_train"], - ["y_test", "$(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/y_test"], - ["y_train", "$(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/y_train"]], - "evaluate-model": [["mlpipeline-metrics", "/tmp/outputs/mlpipeline_metrics/data"]], - "train-model": [["model", "$(workspaces.train-model.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/model"]], - "validate-model": []}' - sidecar.istio.io/inject: "false" - tekton.dev/template: '' - pipelines.kubeflow.org/big_data_passing_format: $(workspaces.$TASK_NAME.path)/artifacts/$ORIG_PR_NAME/$TASKRUN_NAME/$TASK_PARAM_NAME - pipelines.kubeflow.org/pipeline_spec: '{"inputs": [{"default": "iris-model", "name": - "model_obc", "optional": true, "type": "String"}], "name": "Iris Pipeline"}' - labels: - pipelines.kubeflow.org/pipelinename: '' - pipelines.kubeflow.org/generation: '' - spec: - params: - - name: model_obc - value: iris-model - pipelineSpec: - params: - - name: model_obc - default: iris-model - tasks: - - name: data-prep - taskSpec: - steps: - - name: main - args: - - --X-train - - $(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/X_train - - --X-test - - $(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/X_test - - --y-train - - 
$(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/y_train - - --y-test - - $(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/y_test - command: - - sh - - -c - - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location - 'pandas' 'scikit-learn' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m - pip install --quiet --no-warn-script-location 'pandas' 'scikit-learn' - --user) && "$0" "$@" - - sh - - -ec - - | - program_path=$(mktemp) - printf "%s" "$0" > "$program_path" - python3 -u "$program_path" "$@" - - | - def _make_parent_dirs_and_return_path(file_path: str): - import os - os.makedirs(os.path.dirname(file_path), exist_ok=True) - return file_path - - def data_prep( - X_train_file, - X_test_file, - y_train_file, - y_test_file, - ): - import pickle - - import pandas as pd - - from sklearn import datasets - from sklearn.model_selection import train_test_split - - def get_iris_data(): - iris = datasets.load_iris() - data = pd.DataFrame( - { - "sepalLength": iris.data[:, 0], - "sepalWidth": iris.data[:, 1], - "petalLength": iris.data[:, 2], - "petalWidth": iris.data[:, 3], - "species": iris.target, - } - ) - - print("Initial Dataset:") - print(data.head()) - - return data - - def create_training_set(dataset, test_size = 0.3): - # Features - X = dataset[["sepalLength", "sepalWidth", "petalLength", "petalWidth"]] - # Labels - y = dataset["species"] - - # Split dataset into training set and test set - X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=test_size, random_state=11 - ) - - return X_train, X_test, y_train, y_test - - def save_pickle(object_file, target_object): - with open(object_file, "wb") as f: - pickle.dump(target_object, f) - - dataset = get_iris_data() - X_train, X_test, y_train, y_test = create_training_set(dataset) - - save_pickle(X_train_file, X_train) - save_pickle(X_test_file, X_test) - save_pickle(y_train_file, y_train) - save_pickle(y_test_file, y_test) - - import argparse - _parser = argparse.ArgumentParser(prog='Data prep', description='') - _parser.add_argument("--X-train", dest="X_train_file", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS) - _parser.add_argument("--X-test", dest="X_test_file", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS) - _parser.add_argument("--y-train", dest="y_train_file", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS) - _parser.add_argument("--y-test", dest="y_test_file", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS) - _parsed_args = vars(_parser.parse_args()) - - _outputs = data_prep(**_parsed_args) - image: registry.access.redhat.com/ubi8/python-38 - env: - - name: ORIG_PR_NAME - valueFrom: - fieldRef: - fieldPath: metadata.labels['custom.tekton.dev/originalPipelineRun'] - - image: registry.access.redhat.com/ubi8/ubi-minimal - name: output-taskrun-name - command: - - sh - - -ec - - echo -n "$(context.taskRun.name)" > "$(results.taskrun-name.path)" - - image: registry.access.redhat.com/ubi8/ubi-minimal - name: copy-results-artifacts - command: - - sh - - -ec - - | - set -exo pipefail - TOTAL_SIZE=0 - copy_artifact() { - if [ -d "$1" ]; then - tar -czvf "$1".tar.gz "$1" - SUFFIX=".tar.gz" - fi - ARTIFACT_SIZE=`wc -c "$1"${SUFFIX} | awk '{print $1}'` - TOTAL_SIZE=$( expr $TOTAL_SIZE + $ARTIFACT_SIZE) - touch "$2" - if [[ $TOTAL_SIZE -lt 3072 ]]; then - if [ -d "$1" ]; then - tar -tzf "$1".tar.gz > "$2" - 
elif ! awk "/[^[:print:]]/{f=1} END{exit !f}" "$1"; then - cp "$1" "$2" - fi - fi - } - copy_artifact $(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/X_train $(results.X-train.path) - copy_artifact $(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/X_test $(results.X-test.path) - copy_artifact $(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/y_train $(results.y-train.path) - copy_artifact $(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/y_test $(results.y-test.path) - onError: continue - env: - - name: ORIG_PR_NAME - valueFrom: - fieldRef: - fieldPath: metadata.labels['custom.tekton.dev/originalPipelineRun'] - results: - - name: X-test - description: /tmp/outputs/X_test/data - - name: X-train - description: /tmp/outputs/X_train/data - - name: taskrun-name - - name: y-test - description: /tmp/outputs/y_test/data - - name: y-train - description: /tmp/outputs/y_train/data - metadata: - labels: - pipelines.kubeflow.org/cache_enabled: "true" - annotations: - pipelines.kubeflow.org/component_spec_digest: '{"name": "Data prep", "outputs": - [{"name": "X_train"}, {"name": "X_test"}, {"name": "y_train"}, {"name": - "y_test"}], "version": "Data prep@sha256=5aeb512900f57983c9f643ec30ddb4ccc66490a443269b51ce0a67d57cb373b0"}' - workspaces: - - name: data-prep - workspaces: - - name: data-prep - workspace: iris-pipeline - - name: train-model - params: - - name: data-prep-trname - value: $(tasks.data-prep.results.taskrun-name) - taskSpec: - steps: - - name: main - args: - - --X-train - - $(workspaces.train-model.path)/artifacts/$ORIG_PR_NAME/$(params.data-prep-trname)/X_train - - --y-train - - $(workspaces.train-model.path)/artifacts/$ORIG_PR_NAME/$(params.data-prep-trname)/y_train - - --model - - $(workspaces.train-model.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/model - command: - - sh - - -c - - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location - 'pandas' 'scikit-learn' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m - pip install --quiet --no-warn-script-location 'pandas' 'scikit-learn' - --user) && "$0" "$@" - - sh - - -ec - - | - program_path=$(mktemp) - printf "%s" "$0" > "$program_path" - python3 -u "$program_path" "$@" - - | - def _make_parent_dirs_and_return_path(file_path: str): - import os - os.makedirs(os.path.dirname(file_path), exist_ok=True) - return file_path - - def train_model( - X_train_file, - y_train_file, - model_file, - ): - import pickle - - from sklearn.ensemble import RandomForestClassifier - - def load_pickle(object_file): - with open(object_file, "rb") as f: - target_object = pickle.load(f) - - return target_object - - def save_pickle(object_file, target_object): - with open(object_file, "wb") as f: - pickle.dump(target_object, f) - - def train_iris(X_train, y_train): - model = RandomForestClassifier(n_estimators=100) - model.fit(X_train, y_train) - - return model - - X_train = load_pickle(X_train_file) - y_train = load_pickle(y_train_file) - - model = train_iris(X_train, y_train) - - save_pickle(model_file, model) - - import argparse - _parser = argparse.ArgumentParser(prog='Train model', description='') - _parser.add_argument("--X-train", dest="X_train_file", type=str, required=True, default=argparse.SUPPRESS) - _parser.add_argument("--y-train", dest="y_train_file", type=str, required=True, default=argparse.SUPPRESS) - _parser.add_argument("--model", dest="model_file", type=_make_parent_dirs_and_return_path, 
required=True, default=argparse.SUPPRESS) - _parsed_args = vars(_parser.parse_args()) - - _outputs = train_model(**_parsed_args) - image: registry.access.redhat.com/ubi8/python-38 - env: - - name: ORIG_PR_NAME - valueFrom: - fieldRef: - fieldPath: metadata.labels['custom.tekton.dev/originalPipelineRun'] - - image: registry.access.redhat.com/ubi8/ubi-minimal - name: output-taskrun-name - command: - - sh - - -ec - - echo -n "$(context.taskRun.name)" > "$(results.taskrun-name.path)" - - image: registry.access.redhat.com/ubi8/ubi-minimal - name: copy-results-artifacts - command: - - sh - - -ec - - | - set -exo pipefail - TOTAL_SIZE=0 - copy_artifact() { - if [ -d "$1" ]; then - tar -czvf "$1".tar.gz "$1" - SUFFIX=".tar.gz" - fi - ARTIFACT_SIZE=`wc -c "$1"${SUFFIX} | awk '{print $1}'` - TOTAL_SIZE=$( expr $TOTAL_SIZE + $ARTIFACT_SIZE) - touch "$2" - if [[ $TOTAL_SIZE -lt 3072 ]]; then - if [ -d "$1" ]; then - tar -tzf "$1".tar.gz > "$2" - elif ! awk "/[^[:print:]]/{f=1} END{exit !f}" "$1"; then - cp "$1" "$2" - fi - fi - } - copy_artifact $(workspaces.train-model.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/model $(results.model.path) - onError: continue - env: - - name: ORIG_PR_NAME - valueFrom: - fieldRef: - fieldPath: metadata.labels['custom.tekton.dev/originalPipelineRun'] - params: - - name: data-prep-trname - results: - - name: model - description: /tmp/outputs/model/data - - name: taskrun-name - metadata: - labels: - pipelines.kubeflow.org/cache_enabled: "true" - annotations: - pipelines.kubeflow.org/component_spec_digest: '{"name": "Train model", - "outputs": [{"name": "model"}], "version": "Train model@sha256=cb1fbd399ee5849dcdfaafced23a0496cae1d5861795062b22512b766ec418ce"}' - workspaces: - - name: train-model - workspaces: - - name: train-model - workspace: iris-pipeline - runAfter: - - data-prep - - data-prep - - name: evaluate-model - params: - - name: data-prep-trname - value: $(tasks.data-prep.results.taskrun-name) - - name: train-model-trname - value: $(tasks.train-model.results.taskrun-name) - taskSpec: - steps: - - name: main - args: - - --X-test - - $(workspaces.evaluate-model.path)/artifacts/$ORIG_PR_NAME/$(params.data-prep-trname)/X_test - - --y-test - - $(workspaces.evaluate-model.path)/artifacts/$ORIG_PR_NAME/$(params.data-prep-trname)/y_test - - --model - - $(workspaces.evaluate-model.path)/artifacts/$ORIG_PR_NAME/$(params.train-model-trname)/model - - --mlpipeline-metrics - - /tmp/outputs/mlpipeline_metrics/data - command: - - sh - - -c - - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location - 'pandas' 'scikit-learn' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m - pip install --quiet --no-warn-script-location 'pandas' 'scikit-learn' - --user) && "$0" "$@" - - sh - - -ec - - | - program_path=$(mktemp) - printf "%s" "$0" > "$program_path" - python3 -u "$program_path" "$@" - - | - def _make_parent_dirs_and_return_path(file_path: str): - import os - os.makedirs(os.path.dirname(file_path), exist_ok=True) - return file_path - - def evaluate_model( - X_test_file, - y_test_file, - model_file, - mlpipeline_metrics_file, - ): - import json - import pickle - - from sklearn.metrics import accuracy_score - - def load_pickle(object_file): - with open(object_file, "rb") as f: - target_object = pickle.load(f) - - return target_object - - X_test = load_pickle(X_test_file) - y_test = load_pickle(y_test_file) - model = load_pickle(model_file) - - y_pred = model.predict(X_test) - - accuracy_score_metric = accuracy_score(y_test, y_pred) - 
print(f"Accuracy: {accuracy_score_metric}") - - metrics = { - "metrics": [ - { - "name": "accuracy-score", - "numberValue": accuracy_score_metric, - "format": "PERCENTAGE", - }, - ] - } - - with open(mlpipeline_metrics_file, "w") as f: - json.dump(metrics, f) - - import argparse - _parser = argparse.ArgumentParser(prog='Evaluate model', description='') - _parser.add_argument("--X-test", dest="X_test_file", type=str, required=True, default=argparse.SUPPRESS) - _parser.add_argument("--y-test", dest="y_test_file", type=str, required=True, default=argparse.SUPPRESS) - _parser.add_argument("--model", dest="model_file", type=str, required=True, default=argparse.SUPPRESS) - _parser.add_argument("--mlpipeline-metrics", dest="mlpipeline_metrics_file", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS) - _parsed_args = vars(_parser.parse_args()) - - _outputs = evaluate_model(**_parsed_args) - image: registry.access.redhat.com/ubi8/python-38 - env: - - name: ORIG_PR_NAME - valueFrom: - fieldRef: - fieldPath: metadata.labels['custom.tekton.dev/originalPipelineRun'] - params: - - name: data-prep-trname - - name: train-model-trname - stepTemplate: - volumeMounts: - - name: mlpipeline-metrics - mountPath: /tmp/outputs/mlpipeline_metrics - volumes: - - name: mlpipeline-metrics - emptyDir: {} - metadata: - labels: - pipelines.kubeflow.org/cache_enabled: "true" - annotations: - pipelines.kubeflow.org/component_spec_digest: '{"name": "Evaluate model", - "outputs": [{"name": "mlpipeline_metrics", "type": "Metrics"}], "version": - "Evaluate model@sha256=f398e65faecc6f5a4ba11a2c78d8a2274e3ede205a0e199c8bb615531a3abd4a"}' - workspaces: - - name: evaluate-model - workspaces: - - name: evaluate-model - workspace: iris-pipeline - runAfter: - - data-prep - - data-prep - - train-model - - name: validate-model - params: - - name: train-model-trname - value: $(tasks.train-model.results.taskrun-name) - taskSpec: - steps: - - name: main - args: - - --model - - $(workspaces.validate-model.path)/artifacts/$ORIG_PR_NAME/$(params.train-model-trname)/model - command: - - sh - - -c - - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location - 'pandas' 'scikit-learn' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m - pip install --quiet --no-warn-script-location 'pandas' 'scikit-learn' - --user) && "$0" "$@" - - sh - - -ec - - | - program_path=$(mktemp) - printf "%s" "$0" > "$program_path" - python3 -u "$program_path" "$@" - - | - def validate_model(model_file): - import pickle - - def load_pickle(object_file): - with open(object_file, "rb") as f: - target_object = pickle.load(f) - - return target_object - - model = load_pickle(model_file) - - input_values = [[5, 3, 1.6, 0.2]] - - print(f"Performing test prediction on {input_values}") - result = model.predict(input_values) - - print(f"Response: {result}") - - import argparse - _parser = argparse.ArgumentParser(prog='Validate model', description='') - _parser.add_argument("--model", dest="model_file", type=str, required=True, default=argparse.SUPPRESS) - _parsed_args = vars(_parser.parse_args()) - - _outputs = validate_model(**_parsed_args) - image: registry.access.redhat.com/ubi8/python-38 - env: - - name: ORIG_PR_NAME - valueFrom: - fieldRef: - fieldPath: metadata.labels['custom.tekton.dev/originalPipelineRun'] - params: - - name: train-model-trname - metadata: - labels: - pipelines.kubeflow.org/cache_enabled: "true" - annotations: - pipelines.kubeflow.org/component_spec_digest: '{"name": "Validate model", - 
"outputs": [], "version": "Validate model@sha256=53d18ff94fc8f164e7d8455f2c87fa7fdac17e7502502aaa52012e4247d089ee"}' - workspaces: - - name: validate-model - workspaces: - - name: validate-model - workspace: iris-pipeline - runAfter: - - train-model - workspaces: - - name: iris-pipeline - workspaces: - - name: iris-pipeline - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 2Gi + # PIPELINE DEFINITION + # Name: iris-training-pipeline + # Inputs: + # min_max_scaler: bool + # neighbors: int + # standard_scaler: bool + # Outputs: + # train-model-metrics: system.ClassificationMetrics + components: + comp-create-dataset: + executorLabel: exec-create-dataset + outputDefinitions: + artifacts: + iris_dataset: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + comp-normalize-dataset: + executorLabel: exec-normalize-dataset + inputDefinitions: + artifacts: + input_iris_dataset: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + parameters: + min_max_scaler: + parameterType: BOOLEAN + standard_scaler: + parameterType: BOOLEAN + outputDefinitions: + artifacts: + normalized_iris_dataset: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + comp-train-model: + executorLabel: exec-train-model + inputDefinitions: + artifacts: + normalized_iris_dataset: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + parameters: + n_neighbors: + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + metrics: + artifactType: + schemaTitle: system.ClassificationMetrics + schemaVersion: 0.0.1 + model: + artifactType: + schemaTitle: system.Model + schemaVersion: 0.0.1 + deploymentSpec: + executors: + exec-create-dataset: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - create_dataset + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef create_dataset(iris_dataset: Output[Dataset]):\n import pandas\ + \ as pd\n\n csv_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'\n\ + \ col_names = [\n 'Sepal_Length', 'Sepal_Width', 'Petal_Length',\ + \ 'Petal_Width', 'Labels'\n ]\n df = pd.read_csv(csv_url, names=col_names)\n\ + \n with open(iris_dataset.path, 'w') as f:\n df.to_csv(f)\n\n" + image: quay.io/rmartine/data-science:test9 + exec-normalize-dataset: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - normalize_dataset + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef normalize_dataset(\n input_iris_dataset: Input[Dataset],\n\ + \ normalized_iris_dataset: Output[Dataset],\n standard_scaler: bool,\n\ + \ min_max_scaler: bool,\n):\n if standard_scaler is min_max_scaler:\n\ + \ raise ValueError(\n 'Exactly one of standard_scaler\ + \ or min_max_scaler must be True.')\n\n import pandas as pd\n from\ + \ sklearn.preprocessing import MinMaxScaler\n from sklearn.preprocessing\ + \ import StandardScaler\n\n with open(input_iris_dataset.path) as f:\n\ + \ df = pd.read_csv(f)\n labels = df.pop('Labels')\n\n if standard_scaler:\n\ + \ scaler = StandardScaler()\n if min_max_scaler:\n scaler\ + \ = MinMaxScaler()\n\n df = pd.DataFrame(scaler.fit_transform(df))\n\ + \ df['Labels'] = labels\n normalized_iris_dataset.metadata['state']\ + \ = \"Normalized\"\n with open(normalized_iris_dataset.path, 'w') as\ + \ f:\n df.to_csv(f)\n\n" + image: quay.io/rmartine/data-science:test9 + exec-train-model: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - train_model + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef train_model(\n normalized_iris_dataset: Input[Dataset],\n\ + \ model: Output[Model],\n metrics: Output[ClassificationMetrics],\n\ + \ n_neighbors: int,\n):\n import pickle\n\n import pandas as pd\n\ + \ from sklearn.neighbors import KNeighborsClassifier\n\n from sklearn.metrics\ + \ import roc_curve\n from sklearn.model_selection import train_test_split,\ + \ cross_val_predict\n from sklearn.metrics import confusion_matrix\n\n\ + \n with open(normalized_iris_dataset.path) as f:\n df = pd.read_csv(f)\n\ + \n y = df.pop('Labels')\n X = df\n\n X_train, X_test, y_train,\ + \ y_test = train_test_split(X, y, random_state=0)\n\n clf = KNeighborsClassifier(n_neighbors=n_neighbors)\n\ + \ clf.fit(X_train, y_train)\n\n predictions = cross_val_predict(\n\ + \ clf, X_train, y_train, cv=3)\n metrics.log_confusion_matrix(\n\ + \ ['Iris-Setosa', 'Iris-Versicolour', 'Iris-Virginica'],\n \ + \ confusion_matrix(\n y_train,\n predictions).tolist()\ + \ # .tolist() to convert np array to list.\n )\n\n model.metadata['framework']\ + \ = 'scikit-learn'\n with open(model.path, 'wb') as f:\n pickle.dump(clf,\ + \ f)\n\n" + image: quay.io/rmartine/data-science:test9 + pipelineInfo: + name: iris-training-pipeline + root: + dag: + outputs: + artifacts: + train-model-metrics: + artifactSelectors: + - outputArtifactKey: 
metrics + producerSubtask: train-model + tasks: + create-dataset: + cachingOptions: + enableCache: true + componentRef: + name: comp-create-dataset + taskInfo: + name: create-dataset + normalize-dataset: + cachingOptions: + enableCache: true + componentRef: + name: comp-normalize-dataset + dependentTasks: + - create-dataset + inputs: + artifacts: + input_iris_dataset: + taskOutputArtifact: + outputArtifactKey: iris_dataset + producerTask: create-dataset + parameters: + min_max_scaler: + runtimeValue: + constant: false + standard_scaler: + runtimeValue: + constant: true + taskInfo: + name: normalize-dataset + train-model: + cachingOptions: + enableCache: true + componentRef: + name: comp-train-model + dependentTasks: + - normalize-dataset + inputs: + artifacts: + normalized_iris_dataset: + taskOutputArtifact: + outputArtifactKey: normalized_iris_dataset + producerTask: normalize-dataset + parameters: + n_neighbors: + componentInputParameter: neighbors + taskInfo: + name: train-model + inputDefinitions: + parameters: + min_max_scaler: + parameterType: BOOLEAN + neighbors: + parameterType: NUMBER_INTEGER + standard_scaler: + parameterType: BOOLEAN + outputDefinitions: + artifacts: + train-model-metrics: + artifactType: + schemaTitle: system.ClassificationMetrics + schemaVersion: 0.0.1 + schemaVersion: 2.1.0 + sdkVersion: kfp-2.0.1 \ No newline at end of file diff --git a/controllers/testdata/declarative/case_6/config.yaml b/controllers/testdata/declarative/case_6/config.yaml new file mode 100644 index 000000000..a03247a37 --- /dev/null +++ b/controllers/testdata/declarative/case_6/config.yaml @@ -0,0 +1,10 @@ +# When a complete DSPA is deployed with (defaults specified) +Images: + ApiServer: api-server:test6 + Artifact: artifact-manager:test6 + PersistentAgent: persistenceagent:test6 + ScheduledWorkflow: scheduledworkflow:test6 + Cache: ubi-minimal:test6 + MoveResultsImage: busybox:test6 + MariaDB: mariadb:test6 + OAuthProxy: oauth-proxy:test6 diff --git a/controllers/testdata/declarative/case_6/deploy/cr.yaml b/controllers/testdata/declarative/case_6/deploy/cr.yaml new file mode 100644 index 000000000..50b3565ff --- /dev/null +++ b/controllers/testdata/declarative/case_6/deploy/cr.yaml @@ -0,0 +1,89 @@ +apiVersion: datasciencepipelinesapplications.opendatahub.io/v1alpha1 +kind: DataSciencePipelinesApplication +metadata: + name: testdsp6 +spec: + apiServer: + deploy: true + image: api-server:test6 + applyTektonCustomResource: true + archiveLogs: false + artifactImage: artifact-manager:test6 + cacheImage: ubi-minimal:test6 + moveResultsImage: busybox:test6 + injectDefaultScript: true + stripEOF: true + enableOauth: true + enableSamplePipeline: true + terminateStatus: Cancelled + trackArtifacts: true + dbConfigConMaxLifetimeSec: 125 + collectMetrics: true + autoUpdatePipelineDefaultVersion: true + resources: + requests: + cpu: "1231m" + memory: "1Gi" + limits: + cpu: "2522m" + memory: "5Gi" + persistenceAgent: + deploy: true + image: persistenceagent:test6 + numWorkers: 5 + resources: + requests: + cpu: "1233m" + memory: "1Gi" + limits: + cpu: "2524m" + memory: "5Gi" + scheduledWorkflow: + deploy: true + image: scheduledworkflow:test6 + cronScheduleTimezone: EST + resources: + requests: + cpu: "1235m" + memory: "1Gi" + limits: + cpu: "2526m" + memory: "5Gi" + mlpipelineUI: + deploy: true + image: frontend:test6 + configMap: some-test-configmap + resources: + requests: + cpu: "1239m" + memory: "1Gi" + limits: + cpu: "2530m" + memory: "5Gi" + database: + mariaDB: + deploy: true + image: 
mariadb:test6 + username: testuser + pipelineDBName: randomDBName + pvcSize: 32Gi + resources: + requests: + cpu: "1212m" + memory: "1Gi" + limits: + cpu: "2554m" + memory: "5Gi" + objectStorage: + minio: + deploy: true + image: minio:test6 + bucket: mlpipeline + pvcSize: 40Gi + resources: + requests: + cpu: "1334m" + memory: "1Gi" + limits: + cpu: "2535m" + memory: "5Gi" diff --git a/controllers/testdata/declarative/case_6/expected/apiserver_deployment.yaml b/controllers/testdata/declarative/case_6/expected/apiserver_deployment.yaml new file mode 100644 index 000000000..bdf2f827e --- /dev/null +++ b/controllers/testdata/declarative/case_6/expected/apiserver_deployment.yaml @@ -0,0 +1,214 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ds-pipeline-testdsp6 + namespace: default + labels: + app: ds-pipeline-testdsp6 + component: data-science-pipelines + dspa: testdsp2 +spec: + selector: + matchLabels: + app: ds-pipeline-testdsp6 + component: data-science-pipelines + dspa: testdsp2 + template: + metadata: + labels: + app: ds-pipeline-testdsp6 + component: data-science-pipelines + dspa: testdsp2 + spec: + containers: + - env: + - name: POD_NAMESPACE + value: "default" + - name: DBCONFIG_USER + value: "testuser" + - name: DBCONFIG_PASSWORD + valueFrom: + secretKeyRef: + key: "password" + name: "ds-pipeline-db-testdsp2" + - name: DBCONFIG_DBNAME + value: "randomDBName" + - name: DBCONFIG_HOST + value: "mariadb-testdsp2.default.svc.cluster.local" + - name: DBCONFIG_PORT + value: "3306" + - name: ARTIFACT_BUCKET + value: "mlpipeline" + - name: ARTIFACT_ENDPOINT + value: "http://minio-testdsp2.default.svc.cluster.local:9000" + - name: ARTIFACT_SCRIPT + valueFrom: + configMapKeyRef: + key: "artifact_script" + name: "ds-pipeline-artifact-script-testdsp2" + - name: ARTIFACT_IMAGE + value: "artifact-manager:test2" + - name: ARCHIVE_LOGS + value: "false" + - name: EXECUTIONTYPE + value: PipelineRun + - name: TRACK_ARTIFACTS + value: "true" + - name: STRIP_EOF + value: "true" + - name: PIPELINE_RUNTIME + value: "tekton" + - name: DEFAULTPIPELINERUNNERSERVICEACCOUNT + value: "pipeline-runner-testdsp2" + - name: INJECT_DEFAULT_SCRIPT + value: "true" + - name: APPLY_TEKTON_CUSTOM_RESOURCE + value: "true" + - name: TERMINATE_STATUS + value: "Cancelled" + - name: AUTO_UPDATE_PIPELINE_DEFAULT_VERSION + value: "true" + - name: DBCONFIG_CONMAXLIFETIMESEC + value: "125" + - name: ML_PIPELINE_VISUALIZATIONSERVER_SERVICE_HOST + value: "ds-pipeline-visualizationserver" + - name: ML_PIPELINE_VISUALIZATIONSERVER_SERVICE_PORT + value: "8888" + - name: OBJECTSTORECONFIG_BUCKETNAME + value: "mlpipeline" + - name: OBJECTSTORECONFIG_ACCESSKEY + valueFrom: + secretKeyRef: + key: "accesskey" + name: "mlpipeline-minio-artifact" + - name: OBJECTSTORECONFIG_SECRETACCESSKEY + valueFrom: + secretKeyRef: + key: "secretkey" + name: "mlpipeline-minio-artifact" + - name: OBJECTSTORECONFIG_SECURE + value: "false" + - name: MINIO_SERVICE_SERVICE_HOST + value: "minio-testdsp2.default.svc.cluster.local" + - name: MINIO_SERVICE_SERVICE_PORT + value: "9000" + - name: CACHE_IMAGE + value: "ubi-minimal:test2" + - name: MOVERESULTS_IMAGE + value: "busybox:test2" + - name: METADATA_GRPC_SERVICE_SERVICE_HOST + value: "ds-pipeline-metadata-grpc-testdsp2.default.svc.cluster.local" + - name: METADATA_GRPC_SERVICE_SERVICE_PORT + value: "8080" + - name: ML_PIPELINE_SERVICE_HOST + value: ds-pipeline-testdsp6.default.svc.cluster.local + - name: ML_PIPELINE_SERVICE_PORT_GRPC + value: "8887" + image: api-server:test2 + 
imagePullPolicy: Always + name: ds-pipeline-api-server + ports: + - containerPort: 8888 + name: http + protocol: TCP + - containerPort: 8887 + name: grpc + protocol: TCP + livenessProbe: + exec: + command: + - wget + - -q + - -S + - -O + - '-' + - http://localhost:8888/apis/v1beta1/healthz + initialDelaySeconds: 3 + periodSeconds: 5 + timeoutSeconds: 2 + readinessProbe: + exec: + command: + - wget + - -q + - -S + - -O + - '-' + - http://localhost:8888/apis/v1beta1/healthz + initialDelaySeconds: 3 + periodSeconds: 5 + timeoutSeconds: 2 + resources: + requests: + cpu: 1231m + memory: 1Gi + limits: + cpu: 2522m + memory: 5Gi + volumeMounts: + - mountPath: /config/sample_config.json + name: sample-config + subPath: sample_config.json + - mountPath: /samples/ + name: sample-pipeline + - name: oauth-proxy + args: + - --https-address=:8443 + - --provider=openshift + - --openshift-service-account=ds-pipeline-testdsp6 + - --upstream=http://localhost:8888 + - --tls-cert=/etc/tls/private/tls.crt + - --tls-key=/etc/tls/private/tls.key + - --cookie-secret=SECRET + - '--openshift-delegate-urls={"/": {"group":"route.openshift.io","resource":"routes","verb":"get","name":"ds-pipeline-testdsp6","namespace":"default"}}' + - '--openshift-sar={"namespace":"default","resource":"routes","resourceName":"ds-pipeline-testdsp6","verb":"get","resourceAPIGroup":"route.openshift.io"}' + - --skip-auth-regex='(^/metrics|^/apis/v1beta1/healthz)' + image: oauth-proxy:test2 + ports: + - containerPort: 8443 + name: oauth + protocol: TCP + livenessProbe: + httpGet: + path: /oauth/healthz + port: oauth + scheme: HTTPS + initialDelaySeconds: 30 + timeoutSeconds: 1 + periodSeconds: 5 + successThreshold: 1 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /oauth/healthz + port: oauth + scheme: HTTPS + initialDelaySeconds: 5 + timeoutSeconds: 1 + periodSeconds: 5 + successThreshold: 1 + failureThreshold: 3 + resources: + limits: + cpu: 100m + memory: 256Mi + requests: + cpu: 100m + memory: 256Mi + volumeMounts: + - mountPath: /etc/tls/private + name: proxy-tls + volumes: + - name: proxy-tls + secret: + secretName: ds-pipelines-proxy-tls-testdsp2 + defaultMode: 420 + - configMap: + defaultMode: 420 + name: sample-config-testdsp2 + name: sample-config + - configMap: + defaultMode: 420 + name: sample-pipeline-testdsp2 + name: sample-pipeline + serviceAccountName: ds-pipeline-testdsp6 diff --git a/controllers/testdata/declarative/case_6/expected/configmap_artifact_script.yaml b/controllers/testdata/declarative/case_6/expected/configmap_artifact_script.yaml new file mode 100644 index 000000000..9294a70e1 --- /dev/null +++ b/controllers/testdata/declarative/case_6/expected/configmap_artifact_script.yaml @@ -0,0 +1,35 @@ +apiVersion: v1 +data: + artifact_script: |- + #!/usr/bin/env sh + push_artifact() { + workspace_dir=$(echo $(context.taskRun.name) | sed -e "s/$(context.pipeline.name)-//g") + workspace_dest=/workspace/${workspace_dir}/artifacts/$(context.pipelineRun.name)/$(context.taskRun.name) + artifact_name=$(basename $2) + if [ -f "$workspace_dest/$artifact_name" ]; then + echo sending to: ${workspace_dest}/${artifact_name} + tar -cvzf $1.tgz -C ${workspace_dest} ${artifact_name} + aws s3 --endpoint http://minio-testdsp2.default.svc.cluster.local:9000 cp $1.tgz s3://mlpipeline/artifacts/$PIPELINERUN/$PIPELINETASK/$1.tgz + elif [ -f "$2" ]; then + tar -cvzf $1.tgz -C $(dirname $2) ${artifact_name} + aws s3 --endpoint http://minio-testdsp2.default.svc.cluster.local:9000 cp $1.tgz 
s3://mlpipeline/artifacts/$PIPELINERUN/$PIPELINETASK/$1.tgz + else + echo "$2 file does not exist. Skip artifact tracking for $1" + fi + } + push_log() { + cat /var/log/containers/$PODNAME*$NAMESPACE*step-main*.log > step-main.log + push_artifact main-log step-main.log + } + strip_eof() { + if [ -f "$2" ]; then + awk 'NF' $2 | head -c -1 > $1_temp_save && cp $1_temp_save $2 + fi + } +kind: ConfigMap +metadata: + name: ds-pipeline-artifact-script-testdsp2 + namespace: default + labels: + app: ds-pipeline-testdsp6 + component: data-science-pipelines diff --git a/controllers/testdata/declarative/case_6/expected/mariadb_deployment.yaml b/controllers/testdata/declarative/case_6/expected/mariadb_deployment.yaml new file mode 100644 index 000000000..e1a326516 --- /dev/null +++ b/controllers/testdata/declarative/case_6/expected/mariadb_deployment.yaml @@ -0,0 +1,79 @@ +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: mariadb-testdsp6 + namespace: default + labels: + app: mariadb-testdsp6 + component: data-science-pipelines + dspa: testdsp6 +spec: + strategy: + type: Recreate # Need this since backing PVC is ReadWriteOnce, which creates resource lock condition in default Rolling strategy + selector: + matchLabels: + app: mariadb-testdsp6 + component: data-science-pipelines + dspa: testdsp6 + template: + metadata: + labels: + app: mariadb-testdsp6 + component: data-science-pipelines + dspa: testdsp6 + spec: + containers: + - name: mariadb + image: mariadb:test2 + ports: + - containerPort: 3306 + protocol: TCP + readinessProbe: + exec: + command: + - /bin/sh + - "-i" + - "-c" + - >- + MYSQL_PWD=$MYSQL_PASSWORD mysql -h 127.0.0.1 -u $MYSQL_USER -D + $MYSQL_DATABASE -e 'SELECT 1' + failureThreshold: 3 + initialDelaySeconds: 5 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + livenessProbe: + failureThreshold: 3 + initialDelaySeconds: 30 + periodSeconds: 10 + successThreshold: 1 + tcpSocket: + port: 3306 + timeoutSeconds: 1 + env: + - name: MYSQL_USER + value: "testuser" + - name: MYSQL_PASSWORD + valueFrom: + secretKeyRef: + key: "password" + name: "ds-pipeline-db-testdsp6" + - name: MYSQL_DATABASE + value: "randomDBName" + - name: MYSQL_ALLOW_EMPTY_PASSWORD + value: "true" + resources: + requests: + cpu: 1212m + memory: 1Gi + limits: + cpu: 2554m + memory: 5Gi + volumeMounts: + - name: mariadb-persistent-storage + mountPath: /var/lib/mysql + volumes: + - name: mariadb-persistent-storage + persistentVolumeClaim: + claimName: mariadb-testdsp6 diff --git a/controllers/testdata/declarative/case_6/expected/minio_deployment.yaml b/controllers/testdata/declarative/case_6/expected/minio_deployment.yaml new file mode 100644 index 000000000..8f8b3b930 --- /dev/null +++ b/controllers/testdata/declarative/case_6/expected/minio_deployment.yaml @@ -0,0 +1,75 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: minio-testdsp6 + namespace: default + labels: + app: minio-testdsp6 + component: data-science-pipelines + dspa: testdsp6 +spec: + selector: + matchLabels: + app: minio-testdsp6 + component: data-science-pipelines + dspa: testdsp6 + strategy: + type: Recreate + template: + metadata: + labels: + app: minio-testdsp6 + component: data-science-pipelines + dspa: testdsp6 + spec: + containers: + - args: + - server + - /data + env: + - name: MINIO_ACCESS_KEY + valueFrom: + secretKeyRef: + key: "accesskey" + name: "mlpipeline-minio-artifact" + - name: MINIO_SECRET_KEY + valueFrom: + secretKeyRef: + key: "secretkey" + name: "mlpipeline-minio-artifact" + image: minio:test2 + name: 
minio + ports: + - containerPort: 9000 + protocol: TCP + livenessProbe: + tcpSocket: + port: 9000 + initialDelaySeconds: 30 + timeoutSeconds: 1 + periodSeconds: 5 + successThreshold: 1 + failureThreshold: 3 + readinessProbe: + tcpSocket: + port: 9000 + initialDelaySeconds: 5 + timeoutSeconds: 1 + periodSeconds: 5 + successThreshold: 1 + failureThreshold: 3 + resources: + requests: + cpu: 1334m + memory: 1Gi + limits: + cpu: 2535m + memory: 5Gi + volumeMounts: + - mountPath: /data + name: data + subPath: minio + volumes: + - name: data + persistentVolumeClaim: + claimName: minio-testdsp6 diff --git a/controllers/testdata/declarative/case_6/expected/mlpipelines-ui_deployment.yaml b/controllers/testdata/declarative/case_6/expected/mlpipelines-ui_deployment.yaml new file mode 100644 index 000000000..3faec65f6 --- /dev/null +++ b/controllers/testdata/declarative/case_6/expected/mlpipelines-ui_deployment.yaml @@ -0,0 +1,153 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ds-pipeline-ui-testdsp6 + namespace: default + labels: + app: ds-pipeline-ui-testdsp6 + component: data-science-pipelines + dspa: testdsp6 +spec: + selector: + matchLabels: + app: ds-pipeline-ui-testdsp6 + component: data-science-pipelines + dspa: testdsp6 + template: + metadata: + annotations: + cluster-autoscaler.kubernetes.io/safe-to-evict: "true" + labels: + app: ds-pipeline-ui-testdsp6 + component: data-science-pipelines + dspa: testdsp6 + spec: + containers: + - env: + - name: VIEWER_TENSORBOARD_POD_TEMPLATE_SPEC_PATH + value: /etc/config/viewer-pod-template.json + - name: MINIO_NAMESPACE + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + - name: MINIO_ACCESS_KEY + valueFrom: + secretKeyRef: + key: "accesskey" + name: "mlpipeline-minio-artifact" + - name: MINIO_SECRET_KEY + valueFrom: + secretKeyRef: + key: "secretkey" + name: "mlpipeline-minio-artifact" + - name: ALLOW_CUSTOM_VISUALIZATIONS + value: "true" + - name: ARGO_ARCHIVE_LOGS + value: "true" + - name: ML_PIPELINE_SERVICE_HOST + value: ds-pipeline-testdsp6 + - name: ML_PIPELINE_SERVICE_PORT + value: '8888' + - name: METADATA_ENVOY_SERVICE_SERVICE_HOST + value: ds-pipeline-metadata-envoy-testdsp6 + - name: METADATA_ENVOY_SERVICE_SERVICE_PORT + value: "9090" + image: frontend:test2 + imagePullPolicy: IfNotPresent + livenessProbe: + exec: + command: + - wget + - -q + - -S + - -O + - '-' + - http://localhost:3000/apis/v1beta1/healthz + initialDelaySeconds: 3 + periodSeconds: 5 + timeoutSeconds: 2 + name: ds-pipeline-ui + ports: + - containerPort: 3000 + protocol: TCP + readinessProbe: + exec: + command: + - wget + - -q + - -S + - -O + - '-' + - http://localhost:3000/apis/v1beta1/healthz + initialDelaySeconds: 3 + periodSeconds: 5 + timeoutSeconds: 2 + resources: + requests: + cpu: 1239m + memory: 1Gi + limits: + cpu: 2530m + memory: 5Gi + volumeMounts: + - mountPath: /etc/config + name: config-volume + readOnly: true + - name: oauth-proxy + args: + - --https-address=:8443 + - --provider=openshift + - --openshift-service-account=ds-pipeline-ui-testdsp6 + - --upstream=http://localhost:3000 + - --tls-cert=/etc/tls/private/tls.crt + - --tls-key=/etc/tls/private/tls.key + - --cookie-secret=SECRET + - '--openshift-delegate-urls={"/": {"group":"route.openshift.io","resource":"routes","verb":"get","name":"ds-pipeline-ui-testdsp6","namespace":"default"}}' + - '--openshift-sar={"namespace":"default","resource":"routes","resourceName":"ds-pipeline-ui-testdsp6","verb":"get","resourceAPIGroup":"route.openshift.io"}' + - 
--skip-auth-regex='(^/metrics|^/apis/v1beta1/healthz)' + image: oauth-proxy:test2 + ports: + - containerPort: 8443 + name: https + protocol: TCP + livenessProbe: + httpGet: + path: /oauth/healthz + port: 8443 + scheme: HTTPS + initialDelaySeconds: 30 + timeoutSeconds: 1 + periodSeconds: 5 + successThreshold: 1 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /oauth/healthz + port: 8443 + scheme: HTTPS + initialDelaySeconds: 5 + timeoutSeconds: 1 + periodSeconds: 5 + successThreshold: 1 + failureThreshold: 3 + resources: + limits: + cpu: 100m + memory: 256Mi + requests: + cpu: 100m + memory: 256Mi + volumeMounts: + - mountPath: /etc/tls/private + name: proxy-tls + serviceAccountName: ds-pipeline-ui-testdsp6 + volumes: + - configMap: + name: some-test-configmap + defaultMode: 420 + name: config-volume + - name: proxy-tls + secret: + secretName: ds-pipelines-ui-proxy-tls-testdsp6 + defaultMode: 420 diff --git a/controllers/testdata/declarative/case_6/expected/persistence-agent_deployment.yaml b/controllers/testdata/declarative/case_6/expected/persistence-agent_deployment.yaml new file mode 100644 index 000000000..afed69995 --- /dev/null +++ b/controllers/testdata/declarative/case_6/expected/persistence-agent_deployment.yaml @@ -0,0 +1,76 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ds-pipeline-persistenceagent-testdsp6 + namespace: default + labels: + app: ds-pipeline-persistenceagent-testdsp6 + component: data-science-pipelines + dspa: testdsp6 +spec: + selector: + matchLabels: + app: ds-pipeline-persistenceagent-testdsp6 + component: data-science-pipelines + dspa: testdsp6 + template: + metadata: + annotations: + cluster-autoscaler.kubernetes.io/safe-to-evict: "true" + labels: + app: ds-pipeline-persistenceagent-testdsp6 + component: data-science-pipelines + dspa: testdsp6 + spec: + containers: + - env: + - name: NAMESPACE + value: "default" + - name: TTL_SECONDS_AFTER_WORKFLOW_FINISH + value: "86400" + - name: NUM_WORKERS + value: "2" + - name: KUBEFLOW_USERID_HEADER + value: kubeflow-userid + - name: KUBEFLOW_USERID_PREFIX + value: "" + - name: EXECUTIONTYPE + value: PipelineRun + image: persistenceagent:test2 + imagePullPolicy: IfNotPresent + name: ds-pipeline-persistenceagent + command: + - persistence_agent + - "--logtostderr=true" + - "--ttlSecondsAfterWorkflowFinish=86400" + - "--numWorker=5" + - "--mlPipelineAPIServerName=ds-pipeline-testdsp6" + - "--namespace=testdsp6" + - "--mlPipelineServiceHttpPort=8888" + - "--mlPipelineServiceGRPCPort=8887" + livenessProbe: + exec: + command: + - test + - -x + - persistence_agent + initialDelaySeconds: 30 + periodSeconds: 5 + timeoutSeconds: 2 + readinessProbe: + exec: + command: + - test + - -x + - persistence_agent + initialDelaySeconds: 3 + periodSeconds: 5 + timeoutSeconds: 2 + resources: + requests: + cpu: 1233m + memory: 1Gi + limits: + cpu: 2524m + memory: 5Gi + serviceAccountName: ds-pipeline-persistenceagent-testdsp6 diff --git a/controllers/testdata/declarative/case_6/expected/sample-config.yaml.tmpl b/controllers/testdata/declarative/case_6/expected/sample-config.yaml.tmpl new file mode 100644 index 000000000..f5ca8011c --- /dev/null +++ b/controllers/testdata/declarative/case_6/expected/sample-config.yaml.tmpl @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: sample-config-testdsp6 + namespace: default + labels: + app: ds-pipeline-testdsp6 + component: data-science-pipelines +data: + sample_config.json: |- + [ + { + "name": "[Demo] iris-training", + "description": "[source 
code](https://github.com/opendatahub-io/data-science-pipelines/tree/master/samples/iris-sklearn) A simple pipeline to demonstrate a basic ML Training workflow", + "file": "/samples/iris-pipeline-compiled.yaml" + } + ] diff --git a/controllers/testdata/declarative/case_6/expected/sample-pipeline.yaml.tmpl b/controllers/testdata/declarative/case_6/expected/sample-pipeline.yaml.tmpl new file mode 100644 index 000000000..1ebebf0cc --- /dev/null +++ b/controllers/testdata/declarative/case_6/expected/sample-pipeline.yaml.tmpl @@ -0,0 +1,554 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: sample-pipeline-testdsp6 + namespace: default + labels: + app: ds-pipeline-testdsp6 + component: data-science-pipelines +data: + iris-pipeline-compiled.yaml: |- + apiVersion: tekton.dev/v1beta1 + kind: PipelineRun + metadata: + name: iris-pipeline + annotations: + tekton.dev/output_artifacts: '{"data-prep": [{"key": "artifacts/$PIPELINERUN/data-prep/X_test.tgz", + "name": "data-prep-X_test", "path": "/tmp/outputs/X_test/data"}, {"key": "artifacts/$PIPELINERUN/data-prep/X_train.tgz", + "name": "data-prep-X_train", "path": "/tmp/outputs/X_train/data"}, {"key": "artifacts/$PIPELINERUN/data-prep/y_test.tgz", + "name": "data-prep-y_test", "path": "/tmp/outputs/y_test/data"}, {"key": "artifacts/$PIPELINERUN/data-prep/y_train.tgz", + "name": "data-prep-y_train", "path": "/tmp/outputs/y_train/data"}], "evaluate-model": + [{"key": "artifacts/$PIPELINERUN/evaluate-model/mlpipeline-metrics.tgz", "name": + "mlpipeline-metrics", "path": "/tmp/outputs/mlpipeline_metrics/data"}], "train-model": + [{"key": "artifacts/$PIPELINERUN/train-model/model.tgz", "name": "train-model-model", + "path": "/tmp/outputs/model/data"}]}' + tekton.dev/input_artifacts: '{"evaluate-model": [{"name": "data-prep-X_test", + "parent_task": "data-prep"}, {"name": "data-prep-y_test", "parent_task": "data-prep"}, + {"name": "train-model-model", "parent_task": "train-model"}], "train-model": + [{"name": "data-prep-X_train", "parent_task": "data-prep"}, {"name": "data-prep-y_train", + "parent_task": "data-prep"}], "validate-model": [{"name": "train-model-model", + "parent_task": "train-model"}]}' + tekton.dev/artifact_bucket: mlpipeline + tekton.dev/artifact_endpoint: minio-service.kubeflow:9000 + tekton.dev/artifact_endpoint_scheme: http:// + tekton.dev/artifact_items: '{"data-prep": [["X_test", "$(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/X_test"], + ["X_train", "$(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/X_train"], + ["y_test", "$(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/y_test"], + ["y_train", "$(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/y_train"]], + "evaluate-model": [["mlpipeline-metrics", "/tmp/outputs/mlpipeline_metrics/data"]], + "train-model": [["model", "$(workspaces.train-model.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/model"]], + "validate-model": []}' + sidecar.istio.io/inject: "false" + tekton.dev/template: '' + pipelines.kubeflow.org/big_data_passing_format: $(workspaces.$TASK_NAME.path)/artifacts/$ORIG_PR_NAME/$TASKRUN_NAME/$TASK_PARAM_NAME + pipelines.kubeflow.org/pipeline_spec: '{"inputs": [{"default": "iris-model", "name": + "model_obc", "optional": true, "type": "String"}], "name": "Iris Pipeline"}' + labels: + pipelines.kubeflow.org/pipelinename: '' + pipelines.kubeflow.org/generation: '' + spec: + params: + - name: model_obc + value: iris-model + pipelineSpec: + params: + - name: 
model_obc + default: iris-model + tasks: + - name: data-prep + taskSpec: + steps: + - name: main + args: + - --X-train + - $(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/X_train + - --X-test + - $(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/X_test + - --y-train + - $(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/y_train + - --y-test + - $(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/y_test + command: + - sh + - -c + - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location + 'pandas' 'scikit-learn' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m + pip install --quiet --no-warn-script-location 'pandas' 'scikit-learn' + --user) && "$0" "$@" + - sh + - -ec + - | + program_path=$(mktemp) + printf "%s" "$0" > "$program_path" + python3 -u "$program_path" "$@" + - | + def _make_parent_dirs_and_return_path(file_path: str): + import os + os.makedirs(os.path.dirname(file_path), exist_ok=True) + return file_path + + def data_prep( + X_train_file, + X_test_file, + y_train_file, + y_test_file, + ): + import pickle + + import pandas as pd + + from sklearn import datasets + from sklearn.model_selection import train_test_split + + def get_iris_data(): + iris = datasets.load_iris() + data = pd.DataFrame( + { + "sepalLength": iris.data[:, 0], + "sepalWidth": iris.data[:, 1], + "petalLength": iris.data[:, 2], + "petalWidth": iris.data[:, 3], + "species": iris.target, + } + ) + + print("Initial Dataset:") + print(data.head()) + + return data + + def create_training_set(dataset, test_size = 0.3): + # Features + X = dataset[["sepalLength", "sepalWidth", "petalLength", "petalWidth"]] + # Labels + y = dataset["species"] + + # Split dataset into training set and test set + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=test_size, random_state=11 + ) + + return X_train, X_test, y_train, y_test + + def save_pickle(object_file, target_object): + with open(object_file, "wb") as f: + pickle.dump(target_object, f) + + dataset = get_iris_data() + X_train, X_test, y_train, y_test = create_training_set(dataset) + + save_pickle(X_train_file, X_train) + save_pickle(X_test_file, X_test) + save_pickle(y_train_file, y_train) + save_pickle(y_test_file, y_test) + + import argparse + _parser = argparse.ArgumentParser(prog='Data prep', description='') + _parser.add_argument("--X-train", dest="X_train_file", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS) + _parser.add_argument("--X-test", dest="X_test_file", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS) + _parser.add_argument("--y-train", dest="y_train_file", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS) + _parser.add_argument("--y-test", dest="y_test_file", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS) + _parsed_args = vars(_parser.parse_args()) + + _outputs = data_prep(**_parsed_args) + image: registry.access.redhat.com/ubi8/python-38 + env: + - name: ORIG_PR_NAME + valueFrom: + fieldRef: + fieldPath: metadata.labels['custom.tekton.dev/originalPipelineRun'] + - image: registry.access.redhat.com/ubi8/ubi-minimal + name: output-taskrun-name + command: + - sh + - -ec + - echo -n "$(context.taskRun.name)" > "$(results.taskrun-name.path)" + - image: registry.access.redhat.com/ubi8/ubi-minimal + name: copy-results-artifacts + command: + - sh + - -ec + - | + set -exo 
pipefail + TOTAL_SIZE=0 + copy_artifact() { + if [ -d "$1" ]; then + tar -czvf "$1".tar.gz "$1" + SUFFIX=".tar.gz" + fi + ARTIFACT_SIZE=`wc -c "$1"${SUFFIX} | awk '{print $1}'` + TOTAL_SIZE=$( expr $TOTAL_SIZE + $ARTIFACT_SIZE) + touch "$2" + if [[ $TOTAL_SIZE -lt 3072 ]]; then + if [ -d "$1" ]; then + tar -tzf "$1".tar.gz > "$2" + elif ! awk "/[^[:print:]]/{f=1} END{exit !f}" "$1"; then + cp "$1" "$2" + fi + fi + } + copy_artifact $(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/X_train $(results.X-train.path) + copy_artifact $(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/X_test $(results.X-test.path) + copy_artifact $(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/y_train $(results.y-train.path) + copy_artifact $(workspaces.data-prep.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/y_test $(results.y-test.path) + onError: continue + env: + - name: ORIG_PR_NAME + valueFrom: + fieldRef: + fieldPath: metadata.labels['custom.tekton.dev/originalPipelineRun'] + results: + - name: X-test + description: /tmp/outputs/X_test/data + - name: X-train + description: /tmp/outputs/X_train/data + - name: taskrun-name + - name: y-test + description: /tmp/outputs/y_test/data + - name: y-train + description: /tmp/outputs/y_train/data + metadata: + labels: + pipelines.kubeflow.org/cache_enabled: "true" + annotations: + pipelines.kubeflow.org/component_spec_digest: '{"name": "Data prep", "outputs": + [{"name": "X_train"}, {"name": "X_test"}, {"name": "y_train"}, {"name": + "y_test"}], "version": "Data prep@sha256=5aeb512900f57983c9f643ec30ddb4ccc66490a443269b51ce0a67d57cb373b0"}' + workspaces: + - name: data-prep + workspaces: + - name: data-prep + workspace: iris-pipeline + - name: train-model + params: + - name: data-prep-trname + value: $(tasks.data-prep.results.taskrun-name) + taskSpec: + steps: + - name: main + args: + - --X-train + - $(workspaces.train-model.path)/artifacts/$ORIG_PR_NAME/$(params.data-prep-trname)/X_train + - --y-train + - $(workspaces.train-model.path)/artifacts/$ORIG_PR_NAME/$(params.data-prep-trname)/y_train + - --model + - $(workspaces.train-model.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/model + command: + - sh + - -c + - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location + 'pandas' 'scikit-learn' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m + pip install --quiet --no-warn-script-location 'pandas' 'scikit-learn' + --user) && "$0" "$@" + - sh + - -ec + - | + program_path=$(mktemp) + printf "%s" "$0" > "$program_path" + python3 -u "$program_path" "$@" + - | + def _make_parent_dirs_and_return_path(file_path: str): + import os + os.makedirs(os.path.dirname(file_path), exist_ok=True) + return file_path + + def train_model( + X_train_file, + y_train_file, + model_file, + ): + import pickle + + from sklearn.ensemble import RandomForestClassifier + + def load_pickle(object_file): + with open(object_file, "rb") as f: + target_object = pickle.load(f) + + return target_object + + def save_pickle(object_file, target_object): + with open(object_file, "wb") as f: + pickle.dump(target_object, f) + + def train_iris(X_train, y_train): + model = RandomForestClassifier(n_estimators=100) + model.fit(X_train, y_train) + + return model + + X_train = load_pickle(X_train_file) + y_train = load_pickle(y_train_file) + + model = train_iris(X_train, y_train) + + save_pickle(model_file, model) + + import argparse + _parser = argparse.ArgumentParser(prog='Train model', 
description='') + _parser.add_argument("--X-train", dest="X_train_file", type=str, required=True, default=argparse.SUPPRESS) + _parser.add_argument("--y-train", dest="y_train_file", type=str, required=True, default=argparse.SUPPRESS) + _parser.add_argument("--model", dest="model_file", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS) + _parsed_args = vars(_parser.parse_args()) + + _outputs = train_model(**_parsed_args) + image: registry.access.redhat.com/ubi8/python-38 + env: + - name: ORIG_PR_NAME + valueFrom: + fieldRef: + fieldPath: metadata.labels['custom.tekton.dev/originalPipelineRun'] + - image: registry.access.redhat.com/ubi8/ubi-minimal + name: output-taskrun-name + command: + - sh + - -ec + - echo -n "$(context.taskRun.name)" > "$(results.taskrun-name.path)" + - image: registry.access.redhat.com/ubi8/ubi-minimal + name: copy-results-artifacts + command: + - sh + - -ec + - | + set -exo pipefail + TOTAL_SIZE=0 + copy_artifact() { + if [ -d "$1" ]; then + tar -czvf "$1".tar.gz "$1" + SUFFIX=".tar.gz" + fi + ARTIFACT_SIZE=`wc -c "$1"${SUFFIX} | awk '{print $1}'` + TOTAL_SIZE=$( expr $TOTAL_SIZE + $ARTIFACT_SIZE) + touch "$2" + if [[ $TOTAL_SIZE -lt 3072 ]]; then + if [ -d "$1" ]; then + tar -tzf "$1".tar.gz > "$2" + elif ! awk "/[^[:print:]]/{f=1} END{exit !f}" "$1"; then + cp "$1" "$2" + fi + fi + } + copy_artifact $(workspaces.train-model.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/model $(results.model.path) + onError: continue + env: + - name: ORIG_PR_NAME + valueFrom: + fieldRef: + fieldPath: metadata.labels['custom.tekton.dev/originalPipelineRun'] + params: + - name: data-prep-trname + results: + - name: model + description: /tmp/outputs/model/data + - name: taskrun-name + metadata: + labels: + pipelines.kubeflow.org/cache_enabled: "true" + annotations: + pipelines.kubeflow.org/component_spec_digest: '{"name": "Train model", + "outputs": [{"name": "model"}], "version": "Train model@sha256=cb1fbd399ee5849dcdfaafced23a0496cae1d5861795062b22512b766ec418ce"}' + workspaces: + - name: train-model + workspaces: + - name: train-model + workspace: iris-pipeline + runAfter: + - data-prep + - data-prep + - name: evaluate-model + params: + - name: data-prep-trname + value: $(tasks.data-prep.results.taskrun-name) + - name: train-model-trname + value: $(tasks.train-model.results.taskrun-name) + taskSpec: + steps: + - name: main + args: + - --X-test + - $(workspaces.evaluate-model.path)/artifacts/$ORIG_PR_NAME/$(params.data-prep-trname)/X_test + - --y-test + - $(workspaces.evaluate-model.path)/artifacts/$ORIG_PR_NAME/$(params.data-prep-trname)/y_test + - --model + - $(workspaces.evaluate-model.path)/artifacts/$ORIG_PR_NAME/$(params.train-model-trname)/model + - --mlpipeline-metrics + - /tmp/outputs/mlpipeline_metrics/data + command: + - sh + - -c + - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location + 'pandas' 'scikit-learn' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m + pip install --quiet --no-warn-script-location 'pandas' 'scikit-learn' + --user) && "$0" "$@" + - sh + - -ec + - | + program_path=$(mktemp) + printf "%s" "$0" > "$program_path" + python3 -u "$program_path" "$@" + - | + def _make_parent_dirs_and_return_path(file_path: str): + import os + os.makedirs(os.path.dirname(file_path), exist_ok=True) + return file_path + + def evaluate_model( + X_test_file, + y_test_file, + model_file, + mlpipeline_metrics_file, + ): + import json + import pickle + + from sklearn.metrics import accuracy_score + + def 
load_pickle(object_file): + with open(object_file, "rb") as f: + target_object = pickle.load(f) + + return target_object + + X_test = load_pickle(X_test_file) + y_test = load_pickle(y_test_file) + model = load_pickle(model_file) + + y_pred = model.predict(X_test) + + accuracy_score_metric = accuracy_score(y_test, y_pred) + print(f"Accuracy: {accuracy_score_metric}") + + metrics = { + "metrics": [ + { + "name": "accuracy-score", + "numberValue": accuracy_score_metric, + "format": "PERCENTAGE", + }, + ] + } + + with open(mlpipeline_metrics_file, "w") as f: + json.dump(metrics, f) + + import argparse + _parser = argparse.ArgumentParser(prog='Evaluate model', description='') + _parser.add_argument("--X-test", dest="X_test_file", type=str, required=True, default=argparse.SUPPRESS) + _parser.add_argument("--y-test", dest="y_test_file", type=str, required=True, default=argparse.SUPPRESS) + _parser.add_argument("--model", dest="model_file", type=str, required=True, default=argparse.SUPPRESS) + _parser.add_argument("--mlpipeline-metrics", dest="mlpipeline_metrics_file", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS) + _parsed_args = vars(_parser.parse_args()) + + _outputs = evaluate_model(**_parsed_args) + image: registry.access.redhat.com/ubi8/python-38 + env: + - name: ORIG_PR_NAME + valueFrom: + fieldRef: + fieldPath: metadata.labels['custom.tekton.dev/originalPipelineRun'] + params: + - name: data-prep-trname + - name: train-model-trname + stepTemplate: + volumeMounts: + - name: mlpipeline-metrics + mountPath: /tmp/outputs/mlpipeline_metrics + volumes: + - name: mlpipeline-metrics + emptyDir: {} + metadata: + labels: + pipelines.kubeflow.org/cache_enabled: "true" + annotations: + pipelines.kubeflow.org/component_spec_digest: '{"name": "Evaluate model", + "outputs": [{"name": "mlpipeline_metrics", "type": "Metrics"}], "version": + "Evaluate model@sha256=f398e65faecc6f5a4ba11a2c78d8a2274e3ede205a0e199c8bb615531a3abd4a"}' + workspaces: + - name: evaluate-model + workspaces: + - name: evaluate-model + workspace: iris-pipeline + runAfter: + - data-prep + - data-prep + - train-model + - name: validate-model + params: + - name: train-model-trname + value: $(tasks.train-model.results.taskrun-name) + taskSpec: + steps: + - name: main + args: + - --model + - $(workspaces.validate-model.path)/artifacts/$ORIG_PR_NAME/$(params.train-model-trname)/model + command: + - sh + - -c + - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location + 'pandas' 'scikit-learn' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m + pip install --quiet --no-warn-script-location 'pandas' 'scikit-learn' + --user) && "$0" "$@" + - sh + - -ec + - | + program_path=$(mktemp) + printf "%s" "$0" > "$program_path" + python3 -u "$program_path" "$@" + - | + def validate_model(model_file): + import pickle + + def load_pickle(object_file): + with open(object_file, "rb") as f: + target_object = pickle.load(f) + + return target_object + + model = load_pickle(model_file) + + input_values = [[5, 3, 1.6, 0.2]] + + print(f"Performing test prediction on {input_values}") + result = model.predict(input_values) + + print(f"Response: {result}") + + import argparse + _parser = argparse.ArgumentParser(prog='Validate model', description='') + _parser.add_argument("--model", dest="model_file", type=str, required=True, default=argparse.SUPPRESS) + _parsed_args = vars(_parser.parse_args()) + + _outputs = validate_model(**_parsed_args) + image: registry.access.redhat.com/ubi8/python-38 + env: + 
- name: ORIG_PR_NAME + valueFrom: + fieldRef: + fieldPath: metadata.labels['custom.tekton.dev/originalPipelineRun'] + params: + - name: train-model-trname + metadata: + labels: + pipelines.kubeflow.org/cache_enabled: "true" + annotations: + pipelines.kubeflow.org/component_spec_digest: '{"name": "Validate model", + "outputs": [], "version": "Validate model@sha256=53d18ff94fc8f164e7d8455f2c87fa7fdac17e7502502aaa52012e4247d089ee"}' + workspaces: + - name: validate-model + workspaces: + - name: validate-model + workspace: iris-pipeline + runAfter: + - train-model + workspaces: + - name: iris-pipeline + workspaces: + - name: iris-pipeline + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 2Gi diff --git a/controllers/testdata/declarative/case_6/expected/scheduled-workflow_deployment.yaml b/controllers/testdata/declarative/case_6/expected/scheduled-workflow_deployment.yaml new file mode 100644 index 000000000..e0037fb35 --- /dev/null +++ b/controllers/testdata/declarative/case_6/expected/scheduled-workflow_deployment.yaml @@ -0,0 +1,65 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ds-pipeline-scheduledworkflow-testdsp6 + namespace: default + labels: + app: ds-pipeline-scheduledworkflow-testdsp6 + component: data-science-pipelines + dspa: testdsp6 +spec: + selector: + matchLabels: + app: ds-pipeline-scheduledworkflow-testdsp6 + component: data-science-pipelines + dspa: testdsp6 + template: + metadata: + annotations: + cluster-autoscaler.kubernetes.io/safe-to-evict: "true" + labels: + app: ds-pipeline-scheduledworkflow-testdsp6 + component: data-science-pipelines + dspa: testdsp6 + spec: + containers: + - env: + - name: NAMESPACE + value: "default" + - name: CRON_SCHEDULE_TIMEZONE + value: "EST" + - name: EXECUTIONTYPE + value: PipelineRun + image: scheduledworkflow:test2 + imagePullPolicy: IfNotPresent + name: ds-pipeline-scheduledworkflow + command: + - controller + - "--logtostderr=true" + - "--namespace=default" + livenessProbe: + exec: + command: + - test + - -x + - controller + initialDelaySeconds: 30 + periodSeconds: 5 + timeoutSeconds: 2 + readinessProbe: + exec: + command: + - test + - -x + - controller + initialDelaySeconds: 3 + periodSeconds: 5 + timeoutSeconds: 2 + resources: + requests: + cpu: 1235m + memory: 1Gi + limits: + cpu: 2526m + memory: 5Gi + serviceAccountName: ds-pipeline-scheduledworkflow-testdsp6