Commit 7c41734

Fix ruff format/lint issues
Signed-off-by: Giulio Frasca <gfrasca@redhat.com>
gmfrasca committed Dec 20, 2024
1 parent 7690ced commit 7c41734
Showing 8 changed files with 53 additions and 37 deletions.
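
The changes fall into three patterns, all consistent with ruff's default rules: import blocks are re-sorted so standard-library modules come before third-party packages (isort rule I001), imports and f-string prefixes that are no longer used are dropped (F401, F541), and statements longer than ruff format's default 88-character limit are wrapped across parentheses. None of the hunks change behavior; the pipeline.yaml hunks simply mirror the Python component changes, because the compiled pipeline embeds each component's source as an escaped string.
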
eval/final/components.py (6 changes: 4 additions & 2 deletions)

@@ -23,9 +23,9 @@ def run_final_eval_op(
 ):
     import json
     import os
-    import httpx
     import subprocess
 
+    import httpx
     import torch
     from instructlab.eval.mmlu import MMLUBranchEvaluator
     from instructlab.eval.mt_bench import MTBenchBranchEvaluator
@@ -35,7 +35,9 @@ def run_final_eval_op(
     judge_model_name = os.getenv("JUDGE_NAME")
     judge_endpoint = os.getenv("JUDGE_ENDPOINT")
     judge_ca_cert_path = os.getenv("JUDGE_CA_CERT_PATH")
-    use_tls = os.path.exists(judge_ca_cert_path) and (os.path.getsize(judge_ca_cert_path) > 0)
+    use_tls = os.path.exists(judge_ca_cert_path) and (
+        os.path.getsize(judge_ca_cert_path) > 0
+    )
     judge_http_client = httpx.Client(verify=judge_ca_cert_path) if use_tls else None
 
     print("Starting Final Eval...")
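
The import shuffle above is ruff's isort grouping at work: httpx is a third-party package, so it moves out of the standard-library block and below the separating blank line. A minimal sketch of the convention these hunks enforce (the stdlib/third-party classification is ruff's default, not something stated in the diff):

# Group 1: standard library, alphabetized.
import json
import os
import subprocess

# Group 2: third-party packages, after exactly one blank line.
import httpx
import torch
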
eval/mt_bench/components.py (8 changes: 5 additions & 3 deletions)

@@ -1,6 +1,6 @@
 # type: ignore
 # pylint: disable=no-value-for-parameter,import-outside-toplevel,import-error
-from typing import List, NamedTuple, Optional
+from typing import NamedTuple, Optional
 
 from kfp.dsl import component
 
@@ -20,17 +20,19 @@ def run_mt_bench_op(
 ) -> NamedTuple("outputs", best_model=str, best_score=float):
     import json
     import os
-    import httpx
     import subprocess
 
+    import httpx
     import torch
     from instructlab.eval.mt_bench import MTBenchEvaluator
 
     judge_api_key = os.getenv("JUDGE_API_KEY", "")
     judge_model_name = os.getenv("JUDGE_NAME")
     judge_endpoint = os.getenv("JUDGE_ENDPOINT")
     judge_ca_cert_path = os.getenv("JUDGE_CA_CERT_PATH")
-    use_tls = os.path.exists(judge_ca_cert_path) and (os.path.getsize(judge_ca_cert_path) > 0)
+    use_tls = os.path.exists(judge_ca_cert_path) and (
+        os.path.getsize(judge_ca_cert_path) > 0
+    )
     judge_http_client = httpx.Client(verify=judge_ca_cert_path) if use_tls else None
 
     def launch_vllm(
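
The use_tls rewrite in both eval components is purely cosmetic: the one-line boolean expression exceeds the 88-character limit, so ruff format breaks it across the existing parentheses. Both spellings are equivalent, as this self-contained sketch (with a stand-in certificate path) shows:

import os

judge_ca_cert_path = "/tmp/ca.crt"  # stand-in value for illustration

# One-line form, as it read before this commit:
use_tls_old = os.path.exists(judge_ca_cert_path) and (os.path.getsize(judge_ca_cert_path) > 0)

# Wrapped form produced by ruff format: same expression, same short-circuiting.
use_tls_new = os.path.exists(judge_ca_cert_path) and (
    os.path.getsize(judge_ca_cert_path) > 0
)

assert use_tls_old == use_tls_new
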
pipeline.py (32 changes: 21 additions & 11 deletions)

@@ -1,5 +1,6 @@
 # type: ignore
 # pylint: disable=no-value-for-parameter,import-outside-toplevel,import-error,no-member
+import os
 import typing
 from typing import List, Literal, Optional
 
@@ -9,13 +10,11 @@
     CreatePVC,
     DeletePVC,
     mount_pvc,
-    set_image_pull_policy,
     use_config_map_as_env,
     use_config_map_as_volume,
     use_secret_as_env,
     use_secret_as_volume,
 )
-import os
 
 TEACHER_CONFIG_MAP = "teacher-server"
 TEACHER_SECRET = "teacher-server"
@@ -104,7 +103,6 @@ def pipeline(
     sdg_pipeline: str = "full",  # https://github.com/instructlab/instructlab/blob/v0.21.2/tests/testdata/default_config.yaml#L122
     sdg_max_batch_len: int = 5000,  # https://github.com/instructlab/instructlab/blob/v0.21.2/tests/testdata/default_config.yaml#L334
     sdg_sample_size: float = 1.0,  # FIXME: Not present in default config. Not configurable upstream at this point, capability added via https://github.com/instructlab/sdg/pull/432
-
     # Training phase
     train_nproc_per_node: int = 2,  # FIXME: Not present in default config. Arbitrary value chosen to demonstrate multi-node multi-gpu capabilities. Needs proper reference architecture justification.
     train_nnodes: int = 2,  # FIXME: Not present in default config. Arbitrary value chosen to demonstrate multi-node multi-gpu capabilities. Needs proper reference architecture justification.
@@ -122,13 +120,11 @@ def pipeline(
     # MT Bench
     mt_bench_max_workers: str = "auto",  # https://github.com/instructlab/instructlab/blob/v0.21.2/tests/testdata/default_config.yaml#L74
     mt_bench_merge_system_user_message: bool = False,  # https://github.com/instructlab/instructlab/blob/v0.21.2/src/instructlab/model/evaluate.py#L474
-
     # Final evaluation
     final_eval_max_workers: str = "auto",  # https://github.com/instructlab/instructlab/blob/v0.21.2/tests/testdata/default_config.yaml#L74
     final_eval_few_shots: int = 5,  # https://github.com/instructlab/instructlab/blob/v0.21.2/tests/testdata/default_config.yaml#L56
     final_eval_batch_size: str = "auto",  # https://github.com/instructlab/instructlab/blob/v0.21.2/tests/testdata/default_config.yaml#L52
     final_eval_merge_system_user_message: bool = False,  # https://github.com/instructlab/instructlab/blob/v0.21.2/src/instructlab/model/evaluate.py#L474
-
     # Other options
     k8s_storage_class_name: str = "standard",  # FIXME: https://github.com/kubeflow/pipelines/issues/11396, https://issues.redhat.com/browse/RHOAIRFE-470
 ):
@@ -201,8 +197,12 @@ def pipeline(
         sdg_task, TEACHER_CONFIG_MAP, dict(endpoint="endpoint", model="model")
     )
     use_secret_as_env(sdg_task, TEACHER_SECRET, {"api_key": "api_key"})
-    use_config_map_as_volume(sdg_task, TEACHER_CONFIG_MAP, mount_path=SDG_CA_CERT_PATH)
-    sdg_task.set_env_variable(SDG_CA_CERT_ENV_VAR_NAME, os.path.join(SDG_CA_CERT_PATH, SDG_CA_CERT_CM_KEY))
+    use_config_map_as_volume(
+        sdg_task, TEACHER_CONFIG_MAP, mount_path=SDG_CA_CERT_PATH
+    )
+    sdg_task.set_env_variable(
+        SDG_CA_CERT_ENV_VAR_NAME, os.path.join(SDG_CA_CERT_PATH, SDG_CA_CERT_CM_KEY)
+    )
 
     sdg_task.after(git_clone_task)
     mount_pvc(
@@ -366,8 +366,13 @@ def pipeline(
     )
     use_secret_as_env(run_mt_bench_task, JUDGE_SECRET, {"api_key": "JUDGE_API_KEY"})
 
-    use_config_map_as_volume(run_mt_bench_task, JUDGE_CONFIG_MAP, mount_path=JUDGE_CA_CERT_PATH)
-    run_mt_bench_task.set_env_variable(JUDGE_CA_CERT_ENV_VAR_NAME, os.path.join(JUDGE_CA_CERT_PATH, JUDGE_CA_CERT_CM_KEY))
+    use_config_map_as_volume(
+        run_mt_bench_task, JUDGE_CONFIG_MAP, mount_path=JUDGE_CA_CERT_PATH
+    )
+    run_mt_bench_task.set_env_variable(
+        JUDGE_CA_CERT_ENV_VAR_NAME,
+        os.path.join(JUDGE_CA_CERT_PATH, JUDGE_CA_CERT_CM_KEY),
+    )
 
     # uncomment if updating image with same tag
     # set_image_pull_policy(run_mt_bench_task, "Always")
@@ -411,8 +416,13 @@ def pipeline(
 
     use_secret_as_env(final_eval_task, JUDGE_SECRET, {"api_key": "JUDGE_API_KEY"})
 
-    use_config_map_as_volume(final_eval_task, JUDGE_CONFIG_MAP, mount_path=JUDGE_CA_CERT_PATH)
-    final_eval_task.set_env_variable(JUDGE_CA_CERT_ENV_VAR_NAME, os.path.join(JUDGE_CA_CERT_PATH, JUDGE_CA_CERT_CM_KEY))
+    use_config_map_as_volume(
+        final_eval_task, JUDGE_CONFIG_MAP, mount_path=JUDGE_CA_CERT_PATH
+    )
+    final_eval_task.set_env_variable(
+        JUDGE_CA_CERT_ENV_VAR_NAME,
+        os.path.join(JUDGE_CA_CERT_PATH, JUDGE_CA_CERT_CM_KEY),
+    )
 
     final_eval_task.after(run_mt_bench_task)
     final_eval_task.set_accelerator_type("nvidia.com/gpu")
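
Two of the pipeline.py changes go beyond pure reformatting. The stray import os that sat below the kfp.kubernetes import block moves up into the sorted top-of-file group, since isort (I001) wants one contiguous, ordered import section. And set_image_pull_policy is dropped from the import list because its only remaining reference is the commented-out call kept further down, which makes it unused as far as F401 is concerned. An illustrative sketch of the first fix, using modules that already appear in this commit:

# Before: a stdlib import stranded below a third-party import (flagged by I001).
# import httpx
# import os

# After: stdlib group first, then third-party, separated by a blank line.
import os

import httpx
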
pipeline.yaml (28 changes: 14 additions & 14 deletions)

@@ -1144,17 +1144,17 @@ deploymentSpec:
 \ few_shots: int,\n batch_size: str,\n merge_system_user_message:\
 \ bool,\n candidate_model: str = None,\n taxonomy_path: str = \"/input/taxonomy\"\
 ,\n sdg_path: str = \"/input/sdg\",\n):\n import json\n import\
-\ os\n import httpx\n import subprocess\n\n import torch\n from\
+\ os\n import subprocess\n\n import httpx\n import torch\n from\
 \ instructlab.eval.mmlu import MMLUBranchEvaluator\n from instructlab.eval.mt_bench\
 \ import MTBenchBranchEvaluator\n from instructlab.model.evaluate import\
 \ qa_pairs_to_qna_to_avg_scores, sort_score\n\n judge_api_key = os.getenv(\"\
 JUDGE_API_KEY\", \"\")\n judge_model_name = os.getenv(\"JUDGE_NAME\"\
 )\n judge_endpoint = os.getenv(\"JUDGE_ENDPOINT\")\n judge_ca_cert_path\
 \ = os.getenv(\"JUDGE_CA_CERT_PATH\")\n use_tls = os.path.exists(judge_ca_cert_path)\
-\ and (os.path.getsize(judge_ca_cert_path) > 0)\n judge_http_client =\
-\ httpx.Client(verify=judge_ca_cert_path) if use_tls else None\n\n print(\"\
-Starting Final Eval...\")\n\n def launch_vllm(\n model_path: str,\
-\ gpu_count: int, retries: int = 120, delay: int = 10\n ) -> tuple:\n\
+\ and (\n os.path.getsize(judge_ca_cert_path) > 0\n )\n judge_http_client\
+\ = httpx.Client(verify=judge_ca_cert_path) if use_tls else None\n\n \
+\ print(\"Starting Final Eval...\")\n\n def launch_vllm(\n model_path:\
+\ str, gpu_count: int, retries: int = 120, delay: int = 10\n ) -> tuple:\n\
 \ import subprocess\n import sys\n import time\n\n\
 \ import requests\n from instructlab.model.backends.common\
 \ import free_tcp_ipv4_port\n\n free_port = free_tcp_ipv4_port(\"\
@@ -1402,13 +1402,13 @@ deploymentSpec:
 \ max_workers: str,\n models_folder: str,\n output_path: str =\
 \ \"/output/mt_bench_data.json\",\n best_score_file: Optional[str] =\
 \ None,\n) -> NamedTuple(\"outputs\", best_model=str, best_score=float):\n\
-\ import json\n import os\n import httpx\n import subprocess\n\
-\n import torch\n from instructlab.eval.mt_bench import MTBenchEvaluator\n\
+\ import json\n import os\n import subprocess\n\n import httpx\n\
+\ import torch\n from instructlab.eval.mt_bench import MTBenchEvaluator\n\
 \n judge_api_key = os.getenv(\"JUDGE_API_KEY\", \"\")\n judge_model_name\
 \ = os.getenv(\"JUDGE_NAME\")\n judge_endpoint = os.getenv(\"JUDGE_ENDPOINT\"\
 )\n judge_ca_cert_path = os.getenv(\"JUDGE_CA_CERT_PATH\")\n use_tls\
-\ = os.path.exists(judge_ca_cert_path) and (os.path.getsize(judge_ca_cert_path)\
-\ > 0)\n judge_http_client = httpx.Client(verify=judge_ca_cert_path)\
+\ = os.path.exists(judge_ca_cert_path) and (\n os.path.getsize(judge_ca_cert_path)\
+\ > 0\n )\n judge_http_client = httpx.Client(verify=judge_ca_cert_path)\
 \ if use_tls else None\n\n def launch_vllm(\n model_path: str,\
 \ gpu_count: int, retries: int = 120, delay: int = 10\n ) -> tuple:\n\
 \ import subprocess\n import sys\n import time\n\n\
@@ -1528,13 +1528,13 @@ deploymentSpec:
 \ *\n\ndef sdg_op(\n num_instructions_to_generate: int,\n pipeline:\
 \ str,\n repo_branch: Optional[str],\n repo_pr: Optional[int],\n \
 \ taxonomy_path: str = \"/data/taxonomy\",\n sdg_path: str = \"/data/sdg\"\
-,\n sdg_sampling_size: float = 1.0,\n):\n from os import getenv, path\n\
-\n import instructlab.sdg\n import openai\n import yaml\n import\
-\ os\n\n api_key = getenv(\"api_key\")\n model = getenv(\"model\"\
+,\n sdg_sampling_size: float = 1.0,\n):\n import os\n from os import\
+\ getenv, path\n\n import instructlab.sdg\n import openai\n import\
+\ yaml\n\n api_key = getenv(\"api_key\")\n model = getenv(\"model\"\
 )\n endpoint = getenv(\"endpoint\")\n\n sdg_ca_cert_path = getenv(\"\
 SDG_CA_CERT_PATH\")\n use_tls = os.path.exists(sdg_ca_cert_path) and\
-\ (os.path.getsize(sdg_ca_cert_path) > 0)\n if use_tls:\n import\
-\ httpx\n\n custom_http_client = httpx.Client(verify=sdg_ca_cert_path)\n\
+\ (\n os.path.getsize(sdg_ca_cert_path) > 0\n )\n if use_tls:\n\
+\ import httpx\n\n custom_http_client = httpx.Client(verify=sdg_ca_cert_path)\n\
 \ client = openai.OpenAI(\n base_url=endpoint, api_key=api_key,\
 \ http_client=custom_http_client\n )\n else:\n client =\
 \ openai.OpenAI(base_url=endpoint, api_key=api_key)\n\n taxonomy_base\
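
pipeline.yaml is the compiled Kubeflow Pipelines package, so every Python-level fix above reappears here inside the escaped component source strings; nothing changes semantically. A hedged sketch of how such a file is typically regenerated with the KFP v2 SDK, assuming pipeline is the @dsl.pipeline-decorated function from pipeline.py (this repo may use its own wrapper or make target instead):

from kfp import compiler

from pipeline import pipeline  # assumed: the @dsl.pipeline-decorated function above

if __name__ == "__main__":
    # Recompiling re-embeds the now ruff-clean component sources into the YAML.
    compiler.Compiler().compile(pipeline_func=pipeline, package_path="pipeline.yaml")
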
sdg/components.py (6 changes: 4 additions & 2 deletions)

@@ -37,19 +37,21 @@ def sdg_op(
     sdg_path: str = "/data/sdg",
     sdg_sampling_size: float = 1.0,
 ):
+    import os
     from os import getenv, path
 
     import instructlab.sdg
     import openai
     import yaml
-    import os
 
     api_key = getenv("api_key")
     model = getenv("model")
     endpoint = getenv("endpoint")
 
     sdg_ca_cert_path = getenv("SDG_CA_CERT_PATH")
-    use_tls = os.path.exists(sdg_ca_cert_path) and (os.path.getsize(sdg_ca_cert_path) > 0)
+    use_tls = os.path.exists(sdg_ca_cert_path) and (
+        os.path.getsize(sdg_ca_cert_path) > 0
+    )
     if use_tls:
         import httpx
 
training/components.py (2 changes: 1 addition & 1 deletion)

@@ -1,7 +1,7 @@
 # type: ignore
 # pylint: disable=import-outside-toplevel,missing-function-docstring
 
-from typing import NamedTuple, Optional
+from typing import Optional
 
 from kfp import dsl
 
training/run_main_ds.py (6 changes: 3 additions & 3 deletions)

@@ -20,8 +20,8 @@ def run_main_ds(torch_args: TorchrunArgs, train_args: TrainingArgs) -> None:
         f"--nproc_per_node={torch_args.nproc_per_node}",
         f"--rdzv_id={torch_args.rdzv_id}",
         f"--rdzv_endpoint={torch_args.rdzv_endpoint}",
-        f"-m",
-        f"instructlab.training.main_ds",
+        "-m",
+        "instructlab.training.main_ds",
         f"--model_name_or_path={train_args.model_path}",
         f"--data_path={train_args.data_output_dir}/data.jsonl",
         f"--output_dir={train_args.ckpt_output_dir}",
@@ -30,7 +30,7 @@ def run_main_ds(torch_args: TorchrunArgs, train_args: TrainingArgs) -> None:
         f"--learning_rate={train_args.learning_rate}",
         f"--num_warmup_steps={train_args.warmup_steps}",
         f"--save_samples={train_args.save_samples}",
-        f"--log_level=INFO",
+        "--log_level=INFO",
         f"--max_batch_len={train_args.max_batch_len}",
         f"--seed={train_args.random_seed}",
         f"--chat-tmpl-path={train_args.chat_tmpl_path}",
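
The run_main_ds.py hunks are ruff's F541 fix: an f-string with no {placeholders} is just a plain string literal wearing a misleading prefix, so the f is dropped from the three constant arguments while the interpolated ones keep it. The two spellings are identical at runtime:

rdzv_id = 123

command = [
    f"--rdzv_id={rdzv_id}",  # placeholder present: the f-string earns its keep
    "-m",  # constant: plain string literal (the F541 fix)
    "instructlab.training.main_ds",
]

assert f"-m" == "-m"  # F541 is purely stylistic; behavior is unchanged
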
utils/components.py (2 changes: 1 addition & 1 deletion)

@@ -3,7 +3,7 @@
 
 from kfp import dsl
 
-from .consts import PYTHON_IMAGE, RHELAI_IMAGE, TOOLBOX_IMAGE
+from .consts import RHELAI_IMAGE, TOOLBOX_IMAGE
 
 
 @dsl.container_component
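
The last two files are the same F401 cleanup: NamedTuple (training/components.py) and PYTHON_IMAGE (utils/components.py) are no longer referenced in their modules, so they are removed from the imports. Given the commit title, the whole change set looks like the output of ruff check --fix followed by ruff format, both of which are behavior-preserving here.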
