From a22290d1cdcd810ef5f4d78739851322b719233d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mar=C3=ADa=20Medina?= <mariamedp@gmail.com>
Date: Wed, 4 Sep 2024 15:46:45 +0200
Subject: [PATCH] Fix value resolution in resolve_env_vars() (#195)

- Removed uppercase conversion for envvar values in
`resolve_env_vars()`. Now envvar name is converted to uppercase, but
value is left as-is.
- Added logging so users can understand why env resolution fails.
- Refactored a bit to reduce indentation levels so that logging messages
fit with current line length limitation.
- Edited docs to be consistent with uppercase envvar names.
- Fixed typo in workflow name (said "ci" when it was "pr").
---
 .../workflows/class_flows_pr_dev_workflow.yml |  2 +-
 docs/faqs.md                                  |  2 +-
 llmops/common/common.py                       | 47 +++++++++++--------
 .../deployment/kubernetes_deployment.py       |  2 +-
 .../common/deployment/provision_deployment.py |  2 +-
 llmops/common/prompt_eval.py                  |  2 +-
 llmops/common/prompt_pipeline.py              |  3 +-
 7 files changed, 34 insertions(+), 26 deletions(-)

diff --git a/.github/workflows/class_flows_pr_dev_workflow.yml b/.github/workflows/class_flows_pr_dev_workflow.yml
index e7779c2b0..3f71e9d7d 100644
--- a/.github/workflows/class_flows_pr_dev_workflow.yml
+++ b/.github/workflows/class_flows_pr_dev_workflow.yml
@@ -1,4 +1,4 @@
-name: class_flows_ci_dev_workflow
+name: class_flows_pr_dev_workflow
 
 on:
   workflow_call:
diff --git a/docs/faqs.md b/docs/faqs.md
index 671a45f08..7871ea6c8 100644
--- a/docs/faqs.md
+++ b/docs/faqs.md
@@ -197,7 +197,7 @@ APPLICATIONINSIGHTS_CONNECTION_STRING=xxxx
 
 ### What values should go into .env file and ENV_VARS ?
 
-If you have configurations in flows (init.json, flow.flex.yaml), experiment.yaml and env.yaml files that uses ${secret_name} style placeholders, then the actual value for secret_name should be stored in ENV_VARS secret in github, library variable in Azure DevOps and within .env file for local execution. These placeholders will be replaced by the actual values during execution either from ENV_VARS secret in github, library variable in Azure DevOps or from .env file for local execution.
+If you have configurations in flows (init.json, flow.flex.yaml), experiment.yaml and env.yaml files that uses ${SECRET_NAME} style placeholders, then the actual value for SECRET_NAME should be stored in ENV_VARS secret in github, library variable in Azure DevOps and within .env file for local execution. These placeholders will be replaced by the actual values during execution either from ENV_VARS secret in github, library variable in Azure DevOps or from .env file for local execution.
 
 - `experiment.yaml` file should contain placeholders for api_key within connections element.
 - `env.yaml` file should contain placeholders for all other configurations. Check use case `function_flows` for an example. It has multiple ${} placeholder within env.yaml.
diff --git a/llmops/common/common.py b/llmops/common/common.py
index 62921f1d4..258d0e035 100644
--- a/llmops/common/common.py
+++ b/llmops/common/common.py
@@ -45,7 +45,7 @@ def get_property_value(self):
             raise ValueError("Neither 'pf' nor 'ml_client' is available")
 
 
-def resolve_env_vars(base_path: str) -> Dict:
+def resolve_env_vars(base_path: str, logger: logging.Logger) -> Dict:
     """
     Resolve the environment variables from the config files.
 
@@ -55,33 +55,40 @@ def resolve_env_vars(base_path: str) -> Dict:
     env_vars = {}
     yaml_file_path = os.path.join(base_path,  "environment", "env.yaml")
     if os.path.isfile(os.path.abspath(yaml_file_path)):
+        logger.info(f"Reading envvars from '{yaml_file_path}'")
         with open(yaml_file_path, "r") as file:
             yaml_data = yaml.safe_load(file)
         for key, value in yaml_data.items():
             key = str(key).strip().upper()
-            value = str(value).strip().upper()
-
-            temp_val = os.environ.get(key, None)
-            if temp_val is not None:
-                env_vars[key] = os.environ.get(key, None)
-            else:
-                if (
-                    isinstance(value, str)
-                    and value.startswith('${')
-                    and value.endswith('}')
-                ):
-                    value = value.replace('${', '').replace('}', '')
-                    resolved_value = os.environ.get(value, None)
+            value = str(value).strip()
+
+            value_existing = os.environ.get(key, None)
+            if value_existing is not None:
+                logger.info(f"Envvar {key} defined in env, using that value.")
+                env_vars[key] = value_existing
+            elif value is None or len(value) == 0:
+                raise ValueError(f"{key} in env.yaml not resolved")
+            elif (
+                isinstance(value, str)
+                and value.startswith('${')
+                and value.endswith('}')
+            ):
+                value_reference = value.replace('${', '').replace('}', '')
+                try:
+                    resolved_value = os.environ[value_reference]
+                    logger.info(f"Reference {value} resolved.")
                     os.environ[key] = str(resolved_value)
                     env_vars[key] = str(resolved_value)
-                elif value is None or len(value) == 0:
-                    raise ValueError(f"{key} in env.yaml not resolved")
-                else:
-                    os.environ[key] = str(value)
-                    env_vars[key] = str(value)
+                except KeyError as e:
+                    raise ValueError(
+                        f"Reference {value} could not be resolved") from e
+            else:
+                os.environ[key] = str(value)
+                env_vars[key] = str(value)
+                logger.info(f"Envvar {key} loaded.")
     else:
         env_vars = {}
-        print("no values")
+        logger.info("No env file found.")
     return env_vars
 
 
diff --git a/llmops/common/deployment/kubernetes_deployment.py b/llmops/common/deployment/kubernetes_deployment.py
index 855325f55..e764d5b07 100644
--- a/llmops/common/deployment/kubernetes_deployment.py
+++ b/llmops/common/deployment/kubernetes_deployment.py
@@ -116,7 +116,7 @@ def create_kubernetes_deployment(
                 key, value = key_value
                 params_dict[key] = value
 
-    env_vars = resolve_env_vars(experiment.base_path)
+    env_vars = resolve_env_vars(experiment.base_path, logger)
 
     real_config = f"{base_path}/configs/deployment_config.json"
 
diff --git a/llmops/common/deployment/provision_deployment.py b/llmops/common/deployment/provision_deployment.py
index 724a93fe6..a580667ed 100644
--- a/llmops/common/deployment/provision_deployment.py
+++ b/llmops/common/deployment/provision_deployment.py
@@ -115,7 +115,7 @@ def create_deployment(
                 key, value = key_value
                 params_dict[key] = value
 
-    env_vars = resolve_env_vars(experiment.base_path)
+    env_vars = resolve_env_vars(experiment.base_path, logger)
 
     real_config = f"{base_path}/configs/deployment_config.json"
 
diff --git a/llmops/common/prompt_eval.py b/llmops/common/prompt_eval.py
index 30d5ca52d..0c03983a7 100644
--- a/llmops/common/prompt_eval.py
+++ b/llmops/common/prompt_eval.py
@@ -141,7 +141,7 @@ def prepare_and_execute(
 
         flow_type, params_dict = resolve_flow_type(evaluator.path, "")
 
-        env_vars = resolve_env_vars(experiment.base_path)
+        env_vars = resolve_env_vars(experiment.base_path, logger)
 
         dataframes = []
         metrics = []
diff --git a/llmops/common/prompt_pipeline.py b/llmops/common/prompt_pipeline.py
index ff30f9b85..93c0de376 100644
--- a/llmops/common/prompt_pipeline.py
+++ b/llmops/common/prompt_pipeline.py
@@ -172,6 +172,7 @@ def prepare_and_execute(
     ml_client = None
     wrapper = None
     if EXECUTION_TYPE == "LOCAL":
+        logger.info("Creating promptflow connections for local execution.")
         pf = PFClientLocal()
         create_pf_connections(
             exp_filename,
@@ -207,7 +208,7 @@ def prepare_and_execute(
     all_metrics = []
 
     env_vars = {}
-    env_vars = resolve_env_vars(experiment.base_path)
+    env_vars = resolve_env_vars(experiment.base_path, logger)
 
     logger.info(f"Running experiment {experiment.name}")
     for mapped_dataset in experiment.datasets: