Commit

add new use case for research assistant

rachhek committed Dec 11, 2024
1 parent 9165f1c commit 0f33457
Showing 20 changed files with 405 additions and 3 deletions.
42 changes: 42 additions & 0 deletions .github/workflows/research_assistant_ci_dev_workflow.yml
@@ -0,0 +1,42 @@
name: research_assistant_ci_dev_workflow

on:
  workflow_call:
    inputs:
      env_name:
        type: string
        description: "Execution Environment"
        required: true
        default: "dev"
      use_case_base_path:
        type: string
        description: "The flow use case to execute"
        required: true
        default: "use_case_research_assistant"
      deployment_type:
        type: string
        description: "Type of deployment - aml, aks, docker, webapp"
        required: true
  push:
    branches:
      - main
      - development
    paths:
      - 'use_case_research_assistant/**'
      - '.github/**'
      - 'llmops/**'

#=====================================
# Execute platform_ci_dev_workflow workflow for experiment, evaluation and deployment of flows
#=====================================
jobs:
  execute-platform-flow-ci:
    uses: ./.github/workflows/platform_ci_dev_workflow.yml
    with:
      env_name: ${{ inputs.env_name || 'dev' }}
      use_case_base_path: ${{ inputs.use_case_base_path || 'use_case_research_assistant' }}
      deployment_type: ${{ inputs.deployment_type || 'aml' }}
    secrets:
      azure_credentials: ${{ secrets.AZURE_CREDENTIALS }}
      registry_details: ${{ secrets.DOCKER_IMAGE_REGISTRY }}
      env_vars: ${{ secrets.ENV_VARS }}
36 changes: 36 additions & 0 deletions .github/workflows/research_assistant_pr_dev_workflow.yml
@@ -0,0 +1,36 @@
name: research_assistant_pr_dev_workflow

on:
  workflow_call:
    inputs:
      env_name:
        type: string
        description: "Execution Environment"
        required: true
        default: "dev"
      use_case_base_path:
        type: string
        description: "The flow use case to execute"
        required: true
        default: "use_case_research_assistant"
  pull_request:
    branches:
      - main
      - development
    paths:
      - 'use_case_research_assistant/**'
      - '.github/**'
      - 'llmops/**'

#=====================================
# Execute platform_pr_dev_workflow workflow for experiment, evaluation and deployment of flows
#=====================================
jobs:
  execute-platform-pr-workflow:
    uses: ./.github/workflows/platform_pr_dev_workflow.yml
    with:
      env_name: ${{ inputs.env_name || 'pr' }}
      use_case_base_path: ${{ inputs.use_case_base_path || 'use_case_research_assistant' }}
    secrets:
      azure_credentials: ${{ secrets.AZURE_CREDENTIALS }}
      env_vars: ${{ secrets.ENV_VARS }}
2 changes: 1 addition & 1 deletion math_coding/experiment.yaml
@@ -6,7 +6,7 @@ connections:
   connection_type: AzureOpenAIConnection
   api_base: https://edge-10x-ai-services.cognitiveservices.azure.com
   api_version: 2023-07-01-preview
-  api_key: ${AZURE_OPENAI_API_KEY}
+  api_key: ${api_key}
   api_type: azure
 
 datasets:
2 changes: 0 additions & 2 deletions math_coding/flows/math_standard_flow/flow.dag.yaml
@@ -39,8 +39,6 @@ nodes:
     type: code
     path: ask_llm.jinja2
   inputs:
-    # This is to easily switch between openai and azure openai.
-    # deployment_name is required by azure openai, model is required by openai.
     deployment_name: gpt-35-turbo
     model: gpt-3.5-turbo
     question: ${inputs.math_question}
45 changes: 45 additions & 0 deletions math_coding/tests/test_env.py
@@ -0,0 +1,45 @@
import os

from promptflow.client import PFClient


def test_env_variables():
    # Initialize PFClient
    pf = PFClient()

    # List all connections
    print("\nListing all connections:")
    try:
        connections = pf.connections.list()
        for conn in connections:
            print(f"Connection name: {conn.name}")
            print(f"Connection type: {conn.type}")
            print(f"Connection configs: {conn.configs}")
            print("---")
    except Exception as e:
        print("Error listing connections:", str(e))

    print("\nChecking environment variables:")
    print("AZURE_OPENAI_API_KEY:", bool(os.getenv("AZURE_OPENAI_API_KEY")))
    print("AZURE_OPENAI_ENDPOINT:", os.getenv("AZURE_OPENAI_ENDPOINT"))

    # Test connection to Azure OpenAI
    from openai import AzureOpenAI

    print("\nTesting Azure OpenAI connection:")
    client = AzureOpenAI(
        api_key=os.getenv("AZURE_OPENAI_API_KEY"),
        api_version="2023-07-01-preview",
        azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    )

    # Try a simple completion
    try:
        response = client.chat.completions.create(
            model="gpt-35-turbo",  # Using the deployment name from your flow.dag.yaml
            messages=[{"role": "user", "content": "Hello!"}],
        )
        print("Connection successful!")
    except Exception as e:
        print("Connection failed:", str(e))


if __name__ == "__main__":
    test_env_variables()
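Note: the connection listing above only finds the `aoai` connection that experiment.yaml references if it already exists in the local prompt flow connection store. A minimal sketch of creating it from the same environment variables (the connection name and API version mirror experiment.yaml; the snippet itself is not part of this commit):

import os

from promptflow.client import PFClient
from promptflow.entities import AzureOpenAIConnection

# Illustrative setup, not committed code: register an "aoai" connection so that
# pf.connections.list() in test_env.py can find it.
connection = AzureOpenAIConnection(
    name="aoai",
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_base=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_type="azure",
    api_version="2023-07-01-preview",
)

pf = PFClient()
pf.connections.create_or_update(connection)  # upsert into the local connection store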
74 changes: 74 additions & 0 deletions math_coding/tests/test_math_evaluation.py
@@ -0,0 +1,74 @@
from promptflow.client import PFClient


def test_math_evaluation_flow():
    # Initialize the Promptflow client
    pf = PFClient()

    # Path to your flow
    flow_path = "../flows/math_evaluation_flow"

    # Test case 1: Correct prediction
    result1 = pf.test(flow=flow_path, inputs={
        "groundtruth": "3.14",
        "prediction": "3.14"
    })
    print("\nTest 1 - Exact match:")
    print(f"Score: {result1['score']}")

    # Test case 2: Close enough prediction (rounds to same value)
    result2 = pf.test(flow=flow_path, inputs={
        "groundtruth": "3.14",
        "prediction": "3.141592"
    })
    print("\nTest 2 - Close enough:")
    print(f"Score: {result2['score']}")

    # Test case 3: Wrong prediction
    result3 = pf.test(flow=flow_path, inputs={
        "groundtruth": "3.14",
        "prediction": "3.15"
    })
    print("\nTest 3 - Wrong answer:")
    print(f"Score: {result3['score']}")

    # Test case 4: Error case
    result4 = pf.test(flow=flow_path, inputs={
        "groundtruth": "3.14",
        "prediction": "JSONDecodeError"
    })
    print("\nTest 4 - Error case:")
    print(f"Score: {result4['score']}")

    # Test batch processing
    test_data = [
        {"groundtruth": "1.0", "prediction": "1.0"},
        {"groundtruth": "2.0", "prediction": "2.01"},
        {"groundtruth": "3.14", "prediction": "3.14159"},
        {"groundtruth": "4.0", "prediction": "JSONDecodeError"},
    ]

    batch_result = pf.test(
        flow=flow_path,
        inputs=test_data,
    )

    print("\nBatch Processing Results:")
    # Print all available keys in batch_result
    print("Available keys:", batch_result.keys() if hasattr(batch_result, 'keys') else "Result is not a dict")

    # Try different ways to access metrics
    try:
        if hasattr(batch_result, 'metrics'):
            print(f"Metrics (as attribute): {batch_result.metrics}")
        elif isinstance(batch_result, dict) and 'metrics' in batch_result:
            print(f"Metrics (as dict key): {batch_result['metrics']}")
        elif isinstance(batch_result, dict) and 'output' in batch_result:
            print(f"Output: {batch_result['output']}")
        else:
            print("Raw batch result:", batch_result)
    except Exception as e:
        print(f"Error accessing metrics: {str(e)}")
        print("Raw batch result:", batch_result)


if __name__ == "__main__":
    test_math_evaluation_flow()
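pf.test is intended for single-row testing, which is why the batch block above has to probe the result shape defensively. For genuine batch evaluation the usual pattern is pf.run against a JSONL file; the sketch below is illustrative only (the file name and relative flow path are assumptions carried over from the test above):

import json

from promptflow.client import PFClient

pf = PFClient()

# Illustrative batch input file (name is an assumption, not part of the commit);
# one JSON object per line, keys matching the evaluation flow's inputs.
test_rows = [
    {"groundtruth": "1.0", "prediction": "1.0"},
    {"groundtruth": "2.0", "prediction": "2.01"},
]
with open("batch_inputs.jsonl", "w", encoding="utf-8") as f:
    for row in test_rows:
        f.write(json.dumps(row) + "\n")

run = pf.run(
    flow="../flows/math_evaluation_flow",
    data="batch_inputs.jsonl",
    column_mapping={
        "groundtruth": "${data.groundtruth}",
        "prediction": "${data.prediction}",
    },
)
print(pf.get_details(run))   # per-row inputs and outputs (pandas DataFrame)
print(pf.get_metrics(run))   # aggregated metrics, if the flow logs any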
56 changes: 56 additions & 0 deletions use_case_research_assistant/configs/deployment_config.json
@@ -0,0 +1,56 @@
{
    "azure_managed_endpoint": [
        {
            "ENV_NAME": "dev",
            "TEST_FILE_PATH": "sample-request.json",
            "ENDPOINT_NAME": "research-assistant-1",
            "ENDPOINT_DESC": "An online endpoint serving a flow for research assistant",
            "DEPLOYMENT_DESC": "prompt flow deployment",
            "PRIOR_DEPLOYMENT_NAME": "research-assistant-1",
            "PRIOR_DEPLOYMENT_TRAFFIC_ALLOCATION": "",
            "CURRENT_DEPLOYMENT_NAME": "research-assistant-1",
            "CURRENT_DEPLOYMENT_TRAFFIC_ALLOCATION": "100",
            "DEPLOYMENT_VM_SIZE": "Standard_F2s_v2",
            "DEPLOYMENT_INSTANCE_COUNT": 1,
            "ENVIRONMENT_VARIABLES": {
                "example-name": "example-value"
            }
        }
    ],
    "kubernetes_endpoint": [
        {
            "ENV_NAME": "dev",
            "TEST_FILE_PATH": "sample-request.json",
            "ENDPOINT_NAME": "",
            "ENDPOINT_DESC": "A Kubernetes endpoint serving a flow for research assistant",
            "DEPLOYMENT_DESC": "prompt flow deployment",
            "PRIOR_DEPLOYMENT_NAME": "",
            "PRIOR_DEPLOYMENT_TRAFFIC_ALLOCATION": "",
            "CURRENT_DEPLOYMENT_NAME": "",
            "CURRENT_DEPLOYMENT_TRAFFIC_ALLOCATION": 100,
            "COMPUTE_NAME": "",
            "DEPLOYMENT_VM_SIZE": "promptinstancetype",
            "DEPLOYMENT_INSTANCE_COUNT": 1,
            "CPU_ALLOCATION": "",
            "MEMORY_ALLOCATION": "",
            "ENVIRONMENT_VARIABLES": {
                "example-name": "example-value"
            }
        }
    ],
    "webapp_endpoint": [
        {
            "ENV_NAME": "dev",
            "TEST_FILE_PATH": "sample-request.json",
            "CONNECTION_NAMES": [""],
            "REGISTRY_NAME": "",
            "REGISTRY_RG_NAME": "",
            "APP_PLAN_NAME": "",
            "WEB_APP_NAME": "",
            "WEB_APP_RG_NAME": "",
            "WEB_APP_SKU": "B3",
            "USER_MANAGED_ID": ""
        }
    ]
}
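This config is presumably consumed by the shared llmops scripts. Purely as an illustration of its shape (this loader is hypothetical, not the repository's actual code), an entry for one environment could be selected like this:

import json

# Hypothetical helper, not part of this commit: pick the deployment settings
# for one endpoint type and environment from deployment_config.json.
def get_endpoint_config(path: str, endpoint_type: str, env_name: str) -> dict:
    with open(path, encoding="utf-8") as f:
        config = json.load(f)
    for entry in config[endpoint_type]:
        if entry["ENV_NAME"] == env_name:
            return entry
    raise ValueError(f"no {endpoint_type} entry for environment '{env_name}'")


dev_cfg = get_endpoint_config(
    "use_case_research_assistant/configs/deployment_config.json",
    "azure_managed_endpoint",
    "dev",
)
print(dev_cfg["ENDPOINT_NAME"], dev_cfg["DEPLOYMENT_VM_SIZE"])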
1 change: 1 addition & 0 deletions use_case_research_assistant/data/eval_data.json
@@ -0,0 +1 @@
{"question": "What's the population of Finland?", "answer": "The population of Finland is 5.5 million."}
1 change: 1 addition & 0 deletions use_case_research_assistant/data/sample_data.json
@@ -0,0 +1 @@
{"question": "What's the population of Finland?", "answer": "The population of Finland is 5.5 million."}
32 changes: 32 additions & 0 deletions use_case_research_assistant/environment/Dockerfile
@@ -0,0 +1,32 @@
# syntax=docker/dockerfile:1
FROM docker.io/continuumio/miniconda3:latest

WORKDIR /

COPY ./flow/requirements.txt /flow/requirements.txt

RUN apt-get update && apt-get install -y runit gcc

# create conda environment
RUN conda create -n promptflow-serve python=3.9.16 pip=23.0.1 -q -y && \
    conda run -n promptflow-serve \
    pip install -r /flow/requirements.txt && \
    conda run -n promptflow-serve pip install keyrings.alt && \
    conda run -n promptflow-serve pip install gunicorn==20.1.0 && \
    conda run -n promptflow-serve pip cache purge && \
    conda clean -a -y

COPY ./flow /flow

EXPOSE 8080

COPY ./connections/* /connections/

# reset runsvdir
RUN rm -rf /var/runit
COPY ./runit /var/runit
# grant permission
RUN chmod -R +x /var/runit

COPY ./start.sh /
CMD ["bash", "./start.sh"]
30 changes: 30 additions & 0 deletions use_case_research_assistant/experiment.yaml
@@ -0,0 +1,30 @@
name: research_assistant
flow: flows/standard

connections:
- name: aoai
  connection_type: AzureOpenAIConnection
  api_base: https://edge-10x-ai-services.cognitiveservices.azure.com
  api_version: 2023-07-01-preview
  api_key: ${api_key}
  api_type: azure

datasets:
- name: sample_research_questions_training
  source: data/sample_data.json
  description: "This dataset is for sample research questions."
  mappings:
    question: "${data.question}"
    answer: "${data.answer}"

evaluators:
- name: evaluate_research_assistant
  flow: flows/evaluation
  datasets:
  - name: sample_research_questions_test
    reference: sample_research_questions_training
    source: data/eval_data.json
    description: "This dataset is for evaluating flows."
    mappings:
      ground_truth: "${data.answer}"
      entities: "${run.outputs.entities}"
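The ${data.*} mappings above have to match the columns in the referenced dataset files. A quick, hypothetical sanity check (not part of this commit; it assumes the script is run from the repository root) could look like this:

import json
import re

import yaml  # PyYAML

# Hypothetical check, not committed code: verify that every "${data.<column>}"
# mapping in experiment.yaml refers to a field that exists in its dataset file.
with open("use_case_research_assistant/experiment.yaml", encoding="utf-8") as f:
    experiment = yaml.safe_load(f)

for dataset in experiment.get("datasets", []):
    data_path = "use_case_research_assistant/" + dataset["source"]
    with open(data_path, encoding="utf-8") as f:
        first_record = json.loads(f.readline())
    for target, expr in dataset.get("mappings", {}).items():
        match = re.fullmatch(r"\$\{data\.(\w+)\}", expr)
        if match and match.group(1) not in first_record:
            print(f"{dataset['name']}: '{target}' references missing column '{match.group(1)}'")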
25 changes: 25 additions & 0 deletions use_case_research_assistant/flows/evaluation/flow.dag.yaml
@@ -0,0 +1,25 @@
$schema: https://azuremlschemas.azureedge.net/promptflow/latest/Flow.schema.json
inputs:
  question:
    type: string
    default: "What's the population of Finland?"
  answer:
    type: string
    default: "The population of Finland is 5.5 million."
  ground_truth:
    type: string
    default: '"5.5 million"'
outputs:
  output_answer:
    type: object
    reference: ${search_question.output}
nodes:
- name: search_question
  type: python
  source:
    type: code
    path: search_question.py
  inputs:
    question: ${inputs.question}
environment:
  python_requirements_txt: requirements.txt
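The single node above points at search_question.py, which is not visible in this truncated diff. Purely as an illustration of the expected shape (the body is a placeholder, not the committed implementation), a prompt flow Python tool that satisfies this node contract looks like:

from promptflow.core import tool


# Illustrative stand-in for the committed search_question.py (not shown in this
# diff): a Python tool that receives the flow's "question" input and returns the
# value exposed as ${search_question.output}.
@tool
def search_question(question: str) -> dict:
    # A real implementation would call a search backend or an LLM here.
    return {"question": question, "answer": "placeholder result"}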
3 changes: 3 additions & 0 deletions use_case_research_assistant/flows/evaluation/requirements.txt
@@ -0,0 +1,3 @@
promptflow
promptflow-tools
promptflow-sdk[builtins]
Empty file.
@@ -0,0 +1,6 @@
system:
You are a research assistant. Your task is to break the given question down into more specific sub-questions.

user:
Question: {{question}}
Sub-questions:
