Skip to content

Commit

Permalink
add
Browse files Browse the repository at this point in the history
  • Loading branch information
rachhek committed Dec 12, 2024
1 parent 4044337 commit e4a53e0
Show file tree
Hide file tree
Showing 16 changed files with 2,546 additions and 13 deletions.
1 change: 1 addition & 0 deletions .python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.12
15 changes: 15 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
[project]
name = "edge-10x-prompt-flows"
version = "0.1.0"
description = "Prompt flows for a research assistant that reformulates questions into targeted Google search queries"
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
"jinja2>=3.1.4",
"promptflow-azure>=1.16.2",
"promptflow>=1.16.2",
"promptflow-sdk>=0.0.1",
"promptflow-tools>=1.4.0",
"openai>=1.56.2",
"python-dotenv>=1.0.1",
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
from typing import List, Dict
import json
from promptflow import tool

@tool
def make_google_search_queries(reformulated_questions: str, company_name: str, website_filters: List[str]) -> List[str]:
    """Build Google search queries from LLM-reformulated questions.

    For every usable query and every domain, a ``site:``-restricted query is
    emitted, followed by one variation per content-type modifier (news, blog,
    ...). Afterwards a few unrestricted variations are emitted per query.
    The company name is prepended to any query that does not already
    mention it (case-insensitive).

    Args:
        reformulated_questions: JSON string of the form
            ``{"queries": ["...", ...]}``. An already-parsed dict is also
            accepted, in case the upstream node hands over parsed JSON.
        company_name: Company name to focus the search on. ``None`` or an
            empty string disables the prefixing.
        website_filters: Domains to restrict the search to. ``None`` or an
            empty list yields only the unrestricted queries.

    Returns:
        De-duplicated list of search queries in generation order
        (site-restricted first, then general). An empty list is returned
        when the input cannot be parsed or is not a JSON object.
    """
    site_modifiers = (
        "news",
        "blog",
        "press release",
        "report",
        "whitepaper",
        "case study",
        "webinar",
        "podcast",
        "video",
        "infographic",
    )
    general_modifiers = ("announcement", "blog", "documentation")

    # Parse the LLM output. TypeError covers None / non-string input, which
    # json.loads rejects before it ever gets to decoding.
    if isinstance(reformulated_questions, dict):
        questions_dict = reformulated_questions
    else:
        try:
            questions_dict = json.loads(reformulated_questions)
        except (json.JSONDecodeError, TypeError):
            print(f"Warning: Could not parse JSON: {reformulated_questions}")
            return []

    # Valid JSON is not necessarily an object (e.g. a bare array or string);
    # calling .get on those would raise AttributeError.
    if not isinstance(questions_dict, dict):
        print(f"Warning: Expected a JSON object with a 'queries' array: {reformulated_questions}")
        return []

    queries = questions_dict.get('queries', [])
    if isinstance(queries, str):
        # A lone string would otherwise be iterated character by character.
        queries = [queries]
    elif not isinstance(queries, list):
        queries = []

    company = (company_name or "").strip()
    company_key = company.lower()

    # Normalise each query once instead of once per domain.
    base_queries = []
    for raw_query in queries:
        if not isinstance(raw_query, str):
            continue
        base_query = raw_query.strip()
        if not base_query:
            # Strip before the emptiness check so whitespace-only entries
            # do not turn into "<company> " queries.
            continue
        if company_key not in base_query.lower():
            base_query = f"{company} {base_query}"
        base_queries.append(base_query)

    if isinstance(website_filters, str):
        website_filters = [website_filters]
    domains = [
        domain.strip()
        for domain in (website_filters or [])
        if isinstance(domain, str) and domain.strip()
    ]

    # Site-restricted queries: the plain query first, then its variations.
    site_queries = []
    for domain in domains:
        for base_query in base_queries:
            site_queries.append(f'site:{domain} {base_query}')
            site_queries.extend(
                f'site:{domain} {base_query} {modifier}'
                for modifier in site_modifiers
            )

    # Unrestricted queries for broader context.
    general_queries = [
        f'{base_query} {modifier}'
        for base_query in base_queries
        for modifier in general_modifiers
    ]

    # dict preserves insertion order, so this de-duplicates while keeping
    # the first occurrence of each query in place.
    return list(dict.fromkeys(site_queries + general_queries))
2 changes: 1 addition & 1 deletion use_case_research_assistant/flows/evaluation/flow.dag.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ inputs:
outputs:
output_answer:
type: object
reference: ${search_each_question.output}
reference: ${search_question.output}
nodes:
- name: search_question
type: python
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
system:
You are a research assistant. Your task is to rephrase the given question into a more specific question.

It's important to return the response as a JSON object with a "queries" array containing the search queries.
user:
Question: {{question}}
Sub-questions:
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
system:
You are an expert Query rewriter and reformulator. Here is a given question to reformulate.
These reformulated queries will be used for researching the internet. The objective is to get as good search queries as possible.

Do not use company name placeholders when reformulating the question. Just reformulate the question in a logical, SEO-friendly way.

Here is how you will think about the question:
1. Understand the question thoroughly
2. Identify key concepts and queries in the question
3. Extract the main concepts and related concepts from the question
4. Break down the original question into several search queries
5. Reformulate each search query into a more specific and targeted search query

Here are some good examples that we want to follow. Please study them and make similar reformulations.

#Original query
Has the company Vattenfall adopted reference architectures or best practices to guide its cloud deployments?
#reformulated queries
- Vattenfall cloud deployment reference architectures
- Vattenfall best practices cloud deployment
- Has Vattenfall adopted cloud deployment standards

#Original query
How well is the company Vattenfall managing risks associated with cloud migration (e.g., downtime, data loss)?
#reformulated queries
- Vattenfall cloud migration risk management strategies
- Challenges of cloud migration for Vattenfall
- Vattenfall case study on cloud migration success and failures

#Original query
How well are external market or competitive factors being integrated into cloud discussions of Vattenfall?
#reformulated queries
- Integration of external market factors in cloud discussions
- Competitive analysis in cloud computing
- Incorporating market trends in cloud strategy

It's important to return the response as a JSON object with a "queries" array containing the search queries.

user:
Reformulate the following query:
<query>: {{question}} </query>
Formulated_queries:
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
system: You are a research assistant helping to reformulate questions to get specific and focused search results about {{company_name}} products and services.

user: Please reformulate this question into multiple specific search queries about {{company_name}}: {{question}}
Focus on specific product names, features, and technical details.

Important: Return your response as a JSON object with a "queries" array containing the search queries.

assistant: {
"queries": [
"{{company_name}} {{question}} product names specifications",
"{{company_name}} {{question}} latest features updates",
"{{company_name}} {{question}} technical capabilities",
"{{company_name}} {{question}} pricing tiers comparison",
"{{company_name}} {{question}} system requirements",
"{{company_name}} {{question}} integration options"
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
system: You are a comprehensive research assistant helping to explore all aspects of {{company_name}} products and services.

user: Please reformulate this question into multiple comprehensive search queries about {{company_name}}: {{question}}
Consider multiple angles including business impact, use cases, and industry applications.

Important: Return your response as a JSON object with a "queries" array containing the search queries.

assistant: {
"queries": [
"{{company_name}} {{question}} enterprise solutions overview",
"{{company_name}} {{question}} customer success stories",
"{{company_name}} {{question}} industry use cases",
"{{company_name}} {{question}} business benefits ROI",
"{{company_name}} {{question}} market analysis comparison",
"{{company_name}} {{question}} future roadmap plans"
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
system: You are a technical research assistant focusing on detailed technical aspects of {{company_name}} products and services.

user: Please reformulate this question into multiple technical search queries about {{company_name}}: {{question}}
Focus on technical specifications, documentation, and implementation details.

Important: Return your response as a JSON object with a "queries" array containing the search queries.

assistant: {
"queries": [
"{{company_name}} {{question}} technical documentation guide",
"{{company_name}} {{question}} API reference implementation",
"{{company_name}} {{question}} architecture overview",
"{{company_name}} {{question}} deployment configuration",
"{{company_name}} {{question}} best practices guidelines",
"{{company_name}} {{question}} security compliance requirements"
]
}
53 changes: 42 additions & 11 deletions use_case_research_assistant/flows/standard/flow.dag.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,31 +2,62 @@ $schema: https://azuremlschemas.azureedge.net/promptflow/latest/Flow.schema.json
inputs:
question:
type: string
default: What's the population of Finland?
outputs:
answer:
default: "What are some generative ai products?"
company_name:
type: string
reference: ${search_question.output}
default: "Microsoft"
website_filters:
type: list
default: ["www.microsoft.com", "www.learn.microsoft.com"]
outputs:
answers:
type: list
reference: ${make_google_search_queries.output}
nodes:
- name: QUESTION_EXPANDER
- name: QUESTION_REFORMULATION
type: llm
source:
type: code
path: QUESTION_EXPANDER.jinja2
path: QUESTION_REFORMULATION_1.jinja2
inputs:
deployment_name: gpt-35-turbo
max_tokens: 64
deployment_name: gpt-4o
max_tokens: 1000
question: ${inputs.question}
company_name: ${inputs.company_name}
response_format: {"type": "json_object"}
provider: AzureOpenAI
connection: aoai
api: chat
module: promptflow.tools.aoai
- name: search_question
variants:
variant_0:
source:
type: code
path: QUESTION_REFORMULATION_1.jinja2
variant_1:
source:
type: code
path: QUESTION_REFORMULATION_2.jinja2
variant_2:
source:
type: code
path: QUESTION_REFORMULATION_3.jinja2
inputs:
temperature: 0.7
variant_3:
source:
type: code
path: QUESTION_REFORMULATION_4.jinja2
inputs:
temperature: 0.3
- name: make_google_search_queries
type: python
source:
type: code
path: search_question.py
path: make_google_search_queries.py
inputs:
question: ${QUESTION_EXPANDER.output}
reformulated_questions: ${QUESTION_REFORMULATION.output}
company_name: ${inputs.company_name}
website_filters: ${inputs.website_filters}
environment:
python_requirements_txt: requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
from typing import List, Dict
import json
from promptflow import tool

@tool
def make_google_search_queries(reformulated_questions: str, company_name: str, website_filters: List[str]) -> List[str]:
    """Build Google search queries from LLM-reformulated questions.

    For every usable query and every domain, a ``site:``-restricted query is
    emitted, followed by one variation per content-type modifier (news, blog,
    ...). Afterwards a few unrestricted variations are emitted per query.
    The company name, wrapped in double quotes, is prepended to any query
    that does not already mention it (case-insensitive).

    Args:
        reformulated_questions: JSON string of the form
            ``{"queries": ["...", ...]}``. An already-parsed dict is also
            accepted, in case the upstream node hands over parsed JSON.
        company_name: Company name to focus the search on. ``None`` or an
            empty string disables the prefixing.
        website_filters: Domains to restrict the search to. ``None`` or an
            empty list yields only the unrestricted queries.

    Returns:
        De-duplicated list of search queries in generation order
        (site-restricted first, then general). An empty list is returned
        when the input cannot be parsed or is not a JSON object.
    """
    site_modifiers = (
        "news",
        "blog",
        "press release",
        "report",
        "whitepaper",
        "case study",
        "webinar",
        "podcast",
        "video",
        "infographic",
    )
    general_modifiers = ("announcement", "blog", "documentation")

    # Parse the LLM output. TypeError covers None / non-string input, which
    # json.loads rejects before it ever gets to decoding.
    if isinstance(reformulated_questions, dict):
        questions_dict = reformulated_questions
    else:
        try:
            questions_dict = json.loads(reformulated_questions)
        except (json.JSONDecodeError, TypeError):
            print(f"Warning: Could not parse JSON: {reformulated_questions}")
            return []

    # Valid JSON is not necessarily an object (e.g. a bare array or string);
    # calling .get on those would raise AttributeError.
    if not isinstance(questions_dict, dict):
        print(f"Warning: Expected a JSON object with a 'queries' array: {reformulated_questions}")
        return []

    queries = questions_dict.get('queries', [])
    if isinstance(queries, str):
        # A lone string would otherwise be iterated character by character.
        queries = [queries]
    elif not isinstance(queries, list):
        queries = []

    company = (company_name or "").strip()
    company_key = company.lower()

    # Normalise each query once and share the result between the
    # site-restricted and general branches, so both use the same prefix
    # (previously the site branch added a stray leading space, producing
    # 'site:domain  "Company" ...' with a double space).
    base_queries = []
    for raw_query in queries:
        if not isinstance(raw_query, str):
            continue
        base_query = raw_query.strip()
        if not base_query:
            # Strip before the emptiness check so whitespace-only entries
            # do not turn into '"<company>" ' queries.
            continue
        if company_key not in base_query.lower():
            base_query = f'"{company}" {base_query}'
        base_queries.append(base_query)

    if isinstance(website_filters, str):
        website_filters = [website_filters]
    domains = [
        domain.strip()
        for domain in (website_filters or [])
        if isinstance(domain, str) and domain.strip()
    ]

    # Site-restricted queries: the plain query first, then its variations.
    site_queries = []
    for domain in domains:
        for base_query in base_queries:
            site_queries.append(f'site:{domain} {base_query}')
            site_queries.extend(
                f'site:{domain} {base_query} {modifier}'
                for modifier in site_modifiers
            )

    # Unrestricted queries for broader context.
    general_queries = [
        f'{base_query} {modifier}'
        for base_query in base_queries
        for modifier in general_modifiers
    ]

    # dict preserves insertion order, so this de-duplicates while keeping
    # the first occurrence of each query in place.
    return list(dict.fromkeys(site_queries + general_queries))
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
system:
You are a research assistant. Your task is to rephrase the given question into a more specific question.

user:
Question: {{question}}
Sub-questions:
Loading

0 comments on commit e4a53e0

Please sign in to comment.