From a7535994dc0a4bb149acee359df8c403cafb51ce Mon Sep 17 00:00:00 2001 From: SimonOriginal Date: Fri, 28 Jun 2024 15:01:40 +0200 Subject: [PATCH 1/6] add google translate --- .../google_translation_filter_pipeline.py | 127 ++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 examples/filters/google_translation_filter_pipeline.py diff --git a/examples/filters/google_translation_filter_pipeline.py b/examples/filters/google_translation_filter_pipeline.py new file mode 100644 index 00000000..8447c90d --- /dev/null +++ b/examples/filters/google_translation_filter_pipeline.py @@ -0,0 +1,127 @@ +from typing import List, Optional +from schemas import OpenAIChatMessage +from pydantic import BaseModel +import requests +import os +import time + +from utils.pipelines.main import get_last_user_message, get_last_assistant_message + +class Pipeline: + class Valves(BaseModel): + pipelines: List[str] = [] + priority: int = 0 + source_user: Optional[str] = "auto" + target_user: Optional[str] = "en" + source_assistant: Optional[str] = "en" + target_assistant: Optional[str] = "uk" + + def __init__(self): + # Initialize the pipeline type and name + self.type = "filter" + self.name = "Google Translate Filter" + + # Initialize Valves with default values + self.valves = self.Valves( + **{ + "pipelines": ["*"], + } + ) + + async def on_startup(self): + # Function called when the server is started + print(f"on_startup:{__name__}") + pass + + async def on_shutdown(self): + # Function called when the server is stopped + print(f"on_shutdown:{__name__}") + pass + + async def on_valves_updated(self): + # Function called when the valves are updated + pass + + def translate(self, text: str, source: str, target: str) -> str: + # Function to translate text using Google Translate + url = "https://translate.googleapis.com/translate_a/single" + params = { + "client": "gtx", + "sl": source, + "tl": target, + "dt": "t", + "q": text, + } + + try: + # Make a GET request to Google Translate API + r = requests.get(url, params=params) + r.raise_for_status() # Raise an exception for bad status codes + + # Parse the JSON response + result = r.json() + translated_text = ''.join([sentence[0] for sentence in result[0]]) # Combine all translated sentences into one string + return translated_text + except requests.exceptions.RequestException as e: + # Handle network errors, retrying after a short pause + print(f"Network error: {e}") + time.sleep(1) # Pause before retrying + return self.translate(text, source, target) # Retry translation + except Exception as e: + # Handle other exceptions + print(f"Error translating text: {e}") + return text # Return original text in case of error + + async def inlet(self, body: dict, user: Optional[dict] = None) -> dict: + # Function to process incoming messages from the user + print(f"inlet:{__name__}") + + messages = body["messages"] + user_message = get_last_user_message(messages) + + print(f"User message: {user_message}") + + # Translate user message + translated_user_message = self.translate( + user_message, + self.valves.source_user, + self.valves.target_user, + ) + + print(f"Translated user message: {translated_user_message}") + + # Update the translated message in the messages list + for message in reversed(messages): + if message["role"] == "user": + message["content"] = translated_user_message + break + + body = {**body, "messages": messages} + return body + + async def outlet(self, body: dict, user: Optional[dict] = None) -> dict: + # Function to process outgoing messages from the assistant + print(f"outlet:{__name__}") + + messages = body["messages"] + assistant_message = get_last_assistant_message(messages) + + print(f"Assistant message: {assistant_message}") + + # Translate assistant message + translated_assistant_message = self.translate( + assistant_message, + self.valves.source_assistant, + self.valves.target_assistant, + ) + + print(f"Translated assistant message: {translated_assistant_message}") + + # Update the translated message in the messages list + for message in reversed(messages): + if message["role"] == "assistant": + message["content"] = translated_assistant_message + break + + body = {**body, "messages": messages} + return body From 8d3bc097e3167b6f463107247c219a9dcdfc30b2 Mon Sep 17 00:00:00 2001 From: SimonOriginal Date: Fri, 28 Jun 2024 15:10:37 +0200 Subject: [PATCH 2/6] add description --- .../filters/google_translation_filter_pipeline.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/examples/filters/google_translation_filter_pipeline.py b/examples/filters/google_translation_filter_pipeline.py index 8447c90d..892c6c35 100644 --- a/examples/filters/google_translation_filter_pipeline.py +++ b/examples/filters/google_translation_filter_pipeline.py @@ -1,3 +1,14 @@ +""" +title: Google Translate Filter +author: SimonOriginal +date: 2024-06-28 +version: 1.0 +license: MIT +description: This pipeline integrates Google Translate for automatic translation of user and assistant messages +without requiring an API key. It supports multilingual communication by translating based on specified source +and target languages. +""" + from typing import List, Optional from schemas import OpenAIChatMessage from pydantic import BaseModel From 539cbe1a4a2c5e3d86d677815411507e85501c5e Mon Sep 17 00:00:00 2001 From: SimonOriginal Date: Fri, 28 Jun 2024 15:49:10 +0200 Subject: [PATCH 3/6] Added caching and optimization for Markdown table translation --- .../google_translation_filter_pipeline.py | 85 +++++++++++-------- 1 file changed, 50 insertions(+), 35 deletions(-) diff --git a/examples/filters/google_translation_filter_pipeline.py b/examples/filters/google_translation_filter_pipeline.py index 892c6c35..32b6b7d4 100644 --- a/examples/filters/google_translation_filter_pipeline.py +++ b/examples/filters/google_translation_filter_pipeline.py @@ -8,13 +8,15 @@ without requiring an API key. It supports multilingual communication by translating based on specified source and target languages. """ - +import re from typing import List, Optional from schemas import OpenAIChatMessage from pydantic import BaseModel import requests import os import time +import asyncio +from functools import lru_cache from utils.pipelines.main import get_last_user_message, get_last_assistant_message @@ -25,36 +27,33 @@ class Valves(BaseModel): source_user: Optional[str] = "auto" target_user: Optional[str] = "en" source_assistant: Optional[str] = "en" - target_assistant: Optional[str] = "uk" + target_assistant: Optional[str] = "es" def __init__(self): - # Initialize the pipeline type and name self.type = "filter" self.name = "Google Translate Filter" - - # Initialize Valves with default values self.valves = self.Valves( **{ "pipelines": ["*"], } ) + # Initialize translation cache + self.translation_cache = {} + async def on_startup(self): - # Function called when the server is started print(f"on_startup:{__name__}") pass async def on_shutdown(self): - # Function called when the server is stopped print(f"on_shutdown:{__name__}") pass async def on_valves_updated(self): - # Function called when the valves are updated pass + @lru_cache(maxsize=128) # LRU cache to store translation results def translate(self, text: str, source: str, target: str) -> str: - # Function to translate text using Google Translate url = "https://translate.googleapis.com/translate_a/single" params = { "client": "gtx", @@ -65,26 +64,29 @@ def translate(self, text: str, source: str, target: str) -> str: } try: - # Make a GET request to Google Translate API r = requests.get(url, params=params) - r.raise_for_status() # Raise an exception for bad status codes - - # Parse the JSON response + r.raise_for_status() result = r.json() - translated_text = ''.join([sentence[0] for sentence in result[0]]) # Combine all translated sentences into one string + translated_text = ''.join([sentence[0] for sentence in result[0]]) return translated_text except requests.exceptions.RequestException as e: - # Handle network errors, retrying after a short pause print(f"Network error: {e}") - time.sleep(1) # Pause before retrying - return self.translate(text, source, target) # Retry translation + time.sleep(1) + return self.translate(text, source, target) except Exception as e: - # Handle other exceptions print(f"Error translating text: {e}") - return text # Return original text in case of error + return text + + def split_text_around_table(self, text: str) -> List[str]: + table_regex = r'((?:^.*?\|.*?\n)+)(?=\n[^\|\s].*?\|)' + matches = re.split(table_regex, text, flags=re.MULTILINE) + + if len(matches) > 1: + return [matches[0], matches[1]] + else: + return [text, ""] async def inlet(self, body: dict, user: Optional[dict] = None) -> dict: - # Function to process incoming messages from the user print(f"inlet:{__name__}") messages = body["messages"] @@ -92,16 +94,23 @@ async def inlet(self, body: dict, user: Optional[dict] = None) -> dict: print(f"User message: {user_message}") - # Translate user message - translated_user_message = self.translate( - user_message, - self.valves.source_user, - self.valves.target_user, - ) + parts = self.split_text_around_table(user_message) + text_before_table, table_text = parts + + # Check translation cache for text before table + translated_before_table = self.translation_cache.get(text_before_table) + if translated_before_table is None: + translated_before_table = self.translate( + text_before_table, + self.valves.source_user, + self.valves.target_user, + ) + self.translation_cache[text_before_table] = translated_before_table + + translated_user_message = translated_before_table + table_text print(f"Translated user message: {translated_user_message}") - # Update the translated message in the messages list for message in reversed(messages): if message["role"] == "user": message["content"] = translated_user_message @@ -111,7 +120,6 @@ async def inlet(self, body: dict, user: Optional[dict] = None) -> dict: return body async def outlet(self, body: dict, user: Optional[dict] = None) -> dict: - # Function to process outgoing messages from the assistant print(f"outlet:{__name__}") messages = body["messages"] @@ -119,16 +127,23 @@ async def outlet(self, body: dict, user: Optional[dict] = None) -> dict: print(f"Assistant message: {assistant_message}") - # Translate assistant message - translated_assistant_message = self.translate( - assistant_message, - self.valves.source_assistant, - self.valves.target_assistant, - ) + parts = self.split_text_around_table(assistant_message) + text_before_table, table_text = parts + + # Check translation cache for text before table + translated_before_table = self.translation_cache.get(text_before_table) + if translated_before_table is None: + translated_before_table = self.translate( + text_before_table, + self.valves.source_assistant, + self.valves.target_assistant, + ) + self.translation_cache[text_before_table] = translated_before_table + + translated_assistant_message = translated_before_table + table_text print(f"Translated assistant message: {translated_assistant_message}") - # Update the translated message in the messages list for message in reversed(messages): if message["role"] == "assistant": message["content"] = translated_assistant_message From eab74e356139905bf943d23cdcfd9dd8f4fa559c Mon Sep 17 00:00:00 2001 From: SimonOriginal Date: Fri, 28 Jun 2024 16:51:37 +0200 Subject: [PATCH 4/6] Fix table delimiters in translated text --- .../filters/google_translation_filter_pipeline.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/examples/filters/google_translation_filter_pipeline.py b/examples/filters/google_translation_filter_pipeline.py index 32b6b7d4..9e89b39b 100644 --- a/examples/filters/google_translation_filter_pipeline.py +++ b/examples/filters/google_translation_filter_pipeline.py @@ -8,6 +8,7 @@ without requiring an API key. It supports multilingual communication by translating based on specified source and target languages. """ + import re from typing import List, Optional from schemas import OpenAIChatMessage @@ -27,7 +28,7 @@ class Valves(BaseModel): source_user: Optional[str] = "auto" target_user: Optional[str] = "en" source_assistant: Optional[str] = "en" - target_assistant: Optional[str] = "es" + target_assistant: Optional[str] = "uk" def __init__(self): self.type = "filter" @@ -86,6 +87,10 @@ def split_text_around_table(self, text: str) -> List[str]: else: return [text, ""] + def clean_table_delimiters(self, text: str) -> str: + # Remove extra spaces from table delimiters + return re.sub(r'(\|\s*-+\s*)+', lambda m: m.group(0).replace(' ', ''), text) + async def inlet(self, body: dict, user: Optional[dict] = None) -> dict: print(f"inlet:{__name__}") @@ -109,6 +114,9 @@ async def inlet(self, body: dict, user: Optional[dict] = None) -> dict: translated_user_message = translated_before_table + table_text + # Clean table delimiters + translated_user_message = self.clean_table_delimiters(translated_user_message) + print(f"Translated user message: {translated_user_message}") for message in reversed(messages): @@ -142,6 +150,9 @@ async def outlet(self, body: dict, user: Optional[dict] = None) -> dict: translated_assistant_message = translated_before_table + table_text + # Clean table delimiters + translated_assistant_message = self.clean_table_delimiters(translated_assistant_message) + print(f"Translated assistant message: {translated_assistant_message}") for message in reversed(messages): From 05d480b1b2072b68b506172e294e0d0f3bc01073 Mon Sep 17 00:00:00 2001 From: SimonOriginal Date: Sat, 29 Jun 2024 12:47:30 +0200 Subject: [PATCH 5/6] Implemented proper code handling to avoid language translation --- .../google_translation_filter_pipeline.py | 25 +++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/examples/filters/google_translation_filter_pipeline.py b/examples/filters/google_translation_filter_pipeline.py index 9e89b39b..d4627b7b 100644 --- a/examples/filters/google_translation_filter_pipeline.py +++ b/examples/filters/google_translation_filter_pipeline.py @@ -41,6 +41,7 @@ def __init__(self): # Initialize translation cache self.translation_cache = {} + self.code_blocks = [] # List to store code blocks async def on_startup(self): print(f"on_startup:{__name__}") @@ -99,7 +100,13 @@ async def inlet(self, body: dict, user: Optional[dict] = None) -> dict: print(f"User message: {user_message}") - parts = self.split_text_around_table(user_message) + # Find and store code blocks + code_block_regex = r'```[\s\S]+?```' + self.code_blocks = re.findall(code_block_regex, user_message) + # Replace code blocks with placeholders + user_message_no_code = re.sub(code_block_regex, '__CODE_BLOCK__', user_message) + + parts = self.split_text_around_table(user_message_no_code) text_before_table, table_text = parts # Check translation cache for text before table @@ -117,6 +124,10 @@ async def inlet(self, body: dict, user: Optional[dict] = None) -> dict: # Clean table delimiters translated_user_message = self.clean_table_delimiters(translated_user_message) + # Restore code blocks + for code_block in self.code_blocks: + translated_user_message = translated_user_message.replace('__CODE_BLOCK__', code_block, 1) + print(f"Translated user message: {translated_user_message}") for message in reversed(messages): @@ -135,7 +146,13 @@ async def outlet(self, body: dict, user: Optional[dict] = None) -> dict: print(f"Assistant message: {assistant_message}") - parts = self.split_text_around_table(assistant_message) + # Find and store code blocks + code_block_regex = r'```[\s\S]+?```' + self.code_blocks = re.findall(code_block_regex, assistant_message) + # Replace code blocks with placeholders + assistant_message_no_code = re.sub(code_block_regex, '__CODE_BLOCK__', assistant_message) + + parts = self.split_text_around_table(assistant_message_no_code) text_before_table, table_text = parts # Check translation cache for text before table @@ -153,6 +170,10 @@ async def outlet(self, body: dict, user: Optional[dict] = None) -> dict: # Clean table delimiters translated_assistant_message = self.clean_table_delimiters(translated_assistant_message) + # Restore code blocks + for code_block in self.code_blocks: + translated_assistant_message = translated_assistant_message.replace('__CODE_BLOCK__', code_block, 1) + print(f"Translated assistant message: {translated_assistant_message}") for message in reversed(messages): From 77ae1e9a27165ec0c368165c80824671ef62e214 Mon Sep 17 00:00:00 2001 From: SimonOriginal Date: Sat, 29 Jun 2024 13:00:33 +0200 Subject: [PATCH 6/6] Remove caching --- examples/filters/google_translation_filter_pipeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/filters/google_translation_filter_pipeline.py b/examples/filters/google_translation_filter_pipeline.py index d4627b7b..87ebcee1 100644 --- a/examples/filters/google_translation_filter_pipeline.py +++ b/examples/filters/google_translation_filter_pipeline.py @@ -54,7 +54,7 @@ async def on_shutdown(self): async def on_valves_updated(self): pass - @lru_cache(maxsize=128) # LRU cache to store translation results + # @lru_cache(maxsize=128) # LRU cache to store translation results def translate(self, text: str, source: str, target: str) -> str: url = "https://translate.googleapis.com/translate_a/single" params = { @@ -90,7 +90,7 @@ def split_text_around_table(self, text: str) -> List[str]: def clean_table_delimiters(self, text: str) -> str: # Remove extra spaces from table delimiters - return re.sub(r'(\|\s*-+\s*)+', lambda m: m.group(0).replace(' ', ''), text) + return re.sub(r'(\|\s*-+\s*)+', lambda m: m.group(0).replace(' ', '-'), text) async def inlet(self, body: dict, user: Optional[dict] = None) -> dict: print(f"inlet:{__name__}")