Commit

fix: ci

leehanchung committed Nov 26, 2023
1 parent e2515f5 commit 8613716
Showing 2 changed files with 16 additions and 42 deletions.
57 changes: 15 additions & 42 deletions app/app.py
@@ -1,8 +1,9 @@
 # Chroma compatibility issues, hacking per its documentation
 # https://docs.trychroma.com/troubleshooting#sqlite
-__import__('pysqlite3')
+__import__("pysqlite3")
 import sys
-sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
+
+sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")
 from typing import List

 from tempfile import NamedTemporaryFile
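Note: this hunk only normalizes quote style and spacing around the Chroma SQLite workaround; behaviour is unchanged. As a standalone sketch of what the hack does (assuming the pysqlite3-binary package is installed), pysqlite3 is imported first and then re-registered under the "sqlite3" key, so Chroma's own `import sqlite3` resolves to the newer bundled SQLite:

    __import__("pysqlite3")  # load pysqlite3 under its own name first
    import sys

    # Re-register it as "sqlite3" so every later import gets pysqlite3.
    sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")

    import sqlite3  # this module is now actually pysqlite3

    # Chroma needs a recent SQLite; this prints the bundled version.
    print(sqlite3.sqlite_version)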
@@ -12,7 +13,6 @@
 import chromadb
 from chromadb.config import Settings
 from langchain.chains import RetrievalQAWithSourcesChain
-from langchain.chains.base import Chain
 from langchain.chat_models import ChatOpenAI
 from langchain.document_loaders import PDFPlumberLoader
 from langchain.embeddings.openai import OpenAIEmbeddings
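Note: `Chain` was imported but never used, so dropping it also clears pyflakes' unused-import warning under the "F" rules selected in pyproject.toml below, which is likely part of what was failing CI.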
@@ -53,10 +53,7 @@ def process_file(*, file: AskFileResponse) -> List[Document]:
     # 2. Split the text
     #
     ######################################################################
-    text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=2000,
-        chunk_overlap=100
-    )
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=100)
     ######################################################################
     docs = text_splitter.split_documents(documents)

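Note: the splitter call is only reflowed onto one line; the parameters are identical. A minimal sketch of what they mean, with hypothetical input text:

    from langchain.text_splitter import RecursiveCharacterTextSplitter

    # Hypothetical sample input, just to show the chunking parameters.
    text = "A long PDF page worth of text. " * 300

    splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=100)
    chunks = splitter.split_text(text)

    # Each chunk stays under ~2000 characters, and neighbouring chunks
    # overlap by up to 100 characters so answers that straddle a chunk
    # boundary keep their surrounding context.
    print(len(chunks), max(len(c) for c in chunks))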
@@ -70,34 +67,25 @@ def process_file(*, file: AskFileResponse) -> List[Document]:


 def create_search_engine(*, docs: List[Document], embeddings: Embeddings) -> VectorStore:
-
     # Initialize Chromadb client to enable resetting and disable telemetry
     client = chromadb.EphemeralClient()
     client_settings = Settings(
-        chroma_db_impl="duckdb+parquet",
-        anonymized_telemetry=False,
-        persist_directory=".chromadb",
-        allow_reset=True )
+        chroma_db_impl="duckdb+parquet", anonymized_telemetry=False, persist_directory=".chromadb", allow_reset=True
+    )

     # Reset the search engine to ensure we don't use old copies.
     # NOTE: we do not need this for production
-    search_engine = Chroma(
-        client=client,
-        client_settings=client_settings
-    )
+    search_engine = Chroma(client=client, client_settings=client_settings)
     search_engine._client.reset()

     ##########################################################################
     #
-    # 4. Create the document search engine. Remember to add 
+    # 4. Create the document search engine. Remember to add
     # client_settings using the above settings.
     #
     ##########################################################################
     search_engine = Chroma.from_documents(
-        client=client,
-        documents=docs,
-        embedding=embeddings,
-        client_settings=client_settings
+        client=client, documents=docs, embedding=embeddings, client_settings=client_settings
     )
     ##########################################################################

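Note: besides the reflowed calls, the logic here is unchanged: configure the client, reset any stale ephemeral collection, then index the chunks with `Chroma.from_documents`. A minimal usage sketch of the resulting store (assumes OPENAI_API_KEY is set and `docs` is the List[Document] produced by process_file):

    from langchain.embeddings.openai import OpenAIEmbeddings
    from langchain.vectorstores import Chroma

    embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
    search_engine = Chroma.from_documents(documents=docs, embedding=embeddings)

    # The store can now be queried by semantic similarity.
    for hit in search_engine.similarity_search("What is this document about?", k=4):
        print(hit.page_content[:80])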
@@ -106,7 +94,6 @@ def create_search_engine(*, docs: List[Document], embeddings: Embeddings) -> VectorStore:

 @cl.on_chat_start
 async def on_chat_start():
-
     # Asking user to upload a PDF to chat with
     files = None
     while files is None:
@@ -124,32 +111,23 @@ async def on_chat_start():
     cl.user_session.set("docs", docs)
     msg.content = f"`{file.name}` processed. Loading ..."
     await msg.update()

     ##########################################################################
     #
     # 3. Set the Encoder model for creating embeddings
     #
     ##########################################################################
-    embeddings = OpenAIEmbeddings(
-        model="text-embedding-ada-002"
-    )
+    embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
     ##########################################################################
     try:
-        search_engine = await cl.make_async(create_search_engine)(
-            docs=docs,
-            embeddings=embeddings
-        )
+        search_engine = await cl.make_async(create_search_engine)(docs=docs, embeddings=embeddings)
     except Exception as e:
         await cl.Message(content=f"Error: {e}").send()
         raise SystemError
     msg.content = f"`{file.name}` loaded. You can now ask questions!"
     await msg.update()

-    llm = ChatOpenAI(
-        model='gpt-3.5-turbo-16k-0613',
-        temperature=0,
-        streaming=True
-    )
+    llm = ChatOpenAI(model="gpt-3.5-turbo-16k-0613", temperature=0, streaming=True)

     ##########################################################################
     #
@@ -164,10 +142,7 @@ async def on_chat_start():
         # 6. Customize prompts to improve summarization and question
         # answering performance. Perhaps create your own prompt in prompts.py?
         ######################################################################
-        chain_type_kwargs={
-            "prompt": PROMPT,
-            "document_prompt": EXAMPLE_PROMPT
-        },
+        chain_type_kwargs={"prompt": PROMPT, "document_prompt": EXAMPLE_PROMPT},
     )
     ##########################################################################

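Note: `chain_type_kwargs` is how the custom prompts reach the underlying "stuff" documents chain. A sketch of the full call this hunk reformats, with hypothetical stand-ins for the `PROMPT` and `EXAMPLE_PROMPT` templates that actually live in prompts.py:

    from langchain.chains import RetrievalQAWithSourcesChain
    from langchain.prompts import PromptTemplate

    # Hypothetical stand-ins for the templates defined in prompts.py.
    EXAMPLE_PROMPT = PromptTemplate(
        template="Content: {page_content}\nSource: {source}",
        input_variables=["page_content", "source"],
    )
    PROMPT = PromptTemplate(
        template=(
            "Answer using only the sources below and cite them.\n"
            "{summaries}\n\nQuestion: {question}\nAnswer:"
        ),
        input_variables=["summaries", "question"],
    )

    chain = RetrievalQAWithSourcesChain.from_chain_type(
        llm,  # the streaming ChatOpenAI instance created above
        chain_type="stuff",
        retriever=search_engine.as_retriever(),
        chain_type_kwargs={"prompt": PROMPT, "document_prompt": EXAMPLE_PROMPT},
    )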
@@ -176,11 +151,9 @@ async def on_chat_start():

 @cl.on_message
 async def main(message: cl.Message):
-
     chain = cl.user_session.get("chain")  # type: RetrievalQAWithSourcesChain
     response = await chain.acall(
-        message.content,
-        callbacks=[cl.AsyncLangchainCallbackHandler(stream_final_answer=True)]
+        message.content, callbacks=[cl.AsyncLangchainCallbackHandler(stream_final_answer=True)]
     )

     answer = response["answer"]
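Note: the rest of the handler is truncated in this view. `RetrievalQAWithSourcesChain` returns a dict carrying both "answer" and "sources" keys, so a typical continuation (a sketch, not necessarily this repo's exact code) sends both back:

    answer = response["answer"]
    sources = response.get("sources", "")

    # The callback already streamed the final answer; attach citations.
    await cl.Message(content=f"{answer}\n\nSources: {sources}").send()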
1 change: 1 addition & 0 deletions pyproject.toml
@@ -38,6 +38,7 @@ select = [
     "F", # see: https://pypi.org/project/pyflakes
 ]
 ignore = [
+    "E402", # Module level import not at top of file, due to Chroma hacks
     "E501",
     "E731",
     "E741", # Ambiguous variable name: ...
