Commit

fix: ci

leehanchung committed Nov 26, 2023
1 parent e2515f5 commit 8613716
Showing 2 changed files with 16 additions and 42 deletions.
57 changes: 15 additions & 42 deletions app/app.py
@@ -1,8 +1,9 @@
 # Chroma compatibility issues, hacking per its documentation
 # https://docs.trychroma.com/troubleshooting#sqlite
-__import__('pysqlite3')
+__import__("pysqlite3")
 import sys
-sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
+
+sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")
 from typing import List

 from tempfile import NamedTemporaryFile
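Note: this hunk only normalizes quote style and spacing around the Chroma SQLite workaround; behaviour is unchanged. As a standalone sketch of what the hack does (assuming the pysqlite3-binary package is installed), pysqlite3 is imported first and then re-registered under the "sqlite3" key, so Chroma's own `import sqlite3` resolves to the newer bundled SQLite:

    __import__("pysqlite3")  # load pysqlite3 under its own name first
    import sys

    # Re-register it as "sqlite3" so every later import gets pysqlite3.
    sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")

    import sqlite3  # this module is now actually pysqlite3

    # Chroma needs a recent SQLite; this prints the bundled version.
    print(sqlite3.sqlite_version)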
@@ -12,7 +13,6 @@
 import chromadb
 from chromadb.config import Settings
 from langchain.chains import RetrievalQAWithSourcesChain
-from langchain.chains.base import Chain
 from langchain.chat_models import ChatOpenAI
 from langchain.document_loaders import PDFPlumberLoader
 from langchain.embeddings.openai import OpenAIEmbeddings
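Note: `Chain` was imported but never used, so dropping it also clears pyflakes' unused-import warning under the "F" rules selected in pyproject.toml below, which is likely part of what was failing CI.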
@@ -53,10 +53,7 @@ def process_file(*, file: AskFileResponse) -> List[Document]:
     # 2. Split the text
     #
     ######################################################################
-    text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=2000,
-        chunk_overlap=100
-    )
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=100)
     ######################################################################
     docs = text_splitter.split_documents(documents)

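Note: the splitter call is only reflowed onto one line; the parameters are identical. A minimal sketch of what they mean, with hypothetical input text:

    from langchain.text_splitter import RecursiveCharacterTextSplitter

    # Hypothetical sample input, just to show the chunking parameters.
    text = "A long PDF page worth of text. " * 300

    splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=100)
    chunks = splitter.split_text(text)

    # Each chunk stays under ~2000 characters, and neighbouring chunks
    # overlap by up to 100 characters so answers that straddle a chunk
    # boundary keep their surrounding context.
    print(len(chunks), max(len(c) for c in chunks))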
@@ -70,34 +67,25 @@ def process_file(*, file: AskFileResponse) -> List[Document]:


 def create_search_engine(*, docs: List[Document], embeddings: Embeddings) -> VectorStore:
-
     # Initialize Chromadb client to enable resetting and disable telemetry
     client = chromadb.EphemeralClient()
     client_settings = Settings(
-        chroma_db_impl="duckdb+parquet",
-        anonymized_telemetry=False,
-        persist_directory=".chromadb",
-        allow_reset=True )
+        chroma_db_impl="duckdb+parquet", anonymized_telemetry=False, persist_directory=".chromadb", allow_reset=True
+    )

     # Reset the search engine to ensure we don't use old copies.
     # NOTE: we do not need this for production
-    search_engine = Chroma(
-        client=client,
-        client_settings=client_settings
-    )
+    search_engine = Chroma(client=client, client_settings=client_settings)
     search_engine._client.reset()

     ##########################################################################
     #
-    # 4. Create the document search engine. Remember to add 
+    # 4. Create the document search engine. Remember to add
     # client_settings using the above settings.
     #
     ##########################################################################
     search_engine = Chroma.from_documents(
-        client=client,
-        documents=docs,
-        embedding=embeddings,
-        client_settings=client_settings
+        client=client, documents=docs, embedding=embeddings, client_settings=client_settings
     )
     ##########################################################################

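Note: besides the reflowed calls, the logic here is unchanged: configure the client, reset any stale ephemeral collection, then index the chunks with `Chroma.from_documents`. A minimal usage sketch of the resulting store (assumes OPENAI_API_KEY is set and `docs` is the List[Document] produced by process_file):

    from langchain.embeddings.openai import OpenAIEmbeddings
    from langchain.vectorstores import Chroma

    embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
    search_engine = Chroma.from_documents(documents=docs, embedding=embeddings)

    # The store can now be queried by semantic similarity.
    for hit in search_engine.similarity_search("What is this document about?", k=4):
        print(hit.page_content[:80])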
@@ -106,7 +94,6 @@ def create_search_engine(*, docs: List[Document], embeddings: Embeddings) -> VectorStore:

 @cl.on_chat_start
 async def on_chat_start():
-
     # Asking user to upload a PDF to chat with
     files = None
     while files is None:
@@ -124,32 +111,23 @@ async def on_chat_start():
     cl.user_session.set("docs", docs)
     msg.content = f"`{file.name}` processed. Loading ..."
     await msg.update()

     ##########################################################################
     #
     # 3. Set the Encoder model for creating embeddings
     #
     ##########################################################################
-    embeddings = OpenAIEmbeddings(
-        model="text-embedding-ada-002"
-    )
+    embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
     ##########################################################################
     try:
-        search_engine = await cl.make_async(create_search_engine)(
-            docs=docs,
-            embeddings=embeddings
-        )
+        search_engine = await cl.make_async(create_search_engine)(docs=docs, embeddings=embeddings)
     except Exception as e:
         await cl.Message(content=f"Error: {e}").send()
         raise SystemError
     msg.content = f"`{file.name}` loaded. You can now ask questions!"
     await msg.update()

-    llm = ChatOpenAI(
-        model='gpt-3.5-turbo-16k-0613',
-        temperature=0,
-        streaming=True
-    )
+    llm = ChatOpenAI(model="gpt-3.5-turbo-16k-0613", temperature=0, streaming=True)

     ##########################################################################
     #
@@ -164,10 +142,7 @@ async def on_chat_start():
         # 6. Customize prompts to improve summarization and question
         # answering performance. Perhaps create your own prompt in prompts.py?
         ######################################################################
-        chain_type_kwargs={
-            "prompt": PROMPT,
-            "document_prompt": EXAMPLE_PROMPT
-        },
+        chain_type_kwargs={"prompt": PROMPT, "document_prompt": EXAMPLE_PROMPT},
     )
     ##########################################################################

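Note: `chain_type_kwargs` is how the custom prompts reach the underlying "stuff" documents chain. A sketch of the full call this hunk reformats, with hypothetical stand-ins for the `PROMPT` and `EXAMPLE_PROMPT` templates that actually live in prompts.py:

    from langchain.chains import RetrievalQAWithSourcesChain
    from langchain.prompts import PromptTemplate

    # Hypothetical stand-ins for the templates defined in prompts.py.
    EXAMPLE_PROMPT = PromptTemplate(
        template="Content: {page_content}\nSource: {source}",
        input_variables=["page_content", "source"],
    )
    PROMPT = PromptTemplate(
        template=(
            "Answer using only the sources below and cite them.\n"
            "{summaries}\n\nQuestion: {question}\nAnswer:"
        ),
        input_variables=["summaries", "question"],
    )

    chain = RetrievalQAWithSourcesChain.from_chain_type(
        llm,  # the streaming ChatOpenAI instance created above
        chain_type="stuff",
        retriever=search_engine.as_retriever(),
        chain_type_kwargs={"prompt": PROMPT, "document_prompt": EXAMPLE_PROMPT},
    )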
@@ -176,11 +151,9 @@ async def on_chat_start():

 @cl.on_message
 async def main(message: cl.Message):
-
     chain = cl.user_session.get("chain")  # type: RetrievalQAWithSourcesChain
     response = await chain.acall(
-        message.content,
-        callbacks=[cl.AsyncLangchainCallbackHandler(stream_final_answer=True)]
+        message.content, callbacks=[cl.AsyncLangchainCallbackHandler(stream_final_answer=True)]
     )

     answer = response["answer"]
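Note: the rest of the handler is truncated in this view. `RetrievalQAWithSourcesChain` returns a dict carrying both "answer" and "sources" keys, so a typical continuation (a sketch, not necessarily this repo's exact code) sends both back:

    answer = response["answer"]
    sources = response.get("sources", "")

    # The callback already streamed the final answer; attach citations.
    await cl.Message(content=f"{answer}\n\nSources: {sources}").send()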
1 change: 1 addition & 0 deletions pyproject.toml
@@ -38,6 +38,7 @@ select = [
     "F", # see: https://pypi.org/project/pyflakes
 ]
 ignore = [
+    "E402", # Module level import not at top of file, due to Chroma hacks
     "E501",
     "E731",
     "E741", # Ambiguous variable name: ...
