-
Notifications
You must be signed in to change notification settings - Fork 1
/
Briefly.py
67 lines (56 loc) · 2.24 KB
/
Briefly.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import ollama
from langchain.vectorstores import FAISS
from langchain_community.llms import Ollama
# Module-level LLM client for the "llama3" Ollama model; handle_questions()
# streams its answers from this object.
model = Ollama(model="llama3")
class OllamaEmbeddingWrapper:
    """Adapter that exposes Ollama embeddings through LangChain-style methods.

    Provides ``embed_documents`` and ``embed_query`` (the two methods of the
    LangChain embeddings interface) and stays callable so that code treating
    the wrapper as a plain ``text -> vector`` function keeps working.

    Args:
        model_name: Name of the Ollama embedding model to query.
    """

    def __init__(self, model_name):
        self.model_name = model_name

    def _embed(self, text):
        """Return the embedding vector Ollama reports for a single text."""
        return ollama.embeddings(model=self.model_name, prompt=text)['embedding']

    def embed_documents(self, texts):
        """Return one embedding per entry of ``texts``, in the same order."""
        return [self._embed(text) for text in texts]

    def embed_query(self, text):
        """Return the embedding for a single query string."""
        return self._embed(text)

    def __call__(self, text):
        """Embed ``text``; kept for callers that use the wrapper as a function."""
        return self._embed(text)
def get_retriever(chunks):
    """Index ``chunks`` in a FAISS vector store and return it as a retriever.

    Args:
        chunks: Text chunks to embed and index.

    Returns:
        The object produced by ``FAISS.from_texts(...).as_retriever()``.
    """
    embedder = OllamaEmbeddingWrapper(model_name="nomic-embed-text")
    vector_store = FAISS.from_texts(texts=chunks, embedding=embedder)
    return vector_store.as_retriever()
def handle_questions(question):
    """Answer ``question`` from the indexed documents and stream the reply.

    Looks up the retriever stored in ``st.session_state``, fetches the
    matching documents, and streams the model's answer to the page. If no
    retriever has been built yet, a warning is shown and nothing else happens.

    Args:
        question: The user's question text.
    """
    retriever = st.session_state.get("retriever")
    if retriever is None:
        # The retriever starts out as None until documents are processed;
        # calling .invoke on it would raise AttributeError.
        st.warning("Please upload and process your documents before asking a question.")
        return

    with st.spinner("Retrieving relevent information"):
        matches = retriever.invoke(question)

    # Send the text of each match rather than the repr of the result list,
    # so the prompt is not cluttered with object wrappers and escaped newlines.
    # Falls back to str() for items without a page_content attribute.
    context = "\n\n".join(
        getattr(match, "page_content", str(match)) for match in matches
    )

    with st.spinner("generating"):
        st.write_stream(
            model.stream(f"Question: {question} \n\n Context: {context}"))
def split_text(raw_text):
    """Split ``raw_text`` into overlapping chunks.

    Uses a ``RecursiveCharacterTextSplitter`` configured with a chunk size of
    500 and an overlap of 200 (measured with ``len``), trying the separators
    blank line, newline, period, and space in that order.

    Args:
        raw_text: The full text to split.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_text`` returns for
        ``raw_text``.
    """
    splitter_options = {
        "separators": ["\n\n", "\n", ".", " "],
        "chunk_size": 500,
        "chunk_overlap": 200,
        "length_function": len,
    }
    return RecursiveCharacterTextSplitter(**splitter_options).split_text(raw_text)
def read_docs(documents):
    """Concatenate the extracted text of every page of every PDF.

    Args:
        documents: Iterable of PDF sources accepted by ``PdfReader``. ``None``
            or an empty iterable yields an empty string.

    Returns:
        The text of all pages joined together with no separator, in document
        and page order.
    """
    page_texts = []
    for doc in documents or ():
        reader = PdfReader(doc)
        # Guard against a page yielding None instead of a string (presumably
        # possible for pages with no text layer in some PyPDF2 versions -
        # TODO confirm); "or ''" keeps the join from failing.
        page_texts.extend(page.extract_text() or "" for page in reader.pages)
    # Join once instead of growing a string with += on every page.
    return "".join(page_texts)
def main():
    """Render the Briefly page: question box, document upload, and answers.

    The sidebar lets the user upload files and build a retriever, which is
    kept in ``st.session_state`` across reruns. A question typed in the main
    area is answered only once a retriever exists.
    """
    # Streamlit documents set_page_config as needing to be the first
    # Streamlit command on the page, so it runs before anything else.
    st.set_page_config(page_icon=":scroll:", page_title="Briefly")

    if "retriever" not in st.session_state:
        st.session_state.retriever = None

    st.header("Chat With Your Documents :scroll:")
    question = st.text_input("Enter Question !")

    with st.sidebar:
        st.subheader("Upload Your Documents")
        documents = st.file_uploader("upload your files here", accept_multiple_files=True)
        if st.button(":package: Process"):
            if not documents:
                # Nothing uploaded: indexing an empty chunk list would crash.
                st.warning("Please upload at least one document before processing.")
            else:
                with st.spinner("Processing"):
                    raw_text = read_docs(documents)
                    chunks = split_text(raw_text)
                    if chunks:
                        st.session_state.retriever = get_retriever(chunks)
                if not chunks:
                    # E.g. PDFs with no extractable text; keep any previous index.
                    st.error("No text could be extracted from the uploaded documents.")

    # Answer after the sidebar has run so that a retriever built during this
    # rerun is already available to the question.
    if question:
        if st.session_state.retriever is None:
            st.warning("Please upload and process your documents before asking a question.")
        else:
            handle_questions(question)
# Start the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()