Replies: 1 comment 1 reply
-
Hey @sonhm3029! I'm here to help you with any bugs, questions, or contributions while you wait for a human maintainer. Let's tackle this issue together! To address the issue of a corrupted index.pkl file, you can keep a backup copy of the pickle each time you save, and automatically restore from that backup when loading fails.
Here is an example implementation in Python:
import os
import shutil
import pickle
from pathlib import Path
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
class FaissVectorDB:
    """FAISS-backed vector store with crash-resilient persistence.

    Keeps a backup copy of the pickled docstore (``index_backup.pkl``) and
    writes the live ``index.pkl`` atomically, so that an unexpected server
    crash during a save cannot leave a truncated pickle behind (the
    "Ran out of input" ``EOFError`` reported when loading a corrupted file).
    """

    def __init__(self, vector_db_path: str = "vectorstores/faiss",
                 embed_model: str = "text-embedding-3-large") -> None:
        # Embedding model used for both indexing and querying; requires
        # OPENAI_API_KEY in the environment (raises KeyError if missing).
        self.embedding_model = OpenAIEmbeddings(model=embed_model,
                                                api_key=os.environ["OPENAI_API_KEY"])
        self.vector_db_path = vector_db_path
        # A saved store is identified by the presence of the raw FAISS index
        # file (checking the directory alone is redundant).
        if os.path.exists(f"{vector_db_path}/index.faiss"):
            self.db = self.load_local_with_backup(vector_db_path, self.embedding_model)
        else:
            self.db = None

    def save_local_with_backup(self, folder_path: str, index_name: str = "index") -> None:
        """Persist the FAISS index and docstore under *folder_path*.

        The previous ``index.pkl`` is copied to ``{index_name}_backup.pkl``
        first, and the new pickle is written to a temp file and atomically
        renamed into place, so at least one readable pickle always exists.
        """
        path = Path(folder_path)
        path.mkdir(exist_ok=True, parents=True)

        pkl_file_path = path / f"{index_name}.pkl"
        backup_file_path = path / f"{index_name}_backup.pkl"
        # Preserve the last known-good pickle before touching anything.
        if pkl_file_path.exists():
            shutil.copy(pkl_file_path, backup_file_path)

        # The raw FAISS index is not picklable; save it with faiss itself.
        faiss = FAISS.dependable_faiss_import()
        faiss.write_index(self.db.index, str(path / f"{index_name}.faiss"))

        # Write the pickle to a temp file, flush it to disk, then atomically
        # replace the real file — a crash mid-write can no longer leave a
        # truncated index.pkl (the original cause of "Ran out of input").
        tmp_file_path = path / f"{index_name}.pkl.tmp"
        with open(tmp_file_path, "wb") as f:
            pickle.dump((self.db.docstore, self.db.index_to_docstore_id), f)
            f.flush()
            os.fsync(f.fileno())
        os.replace(tmp_file_path, pkl_file_path)

    def load_local_with_backup(self, folder_path: str, embeddings: OpenAIEmbeddings,
                               index_name: str = "index") -> FAISS:
        """Load the store, falling back to the backup pickle on corruption.

        Raises:
            RuntimeError: if both ``index.pkl`` and its backup are unusable.
        """
        path = Path(folder_path)
        pkl_file_path = path / f"{index_name}.pkl"
        backup_file_path = path / f"{index_name}_backup.pkl"

        # Load the raw index separately since it is not picklable.
        faiss = FAISS.dependable_faiss_import()
        index = faiss.read_index(str(path / f"{index_name}.faiss"))

        # NOTE: pickle.load must only ever see files this process wrote —
        # unpickling untrusted data is arbitrary code execution.
        try:
            with open(pkl_file_path, "rb") as f:
                docstore, index_to_docstore_id = pickle.load(f)
        # FileNotFoundError added: a *missing* index.pkl must also fall back
        # to the backup, not crash before the restore path is reached.
        except (pickle.UnpicklingError, EOFError, FileNotFoundError):
            if backup_file_path.exists():
                # Restore the last known-good pickle and retry once.
                shutil.copy(backup_file_path, pkl_file_path)
                with open(pkl_file_path, "rb") as f:
                    docstore, index_to_docstore_id = pickle.load(f)
            else:
                raise RuntimeError("Both index.pkl and its backup are corrupted or missing.")

        return FAISS(embeddings, index, docstore, index_to_docstore_id)
Beta Was this translation helpful? Give feedback.
-
Checked other resources
Commit to Help
Example Code
Description
I am using FAISS in my Flask app. When the app starts, I initialize the FaissVectorDB and read it into memory. But recently my Linux server has sometimes died unexpectedly, and after restarting it I found that the index.pkl file that stores the data and index was corrupted. I get the error "Ran out of input", and all my data is lost.
Has anyone run into the same problem? Any ideas or suggestions on the best way to run my vector DB would be appreciated.
I use my vector DB in a RAG pipeline, and because reloading it on every use would be time-consuming, I load it just once when the Flask app starts.
System Info
python==3.9.13
langchain==0.1.9
langchain-community==0.0.24
Beta Was this translation helpful? Give feedback.
All reactions