This repository has been archived by the owner on Sep 12, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 93
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add qdrant and inmemory support (#6)
* add qdrant-client to requirements
* add qdrant module
* add minor TODO
* fix qdrant vectorparams
* minor fix
* update process_and_get_documents via making path_or_files union
* minor fix in markdown_processing module
* add in_memory module
* minor fix
* Fix bug in setup_db module
* add todo to inmemory module
- Loading branch information
1 parent
7bc2abf
commit b5fb999
Showing
8 changed files
with
143 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
from pathlib import Path | ||
from typing import Union, List | ||
|
||
from llama_index import VectorStoreIndex | ||
from llama_index.storage.storage_context import StorageContext | ||
|
||
from .base import BaseVS | ||
from utils.markdown_processing import process_and_get_documents | ||
|
||
class InMemoryVectorStore(BaseVS):
    """Vector store whose index is built in memory from processed documents."""

    def __init__(
        self,
        path_or_files: Union[Path, List[Path]],
        read_as_single_doc: bool = True,
        show_progress: bool = True,
    ) -> None:
        """
        Remember the document source and indexing options.

        Args:
            path_or_files: A single path or a list of paths handed to
                `process_and_get_documents`.
            read_as_single_doc: Forwarded unchanged to
                `process_and_get_documents`.
            show_progress: Forwarded unchanged to
                `VectorStoreIndex.from_documents`.
        """
        self._show_progress = show_progress
        self._read_as_single_doc = read_as_single_doc
        self._path_or_files = path_or_files
        # NOTE(review): the base initializer runs only after every attribute
        # is set, presumably because it calls back into this class -- confirm
        # against BaseVS.
        super().__init__()

    def _validate_requirements(self) -> None:
        """
        Nothing to validate: the in-memory store has no special requirements.
        """

    def initialize_vectorindex(self) -> None:
        """
        Build a fresh vector store index and keep it in `self._vectorstore`.
        """
        # TODO: Add support for other file formats. (pdf, docx, etc.)
        self._vectorstore = VectorStoreIndex.from_documents(
            documents=process_and_get_documents(
                path_or_files=self._path_or_files,
                read_as_single_doc=self._read_as_single_doc,
            ),
            show_progress=self._show_progress,
        )

    def connect_vectorstore(self) -> None:
        """
        Set `self._vectorstore` by (re)building the index.

        There is no separate existing index to attach to in memory, so
        connecting simply delegates to `initialize_vectorindex`.
        """
        self.initialize_vectorindex()
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
from llama_index.vector_stores.qdrant import QdrantVectorStore | ||
|
||
from utils.env_utils import read_env_variable, validate_environment_variables | ||
from .base import BaseVS | ||
|
||
class QdrantVS(BaseVS):
    """Vector store backed by a Qdrant collection.

    Connection settings are read from the `QDRANT_URL` and `QDRANT_API_KEY`
    environment variables; the client is created lazily on first use.
    """

    def __init__(self, collection_name: str, size: int = 1536, distance: str = "EUCLID"):
        """
        Store the collection settings.

        Args:
            collection_name: Name of the Qdrant collection to create/connect to.
            size: Vector size passed to `VectorParams` when creating the
                collection.
            distance: Name of a `qdrant_client.models.Distance` member
                (for example "EUCLID"); matched case-insensitively.
        """
        self._collection_name = collection_name
        self._size = size
        self._distance = distance
        # Created lazily by `_initialize_client`.
        self._client = None
        # NOTE(review): attributes are assigned before the base initializer
        # runs, presumably because it calls back into this class -- confirm
        # against BaseVS.
        super().__init__()

    def _validate_requirements(self):
        """
        Validate all required env variables are present, and all required packages are installed.

        Raises:
            ImportError: If the `qdrant-client` package cannot be imported.
        """
        required_env_variables = ["QDRANT_API_KEY", "QDRANT_URL"]

        validate_environment_variables(required_env_variables)

        try:
            import qdrant_client  # noqa: F401 -- imported only to check availability
        except ImportError as err:
            raise ImportError(
                "`qdrant-client` package not found, please run `pip install qdrant-client==1.5.4`"
            ) from err

    def _initialize_client(self):
        """
        Initialize the Qdrant client if not already initialized.
        """
        # If client already initialized, there is nothing to do.
        if self._client is not None:
            return

        from qdrant_client import QdrantClient

        # Read environment variables for Qdrant initialization
        url = read_env_variable("QDRANT_URL")
        api_key = read_env_variable("QDRANT_API_KEY")

        # Pass the values just read; this class never defines `self._url` or
        # `self._api_key`, so referencing them would raise AttributeError.
        self._client = QdrantClient(
            url=url,
            api_key=api_key,
        )

    def initialize_vectorindex(self):
        """
        Create a new vector store index.

        NOTE(review): `recreate_collection` presumably drops an existing
        collection with the same name before creating it -- confirm against
        the qdrant-client documentation before running on live data.

        Raises:
            KeyError: If the configured distance is not a `Distance` member name.
        """
        from qdrant_client.models import Distance, VectorParams

        # Initialize client
        self._initialize_client()

        # Convert the distance name (e.g. "EUCLID") to the matching Distance
        # enum member (e.g. Distance.EUCLID). Member names are upper-case, so
        # normalise the input to accept "euclid" / "Euclid" as well.
        distance = Distance[self._distance.upper()]

        # Create index
        self._client.recreate_collection(
            collection_name=self._collection_name,
            vectors_config=VectorParams(size=self._size, distance=distance),
        )

    def connect_vectorstore(self):
        """
        Connect to an existing vector store index. Sets self._vectorstore.
        """
        # Initialize client
        self._initialize_client()

        # Construct vector store
        self._vectorstore = QdrantVectorStore(
            collection_name=self._collection_name,
            client=self._client,
        )