Skip to content

Commit

Permalink
Merge pull request #9 from upstash/support_numpy_arrays
Browse files Browse the repository at this point in the history
support numpy and pandas arrays
  • Loading branch information
burak-upstash authored Jan 25, 2024
2 parents 43ef0aa + 1008d8c commit 3d6d104
Show file tree
Hide file tree
Showing 5 changed files with 206 additions and 8 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:

- name: Run mypy
run: |
poetry run mypy --show-error-codes --install-types .
poetry run mypy --show-error-codes --install-types --non-interactive .
- name: Run ruff
run: |
Expand Down
6 changes: 6 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@ types-setuptools = "^69.0.0.20240115"
pytest = "^7.4.4"
pytest-asyncio="^0.20.0"
ruff = "^0.1.13"
numpy = [
{version = "<=1.24.4", python = "<=3.8"},
{version = ">=1.25.0", python = ">=3.9"}
]
pandas = "^2.0.3"
pandas-stubs = "^2.0.3"

[build-system]
requires = ["poetry-core"]
Expand Down
173 changes: 173 additions & 0 deletions tests/core/test_upsert.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
from upstash_vector import Index, AsyncIndex
from upstash_vector.types import Vector

import numpy as np
import pandas as pd


def test_upsert_tuple(index: Index):
v1_id = "id1"
Expand Down Expand Up @@ -179,3 +182,173 @@ async def test_upsert_vector_async(async_index: AsyncIndex):
assert res[1].id == v2_id
assert res[1].metadata is None
assert res[1].vector == v2_values


def test_upsert_tuple_with_numpy(index: Index):
    """Upsert ``(id, ndarray[, metadata])`` tuples and check what fetch returns.

    The fetched vectors are compared against ``ndarray.tolist()``.
    """
    first_id, second_id = "id1", "id2"
    first_meta = {"metadata_field": "metadata_value"}
    first_vec = np.array([0.1, 0.2])
    second_vec = np.array([0.3, 0.4])

    # One entry carries metadata, the other omits it.
    payload = [
        (first_id, first_vec, first_meta),
        (second_id, second_vec),
    ]
    index.upsert(vectors=payload)

    fetched = index.fetch(
        ids=[first_id, second_id],
        include_vectors=True,
        include_metadata=True,
    )

    first = fetched[0]
    assert first is not None
    assert first.id == first_id
    assert first.metadata == first_meta
    assert first.vector == first_vec.tolist()

    second = fetched[1]
    assert second is not None
    assert second.id == second_id
    assert second.metadata is None
    assert second.vector == second_vec.tolist()


def test_upsert_dict_with_numpy(index: Index):
    """Upsert dict payloads whose ``"vector"`` is an ndarray and check fetch.

    The fetched vectors are compared against ``ndarray.tolist()``.
    """
    first_id, second_id = "dict_id1", "dict_id2"
    first_meta = {"metadata_field": "metadata_value"}
    first_vec = np.array([0.1, 0.2])
    second_vec = np.array([0.3, 0.4])

    # One entry carries metadata, the other omits the key entirely.
    payload = [
        {"id": first_id, "vector": first_vec, "metadata": first_meta},
        {"id": second_id, "vector": second_vec},
    ]
    index.upsert(vectors=payload)

    fetched = index.fetch(
        ids=[first_id, second_id],
        include_vectors=True,
        include_metadata=True,
    )

    first = fetched[0]
    assert first is not None
    assert first.id == first_id
    assert first.metadata == first_meta
    assert first.vector == first_vec.tolist()

    second = fetched[1]
    assert second is not None
    assert second.id == second_id
    assert second.metadata is None
    assert second.vector == second_vec.tolist()


def test_upsert_vector_with_numpy(index: Index):
    """Upsert ``Vector`` objects holding ndarrays and check what fetch returns.

    The fetched vectors are compared against ``ndarray.tolist()``.
    """
    first_id, second_id = "vector_id1", "vector_id2"
    first_meta = {"metadata_field": "metadata_value"}
    first_vec = np.array([0.1, 0.2])
    second_vec = np.array([0.3, 0.4])

    # One Vector carries metadata, the other relies on the default.
    payload = [
        Vector(id=first_id, vector=first_vec, metadata=first_meta),
        Vector(id=second_id, vector=second_vec),
    ]
    index.upsert(vectors=payload)

    fetched = index.fetch(
        ids=[first_id, second_id],
        include_vectors=True,
        include_metadata=True,
    )

    first = fetched[0]
    assert first is not None
    assert first.id == first_id
    assert first.metadata == first_meta
    assert first.vector == first_vec.tolist()

    second = fetched[1]
    assert second is not None
    assert second.id == second_id
    assert second.metadata is None
    assert second.vector == second_vec.tolist()


def test_upsert_tuple_with_pandas(index: Index):
    """Upsert ``(id, pandas array[, metadata])`` tuples and check what fetch returns.

    Fetched vectors are compared against ``tolist()`` of the pandas arrays so
    every assertion evaluates a plain ``bool``.
    """
    v1_id = "id1"
    v1_metadata = {"metadata_field": "metadata_value"}
    v1_values = pd.array([0.1, 0.2])

    v2_id = "id2"
    v2_values = pd.array([0.3, 0.4])

    # `==` between a pandas array and a list is element-wise and returns an
    # array, not a bool, so asserting on it directly is not a real equality
    # check. Compare plain lists instead.
    assert v2_values.tolist() == [0.3, 0.4]

    index.upsert(
        vectors=[
            (v1_id, v1_values, v1_metadata),
            (v2_id, v2_values),
        ]
    )

    res = index.fetch(ids=[v1_id, v2_id], include_vectors=True, include_metadata=True)

    assert res[0] is not None
    assert res[0].id == v1_id
    assert res[0].metadata == v1_metadata
    assert res[0].vector == v1_values.tolist()

    assert res[1] is not None
    assert res[1].id == v2_id
    assert res[1].metadata is None
    assert res[1].vector == v2_values.tolist()


def test_upsert_dict_with_pandas(index: Index):
    """Upsert dict payloads whose ``"vector"`` is a pandas array and check fetch.

    Fetched vectors are compared against ``tolist()`` of the pandas arrays so
    every assertion evaluates a plain ``bool``.
    """
    v1_id = "dict_id1"
    v1_metadata = {"metadata_field": "metadata_value"}
    v1_values = pd.array([0.1, 0.2])

    v2_id = "dict_id2"
    v2_values = pd.array([0.3, 0.4])

    index.upsert(
        vectors=[
            {"id": v1_id, "vector": v1_values, "metadata": v1_metadata},
            {"id": v2_id, "vector": v2_values},
        ]
    )

    res = index.fetch(ids=[v1_id, v2_id], include_vectors=True, include_metadata=True)

    # `list == pandas array` is element-wise and returns an array, not a bool;
    # compare against `.tolist()` so the vector assertions are real checks.
    assert res[0] is not None
    assert res[0].id == v1_id
    assert res[0].metadata == v1_metadata
    assert res[0].vector == v1_values.tolist()

    assert res[1] is not None
    assert res[1].id == v2_id
    assert res[1].metadata is None
    assert res[1].vector == v2_values.tolist()


def test_upsert_vector_with_pandas(index: Index):
    """Upsert ``Vector`` objects holding pandas arrays and check what fetch returns.

    Fetched vectors are compared against ``tolist()`` of the pandas arrays so
    every assertion evaluates a plain ``bool``.
    """
    v1_id = "vector_id1"
    v1_metadata = {"metadata_field": "metadata_value"}
    v1_values = pd.array([0.1, 0.2])

    v2_id = "vector_id2"
    v2_values = pd.array([0.3, 0.4])

    # Expected values are captured as plain lists before upserting, so the
    # assertions below do not depend on whether upsert touches the Vector
    # objects it is given.
    v1_expected = v1_values.tolist()
    v2_expected = v2_values.tolist()

    index.upsert(
        vectors=[
            Vector(id=v1_id, vector=v1_values, metadata=v1_metadata),
            Vector(id=v2_id, vector=v2_values),
        ]
    )

    res = index.fetch(ids=[v1_id, v2_id], include_vectors=True, include_metadata=True)

    # `list == pandas array` is element-wise and returns an array, not a bool;
    # comparing two lists makes the vector assertions real checks.
    assert res[0] is not None
    assert res[0].id == v1_id
    assert res[0].metadata == v1_metadata
    assert res[0].vector == v1_expected

    assert res[1] is not None
    assert res[1].id == v2_id
    assert res[1].metadata is None
    assert res[1].vector == v2_expected
9 changes: 7 additions & 2 deletions upstash_vector/types.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
from dataclasses import dataclass
from typing import Optional, List, Dict
from typing import Optional, List, Dict, Union, Protocol


class SupportsToList(Protocol):
    """Structural type for objects that expose a ``tolist()`` method."""

    def tolist(self) -> List[float]:
        """Return the object's contents as a list of floats."""
        ...


@dataclass
class Vector:
    """A single vector record: an id, its values and optional metadata."""

    # Identifier of the vector.
    id: str
    # Either a plain list of floats or any object exposing ``tolist()``.
    # The earlier ``List[float]``-only annotation was superseded by this one.
    vector: Union[List[float], SupportsToList]
    # Optional metadata dictionary; defaults to None.
    metadata: Optional[Dict] = None


Expand Down
24 changes: 19 additions & 5 deletions upstash_vector/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,25 @@
from typing import List, Dict


def _convert_to_list(obj):
    """Return ``obj`` as a list.

    A ``list`` is returned unchanged (the same object). Any other object with
    a callable ``tolist`` attribute is converted by calling it.

    Raises:
        TypeError: if ``obj`` is neither a list nor has a callable ``tolist``.
    """
    if isinstance(obj, list):
        return obj

    # Duck-typed conversion: anything exposing a callable ``tolist`` qualifies.
    to_list = getattr(obj, "tolist", None)
    if callable(to_list):
        return to_list()

    raise TypeError(
        f"Expected a list or something can be converted to a list(like numpy or pandas array) but got {type(obj)}"
    )


def _tuple_to_vector(vector) -> Vector:
    """Build a ``Vector`` from an ``(id, vector)`` or ``(id, vector, metadata)`` tuple.

    The vector element is passed through ``_convert_to_list``, so objects
    exposing ``tolist()`` are stored as plain lists.

    Raises:
        ClientError: if the tuple does not have exactly 2 or 3 elements.
        TypeError: propagated from ``_convert_to_list`` when the vector element
            is neither a list nor convertible via ``tolist()``.
    """
    if not 2 <= len(vector) <= 3:
        raise ClientError("Tuple must be in the format (id, vector, metadata)")

    # Convert once; the stale returns that skipped the conversion made the
    # converting returns unreachable.
    values = _convert_to_list(vector[1])

    if len(vector) == 2:
        return Vector(id=vector[0], vector=values)

    return Vector(id=vector[0], vector=values, metadata=vector[2])


def _dict_to_vector(vector) -> Vector:
Expand All @@ -21,11 +32,14 @@ def _dict_to_vector(vector) -> Vector:
if vector.get("metadata") is not None:
metadata = vector["metadata"]

return Vector(id=vector["id"], vector=vector["vector"], metadata=metadata)
return Vector(
id=vector["id"], vector=_convert_to_list(vector["vector"]), metadata=metadata
)


def _map_or_dict_to_vectors(vector) -> Vector:
def _tuple_or_dict_to_vectors(vector) -> Vector:
if isinstance(vector, Vector):
vector.vector = _convert_to_list(vector.vector)
return vector
elif isinstance(vector, tuple):
return _tuple_to_vector(vector)
Expand All @@ -38,4 +52,4 @@ def _map_or_dict_to_vectors(vector) -> Vector:


def convert_to_vectors(vectors) -> List[Vector]:
    """Convert every element of ``vectors`` into a ``Vector``.

    Each element is handed to ``_tuple_or_dict_to_vectors``. The stale return
    that called the helper's old name (``_map_or_dict_to_vectors``) is dropped,
    since it shadowed this one.
    """
    return [_tuple_or_dict_to_vectors(vector) for vector in vectors]

0 comments on commit 3d6d104

Please sign in to comment.