From 73a90c193c9839f034350a5073cb8d7f12fd3aea Mon Sep 17 00:00:00 2001 From: Matvey Arye Date: Mon, 19 Aug 2024 17:13:16 -0400 Subject: [PATCH] Add option to specify the schema for the table --- README.md | 3 + nbs/00_vector.ipynb | 1133 ++++++++++++++++++----------------- nbs/index.ipynb | 4 +- timescale_vector/_modidx.py | 8 +- timescale_vector/client.py | 70 ++- 5 files changed, 660 insertions(+), 558 deletions(-) diff --git a/README.md b/README.md index 5f04e04..89e574f 100644 --- a/README.md +++ b/README.md @@ -80,6 +80,9 @@ The client constructor takes three required arguments: | table_name | Name of the table to use for storing the embeddings. Think of this as the collection name | | num_dimensions | Number of dimensions in the vector | +You can also specify the schema name, distance type, primary key type, +etc. as optional parameters. Please see the documentation for details. + ``` python vec = client.Sync(service_url, "my_data", 2) ``` diff --git a/nbs/00_vector.ipynb b/nbs/00_vector.ipynb index abbf7be..363ea1b 100644 --- a/nbs/00_vector.ipynb +++ b/nbs/00_vector.ipynb @@ -603,7 +603,8 @@ " distance_type: str,\n", " id_type: str,\n", " time_partition_interval: Optional[timedelta],\n", - " infer_filters: bool) -> None:\n", + " infer_filters: bool,\n", + " schema_name: Optional[str]) -> None:\n", " \"\"\"\n", " Initializes a base Vector object to generate queries for vector clients.\n", "\n", @@ -617,8 +618,15 @@ " The distance type for indexing.\n", " id_type\n", " The type of the id column. Can be either 'UUID' or 'TEXT'.\n", + " time_partition_interval\n", + " The time interval for partitioning the table (optional).\n", + " infer_filters\n", + " Whether to infer start and end times from the special __start_date and __end_date filters.\n", + " schema_name\n", + " The schema name for the table (optional, uses the database's default schema if not specified).\n", " \"\"\"\n", " self.table_name = table_name\n", + " self.schema_name = schema_name\n", " self.num_dimensions = num_dimensions\n", " if distance_type == 'cosine' or distance_type == '<=>':\n", " self.distance_type = '<=>'\n", @@ -652,6 +660,12 @@ " str: The quoted identifier.\n", " \"\"\"\n", " return '\"{}\"'.format(ident.replace('\"', '\"\"'))\n", + " \n", + " def _quoted_table_name(self):\n", + " if self.schema_name is not None:\n", + " return self._quote_ident(self.schema_name) + \".\" + self._quote_ident(self.table_name)\n", + " else:\n", + " return self._quote_ident(self.table_name)\n", "\n", " def get_row_exists_query(self):\n", " \"\"\"\n", @@ -661,7 +675,7 @@ " -------\n", " str: The query to check for row existence.\n", " \"\"\"\n", - " return \"SELECT 1 FROM {table_name} LIMIT 1\".format(table_name=self._quote_ident(self.table_name))\n", + " return \"SELECT 1 FROM {table_name} LIMIT 1\".format(table_name=self._quoted_table_name())\n", "\n", " def get_upsert_query(self):\n", " \"\"\"\n", @@ -671,7 +685,7 @@ " -------\n", " str: The upsert query.\n", " \"\"\"\n", - " return \"INSERT INTO {table_name} (id, metadata, contents, embedding) VALUES ($1, $2, $3, $4) ON CONFLICT DO NOTHING\".format(table_name=self._quote_ident(self.table_name))\n", + " return \"INSERT INTO {table_name} (id, metadata, contents, embedding) VALUES ($1, $2, $3, $4) ON CONFLICT DO NOTHING\".format(table_name=self._quoted_table_name())\n", "\n", " def get_approx_count_query(self):\n", " \"\"\"\n", @@ -682,7 +696,7 @@ " str: the query.\n", " \"\"\"\n", " # todo optimize with approx\n", - " return \"SELECT COUNT(*) as cnt FROM {table_name}\".format(table_name=self._quote_ident(self.table_name))\n", + " return \"SELECT COUNT(*) as cnt FROM {table_name}\".format(table_name=self._quoted_table_name())\n", "\n", " #| export\n", " def get_create_query(self):\n", @@ -732,7 +746,7 @@ " time_partitioning_func=>'public.uuid_timestamp', \n", " chunk_time_interval => '{chunk_time_interval} seconds'::interval);\n", " '''.format(\n", - " table_name=self._quote_ident(self.table_name), \n", + " table_name=self._quoted_table_name(), \n", " chunk_time_interval=str(self.time_partition_interval.total_seconds()),\n", " )\n", " return '''\n", @@ -751,36 +765,42 @@ "\n", "{hypertable_sql}\n", "'''.format(\n", - " table_name=self._quote_ident(self.table_name), \n", + " table_name=self._quoted_table_name(), \n", " id_type=self.id_type, \n", " index_name=self._quote_ident(self.table_name+\"_meta_idx\"), \n", " dimensions=self.num_dimensions,\n", " hypertable_sql=hypertable_sql,\n", " )\n", - "\n", - " def _get_embedding_index_name(self):\n", + " \n", + " def _get_embedding_index_name_quoted(self):\n", " return self._quote_ident(self.table_name+\"_embedding_idx\")\n", + " \n", + " def _get_schema_qualified_embedding_index_name_quoted(self):\n", + " if self.schema_name is not None:\n", + " return self._quote_ident(self.schema_name)+\".\"+self._get_embedding_index_name_quoted()\n", + " else:\n", + " return self._get_embedding_index_name_quoted()\n", "\n", " def drop_embedding_index_query(self):\n", - " return \"DROP INDEX IF EXISTS {index_name};\".format(index_name=self._get_embedding_index_name())\n", + " return \"DROP INDEX IF EXISTS {schema_qualified_index_name};\".format(schema_qualified_index_name=self._get_schema_qualified_embedding_index_name_quoted())\n", "\n", " def delete_all_query(self):\n", - " return \"TRUNCATE {table_name};\".format(table_name=self._quote_ident(self.table_name))\n", + " return \"TRUNCATE {table_name};\".format(table_name=self._quoted_table_name())\n", "\n", " def delete_by_ids_query(self, ids: Union[List[uuid.UUID], List[str]]) -> Tuple[str, List]:\n", " query = \"DELETE FROM {table_name} WHERE id = ANY($1::{id_type}[]);\".format(\n", - " table_name=self._quote_ident(self.table_name), id_type=self.id_type)\n", + " table_name=self._quoted_table_name(), id_type=self.id_type)\n", " return (query, [ids])\n", "\n", " def delete_by_metadata_query(self, filter: Union[Dict[str, str], List[Dict[str, str]]]) -> Tuple[str, List]:\n", " params: List[Any] = []\n", " (where, params) = self._where_clause_for_filter(params, filter)\n", " query = \"DELETE FROM {table_name} WHERE {where};\".format(\n", - " table_name=self._quote_ident(self.table_name), where=where)\n", + " table_name=self._quoted_table_name(), where=where)\n", " return (query, params)\n", "\n", " def drop_table_query(self):\n", - " return \"DROP TABLE IF EXISTS {table_name};\".format(table_name=self._quote_ident(self.table_name))\n", + " return \"DROP TABLE IF EXISTS {table_name};\".format(table_name=self._quoted_table_name())\n", " \n", " def default_max_db_connection_query(self):\n", " \"\"\"\n", @@ -805,8 +825,8 @@ " str: The index creation query.\n", " \"\"\"\n", " column_name = \"embedding\"\n", - " index_name = self._get_embedding_index_name()\n", - " query = index.create_index_query(self._quote_ident(self.table_name), self._quote_ident(column_name), index_name, self.distance_type, num_records_callback)\n", + " index_name_quoted = self._get_embedding_index_name_quoted()\n", + " query = index.create_index_query(self._quoted_table_name(), self._quote_ident(column_name), index_name_quoted, self.distance_type, num_records_callback)\n", " return query\n", "\n", " def _where_clause_for_filter(self, params: List, filter: Optional[Union[Dict[str, str], List[Dict[str, str]]]]) -> Tuple[str, List]:\n", @@ -898,7 +918,7 @@ " {where}\n", " {order_by_clause}\n", " LIMIT {limit}\n", - " '''.format(distance=distance, order_by_clause=order_by_clause, where=where, table_name=self._quote_ident(self.table_name), limit=limit)\n", + " '''.format(distance=distance, order_by_clause=order_by_clause, where=where, table_name=self._quoted_table_name(), limit=limit)\n", " return (query, params)" ] }, @@ -912,24 +932,24 @@ "text/markdown": [ "---\n", "\n", - "[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L562){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L570){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", "\n", "### QueryBuilder.get_create_query\n", "\n", "> QueryBuilder.get_create_query ()\n", "\n", - "Generates a query to create the tables, indexes, and extensions needed to store the vector data." + "*Generates a query to create the tables, indexes, and extensions needed to store the vector data.*" ], "text/plain": [ "---\n", "\n", - "[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L562){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L570){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", "\n", "### QueryBuilder.get_create_query\n", "\n", "> QueryBuilder.get_create_query ()\n", "\n", - "Generates a query to create the tables, indexes, and extensions needed to store the vector data." + "*Generates a query to create the tables, indexes, and extensions needed to store the vector data.*" ] }, "execution_count": null, @@ -967,6 +987,7 @@ " time_partition_interval: Optional[timedelta] = None,\n", " max_db_connections: Optional[int] = None,\n", " infer_filters: bool = True,\n", + " schema_name: Optional[str] = None,\n", " ) -> None:\n", " \"\"\"\n", " Initializes a async client for storing vector data.\n", @@ -983,9 +1004,15 @@ " The distance type for indexing.\n", " id_type\n", " The type of the id column. Can be either 'UUID' or 'TEXT'.\n", + " time_partition_interval\n", + " The time interval for partitioning the table (optional).\n", + " infer_filters\n", + " Whether to infer start and end times from the special __start_date and __end_date filters.\n", + " schema_name\n", + " The schema name for the table (optional, uses the database's default schema if not specified).\n", " \"\"\"\n", " self.builder = QueryBuilder(\n", - " table_name, num_dimensions, distance_type, id_type, time_partition_interval, infer_filters)\n", + " table_name, num_dimensions, distance_type, id_type, time_partition_interval, infer_filters, schema_name)\n", " self.service_url = service_url\n", " self.pool = None\n", " self.max_db_connections = max_db_connections\n", @@ -1232,24 +1259,24 @@ "text/markdown": [ "---\n", "\n", - "[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L900){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L915){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", "\n", "### Async.create_tables\n", "\n", "> Async.create_tables ()\n", "\n", - "Creates necessary tables." + "*Creates necessary tables.*" ], "text/plain": [ "---\n", "\n", - "[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L900){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L915){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", "\n", "### Async.create_tables\n", "\n", "> Async.create_tables ()\n", "\n", - "Creates necessary tables." + "*Creates necessary tables.*" ] }, "execution_count": null, @@ -1271,24 +1298,24 @@ "text/markdown": [ "---\n", "\n", - "[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L900){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L915){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", "\n", "### Async.create_tables\n", "\n", "> Async.create_tables ()\n", "\n", - "Creates necessary tables." + "*Creates necessary tables.*" ], "text/plain": [ "---\n", "\n", - "[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L900){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L915){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", "\n", "### Async.create_tables\n", "\n", "> Async.create_tables ()\n", "\n", - "Creates necessary tables." + "*Creates necessary tables.*" ] }, "execution_count": null, @@ -1310,7 +1337,7 @@ "text/markdown": [ "---\n", "\n", - "[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L1001){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L1016){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", "\n", "### Async.search\n", "\n", @@ -1320,7 +1347,7 @@ "> uuid_time_filter:Optional[__main__.UUIDTimeRange]=None,\n", "> query_params:Optional[__main__.QueryParams]=None)\n", "\n", - "Retrieves similar records using a similarity query.\n", + "*Retrieves similar records using a similarity query.*\n", "\n", "| | **Type** | **Default** | **Details** |\n", "| -- | -------- | ----------- | ----------- |\n", @@ -1335,7 +1362,7 @@ "text/plain": [ "---\n", "\n", - "[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L1001){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L1016){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", "\n", "### Async.search\n", "\n", @@ -1345,7 +1372,7 @@ "> uuid_time_filter:Optional[__main__.UUIDTimeRange]=None,\n", "> query_params:Optional[__main__.QueryParams]=None)\n", "\n", - "Retrieves similar records using a similarity query.\n", + "*Retrieves similar records using a similarity query.*\n", "\n", "| | **Type** | **Default** | **Details** |\n", "| -- | -------- | ----------- | ----------- |\n", @@ -1377,6 +1404,8 @@ "con = await asyncpg.connect(service_url)\n", "await con.execute(\"DROP TABLE IF EXISTS data_table;\")\n", "await con.execute(\"DROP EXTENSION IF EXISTS vector CASCADE;\")\n", + "await con.execute(\"DROP SCHEMA IF EXISTS tschema CASCADE;\")\n", + "await con.execute(\"CREATE SCHEMA tschema;\")\n", "await con.close()" ] }, @@ -1394,279 +1423,280 @@ "metadata": {}, "outputs": [], "source": [ - "vec = Async(service_url, \"data_table\", 2)\n", - "await vec.create_tables()\n", - "empty = await vec.table_is_empty()\n", - "assert empty\n", - "await vec.upsert([(uuid.uuid4(), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2])])\n", - "empty = await vec.table_is_empty()\n", - "assert not empty\n", - "\n", - "await vec.upsert([\n", - " (uuid.uuid4(), '''{\"key\":\"val\"}''', \"the brown fox\", [1.0, 1.3]),\n", - " (uuid.uuid4(), '''{\"key\":\"val2\", \"key_10\": \"10\", \"key_11\": \"11.3\"}''', \"the brown fox\", [1.0, 1.4]),\n", - " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.5]),\n", - " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.6]),\n", - " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.6]),\n", - " (uuid.uuid4(), '''{\"key2\":\"val2\"}''', \"the brown fox\", [1.0, 1.7]),\n", - " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.8]),\n", - " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.9]),\n", - " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 100.8]),\n", - " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 101.8]),\n", - " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.8]),\n", - " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.8]),\n", - " (uuid.uuid4(), '''{\"key_1\":\"val_1\", \"key_2\":\"val_2\"}''',\n", - " \"the brown fox\", [1.0, 1.8]),\n", - "\n", - " (uuid.uuid4(), '''{\"key0\": [1,2,3,4]}''', \"the brown fox\", [1.0, 1.8]),\n", - " (uuid.uuid4(), '''{\"key0\": [8,9,\"A\"]}''', \"the brown fox\", [1.0, 1.8]), # mixed types\n", - " (uuid.uuid4(), '''{\"key0\": [5,6,7], \"key3\": 3}''', \"the brown fox\", [1.0, 1.8]),\n", - " (uuid.uuid4(), '''{\"key0\": [\"B\", \"C\"]}''', \"the brown fox\", [1.0, 1.8]),\n", - "\n", - "])\n", - "\n", - "await vec.create_embedding_index(IvfflatIndex())\n", - "await vec.drop_embedding_index()\n", - "await vec.create_embedding_index(IvfflatIndex(100))\n", - "await vec.drop_embedding_index()\n", - "await vec.create_embedding_index(HNSWIndex())\n", - "await vec.drop_embedding_index()\n", - "await vec.create_embedding_index(HNSWIndex(20, 125))\n", - "await vec.drop_embedding_index()\n", - "await vec.create_embedding_index(DiskAnnIndex())\n", - "await vec.drop_embedding_index()\n", - "await vec.create_embedding_index(DiskAnnIndex(50, 50, 1.5, \"memory_optimized\", 2, 1))\n", - "\n", - "rec = await vec.search([1.0, 2.0])\n", - "assert len(rec) == 10\n", - "rec = await vec.search([1.0, 2.0], limit=4)\n", - "assert len(rec) == 4\n", - "rec = await vec.search(limit=4)\n", - "assert len(rec) == 4\n", - "rec = await vec.search([1.0, 2.0], limit=4, filter={\"key2\": \"val2\"})\n", - "assert len(rec) == 1\n", - "rec = await vec.search([1.0, 2.0], limit=4, filter={\"key2\": \"does not exist\"})\n", - "assert len(rec) == 0\n", - "rec = await vec.search([1.0, 2.0], limit=4, filter={\"key_1\": \"val_1\"})\n", - "assert len(rec) == 1\n", - "rec = await vec.search([1.0, 2.0], filter={\"key_1\": \"val_1\", \"key_2\": \"val_2\"})\n", - "assert len(rec) == 1\n", - "rec = await vec.search([1.0, 2.0], limit=4, filter={\"key_1\": \"val_1\", \"key_2\": \"val_3\"})\n", - "assert len(rec) == 0\n", - "rec = await vec.search(limit=4, filter={\"key_1\": \"val_1\", \"key_2\": \"val_3\"})\n", - "assert len(rec) == 0\n", - "rec = await vec.search([1.0, 2.0], limit=4, filter=[{\"key_1\": \"val_1\"}, {\"key2\": \"val2\"}])\n", - "assert len(rec) == 2\n", - "rec = await vec.search(limit=4, filter=[{\"key_1\": \"val_1\"}, {\"key2\": \"val2\"}])\n", - "assert len(rec) == 2\n", - "\n", - "rec = await vec.search([1.0, 2.0], limit=4, filter=[{\"key_1\": \"val_1\"}, {\"key2\": \"val2\"}, {\"no such key\": \"no such val\"}])\n", - "assert len(rec) == 2\n", - "\n", - "assert isinstance(rec[0][SEARCH_RESULT_METADATA_IDX], dict)\n", - "assert isinstance(rec[0][\"metadata\"], dict)\n", - "assert rec[0][\"contents\"] == \"the brown fox\"\n", - "\n", - "\n", - "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates((\"key\", \"val2\")))\n", - "assert len(rec) == 1\n", - "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates((\"key\", \"==\", \"val2\")))\n", - "assert len(rec) == 1\n", - "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key\", \"==\", \"val2\"))\n", - "assert len(rec) == 1\n", - "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key_10\", \"<\", 100))\n", - "assert len(rec) == 1\n", - "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key_10\", \"<\", 10))\n", - "assert len(rec) == 0\n", - "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key_10\", \"<=\", 10))\n", - "assert len(rec) == 1\n", - "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key_10\", \"<=\", 10.0))\n", - "assert len(rec) == 1\n", - "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key_11\", \"<=\", 11.3))\n", - "assert len(rec) == 1\n", - "rec = await vec.search(limit=4, predicates=Predicates(\"key_11\", \">=\", 11.29999))\n", - "assert len(rec) == 1\n", - "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key_11\", \"<\", 11.299999))\n", - "assert len(rec) == 0\n", - "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key0\", \"@>\", [1, 2]))\n", - "assert len(rec) == 1\n", - "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key0\", \"@>\", [3, 7]))\n", - "assert len(rec) == 0\n", - "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key0\", \"@>\", [42]))\n", - "assert len(rec) == 0\n", - "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key0\", \"@>\", [4]))\n", - "assert len(rec) == 1\n", - "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key0\", \"@>\", [9, \"A\"]))\n", - "assert len(rec) == 1\n", - "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key0\", \"@>\", [\"A\"]))\n", - "assert len(rec) == 1\n", - "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key0\", \"@>\", (\"C\", \"B\")))\n", - "assert len(rec) == 1\n", - "\n", - "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(*[(\"key\", \"val2\"), (\"key_10\", \"<\", 100)]))\n", - "assert len(rec) == 1\n", - "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates((\"key\", \"val2\"), (\"key_10\", \"<\", 100), operator='AND'))\n", - "assert len(rec) == 1\n", - "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates((\"key\", \"val2\"), (\"key_2\", \"val_2\"), operator='OR'))\n", - "assert len(rec) == 2\n", - "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key_10\", \"<\", 100) & (Predicates(\"key\",\"==\", \"val2\",) | Predicates(\"key_2\", \"==\", \"val_2\"))) \n", - "assert len(rec) == 1\n", - "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key_10\", \"<\", 100) and (Predicates(\"key\",\"==\", \"val2\") or Predicates(\"key_2\",\"==\", \"val_2\"))) \n", - "assert len(rec) == 1\n", - "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key0\", \"@>\", [6,7]) and Predicates(\"key3\",\"==\", 3))\n", - "assert len(rec) == 1\n", - "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key0\", \"@>\", [6,7]) and Predicates(\"key3\",\"==\", 6))\n", - "assert len(rec) == 0\n", - "rec = await vec.search(limit=4, predicates=~Predicates((\"key\", \"val2\"), (\"key_10\", \"<\", 100)))\n", - "assert len(rec) == 4\n", - "\n", - "raised = False\n", - "try:\n", - " # can't upsert using both keys and dictionaries\n", - " await vec.upsert([\n", - " (uuid.uuid4(), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2]),\n", - " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.2])\n", - " ])\n", - "except ValueError as e:\n", - " raised = True\n", - "assert raised\n", + "for schema in [\"tschema\", None]:\n", + " vec = Async(service_url, \"data_table\", 2, schema_name=schema)\n", + " await vec.create_tables()\n", + " empty = await vec.table_is_empty()\n", + " assert empty\n", + " await vec.upsert([(uuid.uuid4(), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2])])\n", + " empty = await vec.table_is_empty()\n", + " assert not empty\n", "\n", - "raised = False\n", - "try:\n", - " # can't upsert using both keys and dictionaries opposite order\n", " await vec.upsert([\n", - " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.2]),\n", - " (uuid.uuid4(), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2])\n", + " (uuid.uuid4(), '''{\"key\":\"val\"}''', \"the brown fox\", [1.0, 1.3]),\n", + " (uuid.uuid4(), '''{\"key\":\"val2\", \"key_10\": \"10\", \"key_11\": \"11.3\"}''', \"the brown fox\", [1.0, 1.4]),\n", + " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.5]),\n", + " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.6]),\n", + " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.6]),\n", + " (uuid.uuid4(), '''{\"key2\":\"val2\"}''', \"the brown fox\", [1.0, 1.7]),\n", + " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.8]),\n", + " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.9]),\n", + " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 100.8]),\n", + " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 101.8]),\n", + " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.8]),\n", + " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.8]),\n", + " (uuid.uuid4(), '''{\"key_1\":\"val_1\", \"key_2\":\"val_2\"}''',\n", + " \"the brown fox\", [1.0, 1.8]),\n", + "\n", + " (uuid.uuid4(), '''{\"key0\": [1,2,3,4]}''', \"the brown fox\", [1.0, 1.8]),\n", + " (uuid.uuid4(), '''{\"key0\": [8,9,\"A\"]}''', \"the brown fox\", [1.0, 1.8]), # mixed types\n", + " (uuid.uuid4(), '''{\"key0\": [5,6,7], \"key3\": 3}''', \"the brown fox\", [1.0, 1.8]),\n", + " (uuid.uuid4(), '''{\"key0\": [\"B\", \"C\"]}''', \"the brown fox\", [1.0, 1.8]),\n", + " \n", " ])\n", - "except BaseException as e:\n", - " raised = True\n", - "assert raised\n", - "\n", - "rec = await vec.search([1.0, 2.0], limit=4, filter=[{\"key_1\": \"val_1\"}, {\"key2\": \"val2\"}])\n", - "assert len(rec) == 2\n", - "await vec.delete_by_ids([rec[0][\"id\"]])\n", - "rec = await vec.search([1.0, 2.0], limit=4, filter=[{\"key_1\": \"val_1\"}, {\"key2\": \"val2\"}])\n", - "assert len(rec) == 1\n", - "await vec.delete_by_metadata([{\"key_1\": \"val_1\"}, {\"key2\": \"val2\"}])\n", - "rec = await vec.search([1.0, 2.0], limit=4, filter=[{\"key_1\": \"val_1\"}, {\"key2\": \"val2\"}])\n", - "assert len(rec) == 0\n", - "rec = await vec.search([1.0, 2.0], limit=4, filter=[{\"key2\": \"val\"}])\n", - "assert len(rec) == 4\n", - "await vec.delete_by_metadata([{\"key2\": \"val\"}])\n", - "rec = await vec.search([1.0, 2.0], limit=4, filter=[{\"key2\": \"val\"}])\n", - "assert len(rec) == 0\n", - "\n", - "assert not await vec.table_is_empty()\n", - "await vec.delete_all()\n", - "assert await vec.table_is_empty()\n", - "\n", - "await vec.drop_table()\n", - "await vec.close()\n", - "\n", - "vec = Async(service_url, \"data_table\", 2, id_type=\"TEXT\")\n", - "await vec.create_tables()\n", - "empty = await vec.table_is_empty()\n", - "assert empty\n", - "await vec.upsert([(\"Not a valid UUID\", {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2])])\n", - "empty = await vec.table_is_empty()\n", - "assert not empty\n", - "await vec.delete_by_ids([\"Not a valid UUID\"])\n", - "empty = await vec.table_is_empty()\n", - "assert empty\n", - "await vec.drop_table()\n", - "await vec.close()\n", - "\n", - "vec = Async(service_url, \"data_table\", 2, time_partition_interval=timedelta(seconds=60))\n", - "await vec.create_tables()\n", - "empty = await vec.table_is_empty()\n", - "assert empty\n", - "id = uuid.uuid1()\n", - "await vec.upsert([(id, {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2])])\n", - "empty = await vec.table_is_empty()\n", - "assert not empty\n", - "await vec.delete_by_ids([id])\n", - "empty = await vec.table_is_empty()\n", - "assert empty\n", - "\n", - "raised = False\n", - "try:\n", - " # can't upsert with uuid type 4 in time partitioned table\n", + "\n", + " await vec.create_embedding_index(IvfflatIndex())\n", + " await vec.drop_embedding_index()\n", + " await vec.create_embedding_index(IvfflatIndex(100))\n", + " await vec.drop_embedding_index()\n", + " await vec.create_embedding_index(HNSWIndex())\n", + " await vec.drop_embedding_index()\n", + " await vec.create_embedding_index(HNSWIndex(20, 125))\n", + " await vec.drop_embedding_index()\n", + " await vec.create_embedding_index(DiskAnnIndex())\n", + " await vec.drop_embedding_index()\n", + " await vec.create_embedding_index(DiskAnnIndex(50, 50, 1.5, \"memory_optimized\", 2, 1))\n", + "\n", + " rec = await vec.search([1.0, 2.0])\n", + " assert len(rec) == 10\n", + " rec = await vec.search([1.0, 2.0], limit=4)\n", + " assert len(rec) == 4\n", + " rec = await vec.search(limit=4)\n", + " assert len(rec) == 4\n", + " rec = await vec.search([1.0, 2.0], limit=4, filter={\"key2\": \"val2\"})\n", + " assert len(rec) == 1\n", + " rec = await vec.search([1.0, 2.0], limit=4, filter={\"key2\": \"does not exist\"})\n", + " assert len(rec) == 0\n", + " rec = await vec.search([1.0, 2.0], limit=4, filter={\"key_1\": \"val_1\"})\n", + " assert len(rec) == 1\n", + " rec = await vec.search([1.0, 2.0], filter={\"key_1\": \"val_1\", \"key_2\": \"val_2\"})\n", + " assert len(rec) == 1\n", + " rec = await vec.search([1.0, 2.0], limit=4, filter={\"key_1\": \"val_1\", \"key_2\": \"val_3\"})\n", + " assert len(rec) == 0\n", + " rec = await vec.search(limit=4, filter={\"key_1\": \"val_1\", \"key_2\": \"val_3\"})\n", + " assert len(rec) == 0\n", + " rec = await vec.search([1.0, 2.0], limit=4, filter=[{\"key_1\": \"val_1\"}, {\"key2\": \"val2\"}])\n", + " assert len(rec) == 2\n", + " rec = await vec.search(limit=4, filter=[{\"key_1\": \"val_1\"}, {\"key2\": \"val2\"}])\n", + " assert len(rec) == 2\n", + "\n", + " rec = await vec.search([1.0, 2.0], limit=4, filter=[{\"key_1\": \"val_1\"}, {\"key2\": \"val2\"}, {\"no such key\": \"no such val\"}])\n", + " assert len(rec) == 2\n", + "\n", + " assert isinstance(rec[0][SEARCH_RESULT_METADATA_IDX], dict)\n", + " assert isinstance(rec[0][\"metadata\"], dict)\n", + " assert rec[0][\"contents\"] == \"the brown fox\"\n", + "\n", + "\n", + " rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates((\"key\", \"val2\")))\n", + " assert len(rec) == 1\n", + " rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates((\"key\", \"==\", \"val2\")))\n", + " assert len(rec) == 1\n", + " rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key\", \"==\", \"val2\"))\n", + " assert len(rec) == 1\n", + " rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key_10\", \"<\", 100))\n", + " assert len(rec) == 1\n", + " rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key_10\", \"<\", 10))\n", + " assert len(rec) == 0\n", + " rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key_10\", \"<=\", 10))\n", + " assert len(rec) == 1\n", + " rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key_10\", \"<=\", 10.0))\n", + " assert len(rec) == 1\n", + " rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key_11\", \"<=\", 11.3))\n", + " assert len(rec) == 1\n", + " rec = await vec.search(limit=4, predicates=Predicates(\"key_11\", \">=\", 11.29999))\n", + " assert len(rec) == 1\n", + " rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key_11\", \"<\", 11.299999))\n", + " assert len(rec) == 0\n", + " rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key0\", \"@>\", [1, 2]))\n", + " assert len(rec) == 1\n", + " rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key0\", \"@>\", [3, 7]))\n", + " assert len(rec) == 0\n", + " rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key0\", \"@>\", [42]))\n", + " assert len(rec) == 0\n", + " rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key0\", \"@>\", [4]))\n", + " assert len(rec) == 1\n", + " rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key0\", \"@>\", [9, \"A\"]))\n", + " assert len(rec) == 1\n", + " rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key0\", \"@>\", [\"A\"]))\n", + " assert len(rec) == 1\n", + " rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key0\", \"@>\", (\"C\", \"B\")))\n", + " assert len(rec) == 1\n", + "\n", + " rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(*[(\"key\", \"val2\"), (\"key_10\", \"<\", 100)]))\n", + " assert len(rec) == 1\n", + " rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates((\"key\", \"val2\"), (\"key_10\", \"<\", 100), operator='AND'))\n", + " assert len(rec) == 1\n", + " rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates((\"key\", \"val2\"), (\"key_2\", \"val_2\"), operator='OR'))\n", + " assert len(rec) == 2\n", + " rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key_10\", \"<\", 100) & (Predicates(\"key\",\"==\", \"val2\",) | Predicates(\"key_2\", \"==\", \"val_2\"))) \n", + " assert len(rec) == 1\n", + " rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key_10\", \"<\", 100) and (Predicates(\"key\",\"==\", \"val2\") or Predicates(\"key_2\",\"==\", \"val_2\"))) \n", + " assert len(rec) == 1\n", + " rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key0\", \"@>\", [6,7]) and Predicates(\"key3\",\"==\", 3))\n", + " assert len(rec) == 1\n", + " rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key0\", \"@>\", [6,7]) and Predicates(\"key3\",\"==\", 6))\n", + " assert len(rec) == 0\n", + " rec = await vec.search(limit=4, predicates=~Predicates((\"key\", \"val2\"), (\"key_10\", \"<\", 100)))\n", + " assert len(rec) == 4\n", + "\n", + " raised = False\n", + " try:\n", + " # can't upsert using both keys and dictionaries\n", + " await vec.upsert([\n", + " (uuid.uuid4(), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2]),\n", + " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.2])\n", + " ])\n", + " except ValueError as e:\n", + " raised = True\n", + " assert raised\n", + "\n", + " raised = False\n", + " try:\n", + " # can't upsert using both keys and dictionaries opposite order\n", + " await vec.upsert([\n", + " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.2]),\n", + " (uuid.uuid4(), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2])\n", + " ])\n", + " except BaseException as e:\n", + " raised = True\n", + " assert raised\n", + "\n", + " rec = await vec.search([1.0, 2.0], limit=4, filter=[{\"key_1\": \"val_1\"}, {\"key2\": \"val2\"}])\n", + " assert len(rec) == 2\n", + " await vec.delete_by_ids([rec[0][\"id\"]])\n", + " rec = await vec.search([1.0, 2.0], limit=4, filter=[{\"key_1\": \"val_1\"}, {\"key2\": \"val2\"}])\n", + " assert len(rec) == 1\n", + " await vec.delete_by_metadata([{\"key_1\": \"val_1\"}, {\"key2\": \"val2\"}])\n", + " rec = await vec.search([1.0, 2.0], limit=4, filter=[{\"key_1\": \"val_1\"}, {\"key2\": \"val2\"}])\n", + " assert len(rec) == 0\n", + " rec = await vec.search([1.0, 2.0], limit=4, filter=[{\"key2\": \"val\"}])\n", + " assert len(rec) == 4\n", + " await vec.delete_by_metadata([{\"key2\": \"val\"}])\n", + " rec = await vec.search([1.0, 2.0], limit=4, filter=[{\"key2\": \"val\"}])\n", + " assert len(rec) == 0\n", + "\n", + " assert not await vec.table_is_empty()\n", + " await vec.delete_all()\n", + " assert await vec.table_is_empty()\n", + "\n", + " await vec.drop_table()\n", + " await vec.close()\n", + "\n", + " vec = Async(service_url, \"data_table\", 2, id_type=\"TEXT\")\n", + " await vec.create_tables()\n", + " empty = await vec.table_is_empty()\n", + " assert empty\n", + " await vec.upsert([(\"Not a valid UUID\", {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2])])\n", + " empty = await vec.table_is_empty()\n", + " assert not empty\n", + " await vec.delete_by_ids([\"Not a valid UUID\"])\n", + " empty = await vec.table_is_empty()\n", + " assert empty\n", + " await vec.drop_table()\n", + " await vec.close()\n", + "\n", + " vec = Async(service_url, \"data_table\", 2, time_partition_interval=timedelta(seconds=60))\n", + " await vec.create_tables()\n", + " empty = await vec.table_is_empty()\n", + " assert empty\n", + " id = uuid.uuid1()\n", + " await vec.upsert([(id, {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2])])\n", + " empty = await vec.table_is_empty()\n", + " assert not empty\n", + " await vec.delete_by_ids([id])\n", + " empty = await vec.table_is_empty()\n", + " assert empty\n", + "\n", + " raised = False\n", + " try:\n", + " # can't upsert with uuid type 4 in time partitioned table\n", + " await vec.upsert([\n", + " (uuid.uuid4(), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2])\n", + " ])\n", + " except BaseException as e:\n", + " raised = True\n", + " assert raised\n", + "\n", + " specific_datetime = datetime(2018, 8, 10, 15, 30, 0)\n", " await vec.upsert([\n", - " (uuid.uuid4(), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2])\n", + " # current time\n", + " (uuid.uuid1(), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2]),\n", + " #time in 2018\n", + " (uuid_from_time(specific_datetime), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2])\n", " ])\n", - "except BaseException as e:\n", - " raised = True\n", - "assert raised\n", - "\n", - "specific_datetime = datetime(2018, 8, 10, 15, 30, 0)\n", - "await vec.upsert([\n", - " # current time\n", - " (uuid.uuid1(), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2]),\n", - " #time in 2018\n", - " (uuid_from_time(specific_datetime), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2])\n", - "])\n", - "assert not await vec.table_is_empty()\n", - "\n", - "#check all the possible ways to specify a date range\n", - "async def search_date(start_date, end_date, expected):\n", - " #using uuid_time_filter\n", - " rec = await vec.search([1.0, 2.0], limit=4, uuid_time_filter=UUIDTimeRange(start_date, end_date))\n", - " assert len(rec) == expected\n", - " rec = await vec.search([1.0, 2.0], limit=4, uuid_time_filter=UUIDTimeRange(str(start_date), str(end_date)))\n", - " assert len(rec) == expected\n", + " assert not await vec.table_is_empty()\n", + "\n", + " #check all the possible ways to specify a date range\n", + " async def search_date(start_date, end_date, expected):\n", + " #using uuid_time_filter\n", + " rec = await vec.search([1.0, 2.0], limit=4, uuid_time_filter=UUIDTimeRange(start_date, end_date))\n", + " assert len(rec) == expected\n", + " rec = await vec.search([1.0, 2.0], limit=4, uuid_time_filter=UUIDTimeRange(str(start_date), str(end_date)))\n", + " assert len(rec) == expected\n", " \n", - " #using filters\n", - " filter = {}\n", - " if start_date is not None:\n", - " filter[\"__start_date\"] = start_date\n", - " if end_date is not None:\n", - " filter[\"__end_date\"] = end_date\n", - " rec = await vec.search([1.0, 2.0], limit=4, filter=filter)\n", - " assert len(rec) == expected\n", - " #using filters with string dates\n", - " filter = {}\n", - " if start_date is not None:\n", - " filter[\"__start_date\"] = str(start_date)\n", - " if end_date is not None:\n", - " filter[\"__end_date\"] = str(end_date)\n", - " rec = await vec.search([1.0, 2.0], limit=4, filter=filter)\n", - " assert len(rec) == expected\n", - " #using predicates\n", - " predicates = []\n", - " if start_date is not None:\n", - " predicates.append((\"__uuid_timestamp\", \">=\", start_date))\n", - " if end_date is not None:\n", - " predicates.append((\"__uuid_timestamp\", \"<\", end_date))\n", - " rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(*predicates))\n", - " assert len(rec) == expected\n", - " #using predicates with string dates\n", - " predicates = []\n", - " if start_date is not None:\n", - " predicates.append((\"__uuid_timestamp\", \">=\", str(start_date)))\n", - " if end_date is not None:\n", - " predicates.append((\"__uuid_timestamp\", \"<\", str(end_date)))\n", - " rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(*predicates))\n", - " assert len(rec) == expected\n", - "\n", - "await search_date(specific_datetime-timedelta(days=7), specific_datetime+timedelta(days=7), 1)\n", - "await search_date(specific_datetime-timedelta(days=7), None, 2)\n", - "await search_date(None, specific_datetime+timedelta(days=7), 1)\n", - "await search_date(specific_datetime-timedelta(days=7), specific_datetime-timedelta(days=2), 0)\n", - "\n", - "#check timedelta handling\n", - "rec = await vec.search([1.0, 2.0], limit=4, uuid_time_filter=UUIDTimeRange(start_date=specific_datetime, time_delta=timedelta(days=7)))\n", - "assert len(rec) == 1\n", - "#end is exclusive\n", - "rec = await vec.search([1.0, 2.0], limit=4, uuid_time_filter=UUIDTimeRange(end_date=specific_datetime, time_delta=timedelta(days=7)))\n", - "assert len(rec) == 0\n", - "rec = await vec.search([1.0, 2.0], limit=4, uuid_time_filter=UUIDTimeRange(end_date=specific_datetime+timedelta(seconds=1), time_delta=timedelta(days=7)))\n", - "assert len(rec) == 1\n", - "rec = await vec.search([1.0, 2.0], limit=4, query_params=DiskAnnIndexParams(10, 5))\n", - "assert len(rec) == 2\n", - "rec = await vec.search([1.0, 2.0], limit=4, query_params=DiskAnnIndexParams(100))\n", - "assert len(rec) == 2\n", - "await vec.drop_table()\n", - "await vec.close()" + " #using filters\n", + " filter = {}\n", + " if start_date is not None:\n", + " filter[\"__start_date\"] = start_date\n", + " if end_date is not None:\n", + " filter[\"__end_date\"] = end_date\n", + " rec = await vec.search([1.0, 2.0], limit=4, filter=filter)\n", + " assert len(rec) == expected\n", + " #using filters with string dates\n", + " filter = {}\n", + " if start_date is not None:\n", + " filter[\"__start_date\"] = str(start_date)\n", + " if end_date is not None:\n", + " filter[\"__end_date\"] = str(end_date)\n", + " rec = await vec.search([1.0, 2.0], limit=4, filter=filter)\n", + " assert len(rec) == expected\n", + " #using predicates\n", + " predicates = []\n", + " if start_date is not None:\n", + " predicates.append((\"__uuid_timestamp\", \">=\", start_date))\n", + " if end_date is not None:\n", + " predicates.append((\"__uuid_timestamp\", \"<\", end_date))\n", + " rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(*predicates))\n", + " assert len(rec) == expected\n", + " #using predicates with string dates\n", + " predicates = []\n", + " if start_date is not None:\n", + " predicates.append((\"__uuid_timestamp\", \">=\", str(start_date)))\n", + " if end_date is not None:\n", + " predicates.append((\"__uuid_timestamp\", \"<\", str(end_date)))\n", + " rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(*predicates))\n", + " assert len(rec) == expected\n", + "\n", + " await search_date(specific_datetime-timedelta(days=7), specific_datetime+timedelta(days=7), 1)\n", + " await search_date(specific_datetime-timedelta(days=7), None, 2)\n", + " await search_date(None, specific_datetime+timedelta(days=7), 1)\n", + " await search_date(specific_datetime-timedelta(days=7), specific_datetime-timedelta(days=2), 0)\n", + "\n", + " #check timedelta handling\n", + " rec = await vec.search([1.0, 2.0], limit=4, uuid_time_filter=UUIDTimeRange(start_date=specific_datetime, time_delta=timedelta(days=7)))\n", + " assert len(rec) == 1\n", + " #end is exclusive\n", + " rec = await vec.search([1.0, 2.0], limit=4, uuid_time_filter=UUIDTimeRange(end_date=specific_datetime, time_delta=timedelta(days=7)))\n", + " assert len(rec) == 0\n", + " rec = await vec.search([1.0, 2.0], limit=4, uuid_time_filter=UUIDTimeRange(end_date=specific_datetime+timedelta(seconds=1), time_delta=timedelta(days=7)))\n", + " assert len(rec) == 1\n", + " rec = await vec.search([1.0, 2.0], limit=4, query_params=DiskAnnIndexParams(10, 5))\n", + " assert len(rec) == 2\n", + " rec = await vec.search([1.0, 2.0], limit=4, query_params=DiskAnnIndexParams(100))\n", + " assert len(rec) == 2\n", + " await vec.drop_table()\n", + " await vec.close()" ] }, { @@ -1712,6 +1742,7 @@ " time_partition_interval: Optional[timedelta] = None,\n", " max_db_connections: Optional[int] = None,\n", " infer_filters: bool = True,\n", + " schema_name: Optional[str] = None,\n", " ) -> None:\n", " \"\"\"\n", " Initializes a sync client for storing vector data.\n", @@ -1728,9 +1759,15 @@ " The distance type for indexing.\n", " id_type\n", " The type of the primary id column. Can be either 'UUID' or 'TEXT'.\n", + " time_partition_interval\n", + " The time interval for partitioning the table (optional).\n", + " infer_filters\n", + " Whether to infer start and end times from the special __start_date and __end_date filters.\n", + " schema_name\n", + " The schema name for the table (optional, uses the database's default schema if not specified).\n", " \"\"\"\n", " self.builder = QueryBuilder(\n", - " table_name, num_dimensions, distance_type, id_type, time_partition_interval, infer_filters)\n", + " table_name, num_dimensions, distance_type, id_type, time_partition_interval, infer_filters, schema_name)\n", " self.service_url = service_url\n", " self.pool = None\n", " self.max_db_connections = max_db_connections\n", @@ -2033,24 +2070,24 @@ "text/markdown": [ "---\n", "\n", - "[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L1217){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L1233){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", "\n", "### Sync.create_tables\n", "\n", "> Sync.create_tables ()\n", "\n", - "Creates necessary tables." + "*Creates necessary tables.*" ], "text/plain": [ "---\n", "\n", - "[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L1217){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L1233){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", "\n", "### Sync.create_tables\n", "\n", "> Sync.create_tables ()\n", "\n", - "Creates necessary tables." + "*Creates necessary tables.*" ] }, "execution_count": null, @@ -2072,13 +2109,13 @@ "text/markdown": [ "---\n", "\n", - "[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L1197){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L1213){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", "\n", "### Sync.upsert\n", "\n", "> Sync.upsert (records)\n", "\n", - "Performs upsert operation for multiple records.\n", + "*Performs upsert operation for multiple records.*\n", "\n", "| | **Type** | **Details** |\n", "| -- | -------- | ----------- |\n", @@ -2088,13 +2125,13 @@ "text/plain": [ "---\n", "\n", - "[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L1197){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L1213){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", "\n", "### Sync.upsert\n", "\n", "> Sync.upsert (records)\n", "\n", - "Performs upsert operation for multiple records.\n", + "*Performs upsert operation for multiple records.*\n", "\n", "| | **Type** | **Details** |\n", "| -- | -------- | ----------- |\n", @@ -2116,12 +2153,24 @@ "execution_count": null, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/cevian/.pyenv/versions/3.11.4/envs/nbdev_env/lib/python3.11/site-packages/fastcore/docscrape.py:225: UserWarning: potentially wrong underline length... \n", + "Returns \n", + "-------- in \n", + "Retrieves similar records using a similarity query.\n", + "...\n", + " else: warn(msg)\n" + ] + }, { "data": { "text/markdown": [ "---\n", "\n", - "[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L1332){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L1348){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", "\n", "### Sync.search\n", "\n", @@ -2131,7 +2180,7 @@ "> uuid_time_filter:Optional[__main__.UUIDTimeRange]=None,\n", "> query_params:Optional[__main__.QueryParams]=None)\n", "\n", - "Retrieves similar records using a similarity query.\n", + "*Retrieves similar records using a similarity query.*\n", "\n", "| | **Type** | **Default** | **Details** |\n", "| -- | -------- | ----------- | ----------- |\n", @@ -2146,7 +2195,7 @@ "text/plain": [ "---\n", "\n", - "[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L1332){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "[source](https://github.com/timescale/python-vector/blob/main/timescale_vector/client.py#L1348){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", "\n", "### Sync.search\n", "\n", @@ -2156,7 +2205,7 @@ "> uuid_time_filter:Optional[__main__.UUIDTimeRange]=None,\n", "> query_params:Optional[__main__.QueryParams]=None)\n", "\n", - "Retrieves similar records using a similarity query.\n", + "*Retrieves similar records using a similarity query.*\n", "\n", "| | **Type** | **Default** | **Details** |\n", "| -- | -------- | ----------- | ----------- |\n", @@ -2196,6 +2245,8 @@ "con = await asyncpg.connect(service_url)\n", "await con.execute(\"DROP TABLE IF EXISTS data_table;\")\n", "await con.execute(\"DROP EXTENSION IF EXISTS vector CASCADE\")\n", + "await con.execute(\"DROP SCHEMA IF EXISTS tschema CASCADE;\")\n", + "await con.execute(\"CREATE SCHEMA tschema;\")\n", "await con.close()" ] }, @@ -2205,234 +2256,235 @@ "metadata": {}, "outputs": [], "source": [ - "vec = Sync(service_url, \"data_table\", 2)\n", - "vec.create_tables()\n", - "empty = vec.table_is_empty()\n", - "\n", - "assert empty\n", - "vec.upsert([(uuid.uuid4(), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2])])\n", - "empty = vec.table_is_empty()\n", - "assert not empty\n", - "\n", - "vec.upsert([\n", - " (uuid.uuid4(), '''{\"key\":\"val\"}''', \"the brown fox\", [1.0, 1.3]),\n", - " (uuid.uuid4(), '''{\"key\":\"val2\"}''', \"the brown fox\", [1.0, 1.4]),\n", - " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.5]),\n", - " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.6]),\n", - " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.6]),\n", - " (uuid.uuid4(), '''{\"key2\":\"val2\"}''', \"the brown fox\", [1.0, 1.7]),\n", - " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.8]),\n", - " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.9]),\n", - " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 100.8]),\n", - " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 101.8]),\n", - " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.8]),\n", - " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.8]),\n", - " (uuid.uuid4(), '''{\"key_1\":\"val_1\", \"key_2\":\"val_2\"}''',\n", - " \"the brown fox\", [1.0, 1.8]),\n", - " (uuid.uuid4(), '''{\"key0\": [1,2,3,4]}''', \"the brown fox\", [1.0, 1.8]),\n", - " (uuid.uuid4(), '''{\"key0\": [5,6,7], \"key3\": 3}''', \"the brown fox\", [1.0, 1.8]),\n", - "])\n", - "\n", - "vec.create_embedding_index(IvfflatIndex())\n", - "vec.drop_embedding_index()\n", - "vec.create_embedding_index(IvfflatIndex(100))\n", - "vec.drop_embedding_index()\n", - "vec.create_embedding_index(HNSWIndex())\n", - "vec.drop_embedding_index()\n", - "vec.create_embedding_index(HNSWIndex(20, 125))\n", - "vec.drop_embedding_index()\n", - "vec.create_embedding_index(DiskAnnIndex())\n", - "vec.drop_embedding_index()\n", - "vec.create_embedding_index(DiskAnnIndex(50, 50, 1.5))\n", - "\n", - "rec = vec.search([1.0, 2.0])\n", - "assert len(rec) == 10\n", - "rec = vec.search(np.array([1.0, 2.0]))\n", - "assert len(rec) == 10\n", - "rec = vec.search([1.0, 2.0], limit=4)\n", - "assert len(rec) == 4\n", - "rec = vec.search(limit=4)\n", - "assert len(rec) == 4\n", - "rec = vec.search([1.0, 2.0], limit=4, filter={\"key2\": \"val2\"})\n", - "assert len(rec) == 1\n", - "rec = vec.search([1.0, 2.0], limit=4, filter={\"key2\": \"does not exist\"})\n", - "assert len(rec) == 0\n", - "rec = vec.search(limit=4, filter={\"key2\": \"does not exist\"})\n", - "assert len(rec) == 0\n", - "rec = vec.search([1.0, 2.0], limit=4, filter={\"key_1\": \"val_1\"})\n", - "assert len(rec) == 1\n", - "rec = vec.search([1.0, 2.0], filter={\"key_1\": \"val_1\", \"key_2\": \"val_2\"})\n", - "assert len(rec) == 1\n", - "rec = vec.search([1.0, 2.0], limit=4, filter={\n", - " \"key_1\": \"val_1\", \"key_2\": \"val_3\"})\n", - "assert len(rec) == 0\n", - "\n", - "rec = vec.search([1.0, 2.0], limit=4, filter=[\n", - " {\"key_1\": \"val_1\"}, {\"key2\": \"val2\"}])\n", - "assert len(rec) == 2\n", - "\n", - "rec = vec.search([1.0, 2.0], limit=4, filter=[{\"key_1\": \"val_1\"}, {\n", - " \"key2\": \"val2\"}, {\"no such key\": \"no such val\"}])\n", - "assert len(rec) == 2\n", - "\n", - "raised = False\n", - "try:\n", - " # can't upsert using both keys and dictionaries\n", - " await vec.upsert([\n", - " (uuid.uuid4(), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2]),\n", - " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.2])\n", - " ])\n", - "except ValueError as e:\n", - " raised = True\n", - "assert raised\n", + "for schema in [None, \"tschema\"]: \n", + " vec = Sync(service_url, \"data_table\", 2, schema_name=schema)\n", + " vec.create_tables()\n", + " empty = vec.table_is_empty()\n", "\n", - "raised = False\n", - "try:\n", - " # can't upsert using both keys and dictionaries opposite order\n", - " await vec.upsert([\n", - " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.2]),\n", - " (uuid.uuid4(), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2])\n", + " assert empty\n", + " vec.upsert([(uuid.uuid4(), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2])])\n", + " empty = vec.table_is_empty()\n", + " assert not empty\n", + "\n", + " vec.upsert([\n", + " (uuid.uuid4(), '''{\"key\":\"val\"}''', \"the brown fox\", [1.0, 1.3]),\n", + " (uuid.uuid4(), '''{\"key\":\"val2\"}''', \"the brown fox\", [1.0, 1.4]),\n", + " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.5]),\n", + " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.6]),\n", + " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.6]),\n", + " (uuid.uuid4(), '''{\"key2\":\"val2\"}''', \"the brown fox\", [1.0, 1.7]),\n", + " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.8]),\n", + " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.9]),\n", + " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 100.8]),\n", + " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 101.8]),\n", + " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.8]),\n", + " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.8]),\n", + " (uuid.uuid4(), '''{\"key_1\":\"val_1\", \"key_2\":\"val_2\"}''',\n", + " \"the brown fox\", [1.0, 1.8]),\n", + " (uuid.uuid4(), '''{\"key0\": [1,2,3,4]}''', \"the brown fox\", [1.0, 1.8]),\n", + " (uuid.uuid4(), '''{\"key0\": [5,6,7], \"key3\": 3}''', \"the brown fox\", [1.0, 1.8]),\n", " ])\n", - "except BaseException as e:\n", - " raised = True\n", - "assert raised\n", - "\n", - "rec = vec.search([1.0, 2.0], filter={\"key_1\": \"val_1\", \"key_2\": \"val_2\"})\n", - "assert rec[0][SEARCH_RESULT_CONTENTS_IDX] == 'the brown fox'\n", - "assert rec[0][\"contents\"] == 'the brown fox'\n", - "assert rec[0][SEARCH_RESULT_METADATA_IDX] == {\n", - " 'key_1': 'val_1', 'key_2': 'val_2'}\n", - "assert rec[0][\"metadata\"] == {\n", - " 'key_1': 'val_1', 'key_2': 'val_2'}\n", - "assert isinstance(rec[0][SEARCH_RESULT_METADATA_IDX], dict)\n", - "assert rec[0][SEARCH_RESULT_DISTANCE_IDX] == 0.0009438353921149556\n", - "assert rec[0][\"distance\"] == 0.0009438353921149556\n", - "\n", - "rec = vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key\",\"==\", \"val2\"))\n", - "assert len(rec) == 1\n", - "\n", - "rec = vec.search([1.0, 2.0], limit=4, filter=[\n", - " {\"key_1\": \"val_1\"}, {\"key2\": \"val2\"}])\n", - "len(rec) == 2\n", - "vec.delete_by_ids([rec[0][SEARCH_RESULT_ID_IDX]])\n", - "rec = vec.search([1.0, 2.0], limit=4, filter=[\n", - " {\"key_1\": \"val_1\"}, {\"key2\": \"val2\"}])\n", - "assert len(rec) == 1\n", - "vec.delete_by_metadata([{\"key_1\": \"val_1\"}, {\"key2\": \"val2\"}])\n", - "rec = vec.search([1.0, 2.0], limit=4, filter=[\n", - " {\"key_1\": \"val_1\"}, {\"key2\": \"val2\"}])\n", - "assert len(rec) == 0\n", - "rec = vec.search([1.0, 2.0], limit=4, filter=[{\"key2\": \"val\"}])\n", - "assert len(rec) == 4\n", - "vec.delete_by_metadata([{\"key2\": \"val\"}])\n", - "rec = vec.search([1.0, 2.0], limit=4, filter=[{\"key2\": \"val\"}])\n", - "len(rec) == 0\n", - "\n", - "assert not vec.table_is_empty()\n", - "vec.delete_all()\n", - "assert vec.table_is_empty()\n", - "\n", - "vec.drop_table()\n", - "vec.close()\n", - "\n", - "vec = Sync(service_url, \"data_table\", 2, id_type=\"TEXT\")\n", - "vec.create_tables()\n", - "assert vec.table_is_empty()\n", - "vec.upsert([(\"Not a valid UUID\", {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2])])\n", - "assert not vec.table_is_empty()\n", - "vec.delete_by_ids([\"Not a valid UUID\"])\n", - "assert vec.table_is_empty()\n", - "vec.drop_table()\n", - "vec.close()\n", - "\n", - "vec = Sync(service_url, \"data_table\", 2, time_partition_interval=timedelta(seconds=60))\n", - "vec.create_tables()\n", - "assert vec.table_is_empty()\n", - "id = uuid.uuid1()\n", - "vec.upsert([(id, {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2])])\n", - "assert not vec.table_is_empty()\n", - "vec.delete_by_ids([id])\n", - "assert vec.table_is_empty()\n", - "raised = False\n", - "try:\n", - " # can't upsert with uuid type 4 in time partitioned table\n", + "\n", + " vec.create_embedding_index(IvfflatIndex())\n", + " vec.drop_embedding_index()\n", + " vec.create_embedding_index(IvfflatIndex(100))\n", + " vec.drop_embedding_index()\n", + " vec.create_embedding_index(HNSWIndex())\n", + " vec.drop_embedding_index()\n", + " vec.create_embedding_index(HNSWIndex(20, 125))\n", + " vec.drop_embedding_index()\n", + " vec.create_embedding_index(DiskAnnIndex())\n", + " vec.drop_embedding_index()\n", + " vec.create_embedding_index(DiskAnnIndex(50, 50, 1.5))\n", + "\n", + " rec = vec.search([1.0, 2.0])\n", + " assert len(rec) == 10\n", + " rec = vec.search(np.array([1.0, 2.0]))\n", + " assert len(rec) == 10\n", + " rec = vec.search([1.0, 2.0], limit=4)\n", + " assert len(rec) == 4\n", + " rec = vec.search(limit=4)\n", + " assert len(rec) == 4\n", + " rec = vec.search([1.0, 2.0], limit=4, filter={\"key2\": \"val2\"})\n", + " assert len(rec) == 1\n", + " rec = vec.search([1.0, 2.0], limit=4, filter={\"key2\": \"does not exist\"})\n", + " assert len(rec) == 0\n", + " rec = vec.search(limit=4, filter={\"key2\": \"does not exist\"})\n", + " assert len(rec) == 0\n", + " rec = vec.search([1.0, 2.0], limit=4, filter={\"key_1\": \"val_1\"})\n", + " assert len(rec) == 1\n", + " rec = vec.search([1.0, 2.0], filter={\"key_1\": \"val_1\", \"key_2\": \"val_2\"})\n", + " assert len(rec) == 1\n", + " rec = vec.search([1.0, 2.0], limit=4, filter={\n", + " \"key_1\": \"val_1\", \"key_2\": \"val_3\"})\n", + " assert len(rec) == 0\n", + "\n", + " rec = vec.search([1.0, 2.0], limit=4, filter=[\n", + " {\"key_1\": \"val_1\"}, {\"key2\": \"val2\"}])\n", + " assert len(rec) == 2\n", + "\n", + " rec = vec.search([1.0, 2.0], limit=4, filter=[{\"key_1\": \"val_1\"}, {\n", + " \"key2\": \"val2\"}, {\"no such key\": \"no such val\"}])\n", + " assert len(rec) == 2\n", + "\n", + " raised = False\n", + " try:\n", + " # can't upsert using both keys and dictionaries\n", + " await vec.upsert([\n", + " (uuid.uuid4(), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2]),\n", + " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.2])\n", + " ])\n", + " except ValueError as e:\n", + " raised = True\n", + " assert raised\n", + "\n", + " raised = False\n", + " try:\n", + " # can't upsert using both keys and dictionaries opposite order\n", + " await vec.upsert([\n", + " (uuid.uuid4(), '''{\"key2\":\"val\"}''', \"the brown fox\", [1.0, 1.2]),\n", + " (uuid.uuid4(), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2])\n", + " ])\n", + " except BaseException as e:\n", + " raised = True\n", + " assert raised\n", + "\n", + " rec = vec.search([1.0, 2.0], filter={\"key_1\": \"val_1\", \"key_2\": \"val_2\"})\n", + " assert rec[0][SEARCH_RESULT_CONTENTS_IDX] == 'the brown fox'\n", + " assert rec[0][\"contents\"] == 'the brown fox'\n", + " assert rec[0][SEARCH_RESULT_METADATA_IDX] == {\n", + " 'key_1': 'val_1', 'key_2': 'val_2'}\n", + " assert rec[0][\"metadata\"] == {\n", + " 'key_1': 'val_1', 'key_2': 'val_2'}\n", + " assert isinstance(rec[0][SEARCH_RESULT_METADATA_IDX], dict)\n", + " assert rec[0][SEARCH_RESULT_DISTANCE_IDX] == 0.0009438353921149556\n", + " assert rec[0][\"distance\"] == 0.0009438353921149556\n", + "\n", + " rec = vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key\",\"==\", \"val2\"))\n", + " assert len(rec) == 1\n", + "\n", + " rec = vec.search([1.0, 2.0], limit=4, filter=[\n", + " {\"key_1\": \"val_1\"}, {\"key2\": \"val2\"}])\n", + " len(rec) == 2\n", + " vec.delete_by_ids([rec[0][SEARCH_RESULT_ID_IDX]])\n", + " rec = vec.search([1.0, 2.0], limit=4, filter=[\n", + " {\"key_1\": \"val_1\"}, {\"key2\": \"val2\"}])\n", + " assert len(rec) == 1\n", + " vec.delete_by_metadata([{\"key_1\": \"val_1\"}, {\"key2\": \"val2\"}])\n", + " rec = vec.search([1.0, 2.0], limit=4, filter=[\n", + " {\"key_1\": \"val_1\"}, {\"key2\": \"val2\"}])\n", + " assert len(rec) == 0\n", + " rec = vec.search([1.0, 2.0], limit=4, filter=[{\"key2\": \"val\"}])\n", + " assert len(rec) == 4\n", + " vec.delete_by_metadata([{\"key2\": \"val\"}])\n", + " rec = vec.search([1.0, 2.0], limit=4, filter=[{\"key2\": \"val\"}])\n", + " len(rec) == 0\n", + "\n", + " assert not vec.table_is_empty()\n", + " vec.delete_all()\n", + " assert vec.table_is_empty()\n", + "\n", + " vec.drop_table()\n", + " vec.close()\n", + "\n", + " vec = Sync(service_url, \"data_table\", 2, id_type=\"TEXT\", schema_name=schema)\n", + " vec.create_tables()\n", + " assert vec.table_is_empty()\n", + " vec.upsert([(\"Not a valid UUID\", {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2])])\n", + " assert not vec.table_is_empty()\n", + " vec.delete_by_ids([\"Not a valid UUID\"])\n", + " assert vec.table_is_empty()\n", + " vec.drop_table()\n", + " vec.close()\n", + "\n", + " vec = Sync(service_url, \"data_table\", 2, time_partition_interval=timedelta(seconds=60), schema_name=schema)\n", + " vec.create_tables()\n", + " assert vec.table_is_empty()\n", + " id = uuid.uuid1()\n", + " vec.upsert([(id, {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2])])\n", + " assert not vec.table_is_empty()\n", + " vec.delete_by_ids([id])\n", + " assert vec.table_is_empty()\n", + " raised = False\n", + " try:\n", + " # can't upsert with uuid type 4 in time partitioned table\n", + " vec.upsert([\n", + " (uuid.uuid4(), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2])\n", + " ])\n", + " #pass\n", + " except BaseException as e:\n", + " raised = True\n", + " assert raised\n", + "\n", + " specific_datetime = datetime(2018, 8, 10, 15, 30, 0)\n", " vec.upsert([\n", - " (uuid.uuid4(), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2])\n", + " # current time\n", + " (uuid.uuid1(), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2]),\n", + " #time in 2018\n", + " (uuid_from_time(specific_datetime), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2])\n", " ])\n", - " #pass\n", - "except BaseException as e:\n", - " raised = True\n", - "assert raised\n", - "\n", - "specific_datetime = datetime(2018, 8, 10, 15, 30, 0)\n", - "vec.upsert([\n", - " # current time\n", - " (uuid.uuid1(), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2]),\n", - " #time in 2018\n", - " (uuid_from_time(specific_datetime), {\"key\": \"val\"}, \"the brown fox\", [1.0, 1.2])\n", - "])\n", - "\n", - "def search_date(start_date, end_date, expected):\n", - " #using uuid_time_filter\n", - " rec = vec.search([1.0, 2.0], limit=4, uuid_time_filter=UUIDTimeRange(start_date, end_date))\n", - " assert len(rec) == expected\n", - " rec = vec.search([1.0, 2.0], limit=4, uuid_time_filter=UUIDTimeRange(str(start_date), str(end_date)))\n", - " assert len(rec) == expected\n", - " \n", - " #using filters\n", - " filter = {}\n", - " if start_date is not None:\n", - " filter[\"__start_date\"] = start_date\n", - " if end_date is not None:\n", - " filter[\"__end_date\"] = end_date\n", - " rec = vec.search([1.0, 2.0], limit=4, filter=filter)\n", - " assert len(rec) == expected\n", - " #using filters with string dates\n", - " filter = {}\n", - " if start_date is not None:\n", - " filter[\"__start_date\"] = str(start_date)\n", - " if end_date is not None:\n", - " filter[\"__end_date\"] = str(end_date)\n", - " rec = vec.search([1.0, 2.0], limit=4, filter=filter)\n", - " assert len(rec) == expected\n", - " #using predicates\n", - " predicates = []\n", - " if start_date is not None:\n", - " predicates.append((\"__uuid_timestamp\", \">=\", start_date))\n", - " if end_date is not None:\n", - " predicates.append((\"__uuid_timestamp\", \"<\", end_date))\n", - " rec = vec.search([1.0, 2.0], limit=4, predicates=Predicates(*predicates))\n", - " assert len(rec) == expected\n", - " #using predicates with string dates\n", - " predicates = []\n", - " if start_date is not None:\n", - " predicates.append((\"__uuid_timestamp\", \">=\", str(start_date)))\n", - " if end_date is not None:\n", - " predicates.append((\"__uuid_timestamp\", \"<\", str(end_date)))\n", - " rec = vec.search([1.0, 2.0], limit=4, predicates=Predicates(*predicates))\n", - " assert len(rec) == expected\n", - "\n", - "assert not vec.table_is_empty()\n", - "\n", - "search_date(specific_datetime-timedelta(days=7), specific_datetime+timedelta(days=7), 1)\n", - "search_date(specific_datetime-timedelta(days=7), None, 2)\n", - "search_date(None, specific_datetime+timedelta(days=7), 1)\n", - "search_date(specific_datetime-timedelta(days=7), specific_datetime-timedelta(days=2), 0)\n", - "\n", - "#check timedelta handling\n", - "rec = vec.search([1.0, 2.0], limit=4, uuid_time_filter=UUIDTimeRange(start_date=specific_datetime, time_delta=timedelta(days=7)))\n", - "assert len(rec) == 1\n", - "#end is exclusive\n", - "rec = vec.search([1.0, 2.0], limit=4, uuid_time_filter=UUIDTimeRange(end_date=specific_datetime, time_delta=timedelta(days=7)))\n", - "assert len(rec) == 0\n", - "rec = vec.search([1.0, 2.0], limit=4, uuid_time_filter=UUIDTimeRange(end_date=specific_datetime+timedelta(seconds=1), time_delta=timedelta(days=7)))\n", - "assert len(rec) == 1\n", - "rec = vec.search([1.0, 2.0], limit=4, query_params=DiskAnnIndexParams(10, 5))\n", - "assert len(rec) == 2\n", - "rec = vec.search([1.0, 2.0], limit=4, query_params=DiskAnnIndexParams(100, rescore=2))\n", - "assert len(rec) == 2\n", - "vec.drop_table()\n", - "vec.close()" + "\n", + " def search_date(start_date, end_date, expected):\n", + " #using uuid_time_filter\n", + " rec = vec.search([1.0, 2.0], limit=4, uuid_time_filter=UUIDTimeRange(start_date, end_date))\n", + " assert len(rec) == expected\n", + " rec = vec.search([1.0, 2.0], limit=4, uuid_time_filter=UUIDTimeRange(str(start_date), str(end_date)))\n", + " assert len(rec) == expected\n", + " \n", + " #using filters\n", + " filter = {}\n", + " if start_date is not None:\n", + " filter[\"__start_date\"] = start_date\n", + " if end_date is not None:\n", + " filter[\"__end_date\"] = end_date\n", + " rec = vec.search([1.0, 2.0], limit=4, filter=filter)\n", + " assert len(rec) == expected\n", + " #using filters with string dates\n", + " filter = {}\n", + " if start_date is not None:\n", + " filter[\"__start_date\"] = str(start_date)\n", + " if end_date is not None:\n", + " filter[\"__end_date\"] = str(end_date)\n", + " rec = vec.search([1.0, 2.0], limit=4, filter=filter)\n", + " assert len(rec) == expected\n", + " #using predicates\n", + " predicates = []\n", + " if start_date is not None:\n", + " predicates.append((\"__uuid_timestamp\", \">=\", start_date))\n", + " if end_date is not None:\n", + " predicates.append((\"__uuid_timestamp\", \"<\", end_date))\n", + " rec = vec.search([1.0, 2.0], limit=4, predicates=Predicates(*predicates))\n", + " assert len(rec) == expected\n", + " #using predicates with string dates\n", + " predicates = []\n", + " if start_date is not None:\n", + " predicates.append((\"__uuid_timestamp\", \">=\", str(start_date)))\n", + " if end_date is not None:\n", + " predicates.append((\"__uuid_timestamp\", \"<\", str(end_date)))\n", + " rec = vec.search([1.0, 2.0], limit=4, predicates=Predicates(*predicates))\n", + " assert len(rec) == expected\n", + "\n", + " assert not vec.table_is_empty()\n", + "\n", + " search_date(specific_datetime-timedelta(days=7), specific_datetime+timedelta(days=7), 1)\n", + " search_date(specific_datetime-timedelta(days=7), None, 2)\n", + " search_date(None, specific_datetime+timedelta(days=7), 1)\n", + " search_date(specific_datetime-timedelta(days=7), specific_datetime-timedelta(days=2), 0)\n", + "\n", + " #check timedelta handling\n", + " rec = vec.search([1.0, 2.0], limit=4, uuid_time_filter=UUIDTimeRange(start_date=specific_datetime, time_delta=timedelta(days=7)))\n", + " assert len(rec) == 1\n", + " #end is exclusive\n", + " rec = vec.search([1.0, 2.0], limit=4, uuid_time_filter=UUIDTimeRange(end_date=specific_datetime, time_delta=timedelta(days=7)))\n", + " assert len(rec) == 0\n", + " rec = vec.search([1.0, 2.0], limit=4, uuid_time_filter=UUIDTimeRange(end_date=specific_datetime+timedelta(seconds=1), time_delta=timedelta(days=7)))\n", + " assert len(rec) == 1\n", + " rec = vec.search([1.0, 2.0], limit=4, query_params=DiskAnnIndexParams(10, 5))\n", + " assert len(rec) == 2\n", + " rec = vec.search([1.0, 2.0], limit=4, query_params=DiskAnnIndexParams(100, rescore=2))\n", + " assert len(rec) == 2\n", + " vec.drop_table()\n", + " vec.close()" ] }, { @@ -2445,6 +2497,13 @@ "import nbdev\n", "nbdev.nbdev_export()" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/nbs/index.ipynb b/nbs/index.ipynb index 31e080b..d2d9747 100644 --- a/nbs/index.ipynb +++ b/nbs/index.ipynb @@ -107,7 +107,9 @@ "|-------------|-------------------------------------------|\n", "| service_url | Timescale service URL / connection string |\n", "| table_name | Name of the table to use for storing the embeddings. Think of this as the collection name |\n", - "| num_dimensions | Number of dimensions in the vector | " + "| num_dimensions | Number of dimensions in the vector |\n", + "\n", + "You can also specify the schema name, distance type, primary key type, etc. as optional parameters. Please see the documentation for details. " ] }, { diff --git a/timescale_vector/_modidx.py b/timescale_vector/_modidx.py index 9f639cf..f2410fe 100644 --- a/timescale_vector/_modidx.py +++ b/timescale_vector/_modidx.py @@ -93,10 +93,14 @@ 'timescale_vector.client.QueryBuilder': ('vector.html#querybuilder', 'timescale_vector/client.py'), 'timescale_vector.client.QueryBuilder.__init__': ( 'vector.html#querybuilder.__init__', 'timescale_vector/client.py'), - 'timescale_vector.client.QueryBuilder._get_embedding_index_name': ( 'vector.html#querybuilder._get_embedding_index_name', - 'timescale_vector/client.py'), + 'timescale_vector.client.QueryBuilder._get_embedding_index_name_quoted': ( 'vector.html#querybuilder._get_embedding_index_name_quoted', + 'timescale_vector/client.py'), + 'timescale_vector.client.QueryBuilder._get_schema_qualified_embedding_index_name_quoted': ( 'vector.html#querybuilder._get_schema_qualified_embedding_index_name_quoted', + 'timescale_vector/client.py'), 'timescale_vector.client.QueryBuilder._quote_ident': ( 'vector.html#querybuilder._quote_ident', 'timescale_vector/client.py'), + 'timescale_vector.client.QueryBuilder._quoted_table_name': ( 'vector.html#querybuilder._quoted_table_name', + 'timescale_vector/client.py'), 'timescale_vector.client.QueryBuilder._where_clause_for_filter': ( 'vector.html#querybuilder._where_clause_for_filter', 'timescale_vector/client.py'), 'timescale_vector.client.QueryBuilder.create_embedding_index_query': ( 'vector.html#querybuilder.create_embedding_index_query', diff --git a/timescale_vector/client.py b/timescale_vector/client.py index ad6c87b..11a67d3 100644 --- a/timescale_vector/client.py +++ b/timescale_vector/client.py @@ -477,7 +477,8 @@ def __init__( distance_type: str, id_type: str, time_partition_interval: Optional[timedelta], - infer_filters: bool) -> None: + infer_filters: bool, + schema_name: Optional[str]) -> None: """ Initializes a base Vector object to generate queries for vector clients. @@ -491,8 +492,15 @@ def __init__( The distance type for indexing. id_type The type of the id column. Can be either 'UUID' or 'TEXT'. + time_partition_interval + The time interval for partitioning the table (optional). + infer_filters + Whether to infer start and end times from the special __start_date and __end_date filters. + schema_name + The schema name for the table (optional, uses the database's default schema if not specified). """ self.table_name = table_name + self.schema_name = schema_name self.num_dimensions = num_dimensions if distance_type == 'cosine' or distance_type == '<=>': self.distance_type = '<=>' @@ -526,6 +534,12 @@ def _quote_ident(ident): str: The quoted identifier. """ return '"{}"'.format(ident.replace('"', '""')) + + def _quoted_table_name(self): + if self.schema_name is not None: + return self._quote_ident(self.schema_name) + "." + self._quote_ident(self.table_name) + else: + return self._quote_ident(self.table_name) def get_row_exists_query(self): """ @@ -535,7 +549,7 @@ def get_row_exists_query(self): ------- str: The query to check for row existence. """ - return "SELECT 1 FROM {table_name} LIMIT 1".format(table_name=self._quote_ident(self.table_name)) + return "SELECT 1 FROM {table_name} LIMIT 1".format(table_name=self._quoted_table_name()) def get_upsert_query(self): """ @@ -545,7 +559,7 @@ def get_upsert_query(self): ------- str: The upsert query. """ - return "INSERT INTO {table_name} (id, metadata, contents, embedding) VALUES ($1, $2, $3, $4) ON CONFLICT DO NOTHING".format(table_name=self._quote_ident(self.table_name)) + return "INSERT INTO {table_name} (id, metadata, contents, embedding) VALUES ($1, $2, $3, $4) ON CONFLICT DO NOTHING".format(table_name=self._quoted_table_name()) def get_approx_count_query(self): """ @@ -556,7 +570,7 @@ def get_approx_count_query(self): str: the query. """ # todo optimize with approx - return "SELECT COUNT(*) as cnt FROM {table_name}".format(table_name=self._quote_ident(self.table_name)) + return "SELECT COUNT(*) as cnt FROM {table_name}".format(table_name=self._quoted_table_name()) #| export def get_create_query(self): @@ -606,7 +620,7 @@ def get_create_query(self): time_partitioning_func=>'public.uuid_timestamp', chunk_time_interval => '{chunk_time_interval} seconds'::interval); '''.format( - table_name=self._quote_ident(self.table_name), + table_name=self._quoted_table_name(), chunk_time_interval=str(self.time_partition_interval.total_seconds()), ) return ''' @@ -625,36 +639,42 @@ def get_create_query(self): {hypertable_sql} '''.format( - table_name=self._quote_ident(self.table_name), + table_name=self._quoted_table_name(), id_type=self.id_type, index_name=self._quote_ident(self.table_name+"_meta_idx"), dimensions=self.num_dimensions, hypertable_sql=hypertable_sql, ) - - def _get_embedding_index_name(self): + + def _get_embedding_index_name_quoted(self): return self._quote_ident(self.table_name+"_embedding_idx") + + def _get_schema_qualified_embedding_index_name_quoted(self): + if self.schema_name is not None: + return self._quote_ident(self.schema_name)+"."+self._get_embedding_index_name_quoted() + else: + return self._get_embedding_index_name_quoted() def drop_embedding_index_query(self): - return "DROP INDEX IF EXISTS {index_name};".format(index_name=self._get_embedding_index_name()) + return "DROP INDEX IF EXISTS {schema_qualified_index_name};".format(schema_qualified_index_name=self._get_schema_qualified_embedding_index_name_quoted()) def delete_all_query(self): - return "TRUNCATE {table_name};".format(table_name=self._quote_ident(self.table_name)) + return "TRUNCATE {table_name};".format(table_name=self._quoted_table_name()) def delete_by_ids_query(self, ids: Union[List[uuid.UUID], List[str]]) -> Tuple[str, List]: query = "DELETE FROM {table_name} WHERE id = ANY($1::{id_type}[]);".format( - table_name=self._quote_ident(self.table_name), id_type=self.id_type) + table_name=self._quoted_table_name(), id_type=self.id_type) return (query, [ids]) def delete_by_metadata_query(self, filter: Union[Dict[str, str], List[Dict[str, str]]]) -> Tuple[str, List]: params: List[Any] = [] (where, params) = self._where_clause_for_filter(params, filter) query = "DELETE FROM {table_name} WHERE {where};".format( - table_name=self._quote_ident(self.table_name), where=where) + table_name=self._quoted_table_name(), where=where) return (query, params) def drop_table_query(self): - return "DROP TABLE IF EXISTS {table_name};".format(table_name=self._quote_ident(self.table_name)) + return "DROP TABLE IF EXISTS {table_name};".format(table_name=self._quoted_table_name()) def default_max_db_connection_query(self): """ @@ -679,8 +699,8 @@ def create_embedding_index_query(self, index: BaseIndex, num_records_callback: C str: The index creation query. """ column_name = "embedding" - index_name = self._get_embedding_index_name() - query = index.create_index_query(self._quote_ident(self.table_name), self._quote_ident(column_name), index_name, self.distance_type, num_records_callback) + index_name_quoted = self._get_embedding_index_name_quoted() + query = index.create_index_query(self._quoted_table_name(), self._quote_ident(column_name), index_name_quoted, self.distance_type, num_records_callback) return query def _where_clause_for_filter(self, params: List, filter: Optional[Union[Dict[str, str], List[Dict[str, str]]]]) -> Tuple[str, List]: @@ -772,7 +792,7 @@ def search_query( {where} {order_by_clause} LIMIT {limit} - '''.format(distance=distance, order_by_clause=order_by_clause, where=where, table_name=self._quote_ident(self.table_name), limit=limit) + '''.format(distance=distance, order_by_clause=order_by_clause, where=where, table_name=self._quoted_table_name(), limit=limit) return (query, params) # %% ../nbs/00_vector.ipynb 18 @@ -787,6 +807,7 @@ def __init__( time_partition_interval: Optional[timedelta] = None, max_db_connections: Optional[int] = None, infer_filters: bool = True, + schema_name: Optional[str] = None, ) -> None: """ Initializes a async client for storing vector data. @@ -803,9 +824,15 @@ def __init__( The distance type for indexing. id_type The type of the id column. Can be either 'UUID' or 'TEXT'. + time_partition_interval + The time interval for partitioning the table (optional). + infer_filters + Whether to infer start and end times from the special __start_date and __end_date filters. + schema_name + The schema name for the table (optional, uses the database's default schema if not specified). """ self.builder = QueryBuilder( - table_name, num_dimensions, distance_type, id_type, time_partition_interval, infer_filters) + table_name, num_dimensions, distance_type, id_type, time_partition_interval, infer_filters, schema_name) self.service_url = service_url self.pool = None self.max_db_connections = max_db_connections @@ -1063,6 +1090,7 @@ def __init__( time_partition_interval: Optional[timedelta] = None, max_db_connections: Optional[int] = None, infer_filters: bool = True, + schema_name: Optional[str] = None, ) -> None: """ Initializes a sync client for storing vector data. @@ -1079,9 +1107,15 @@ def __init__( The distance type for indexing. id_type The type of the primary id column. Can be either 'UUID' or 'TEXT'. + time_partition_interval + The time interval for partitioning the table (optional). + infer_filters + Whether to infer start and end times from the special __start_date and __end_date filters. + schema_name + The schema name for the table (optional, uses the database's default schema if not specified). """ self.builder = QueryBuilder( - table_name, num_dimensions, distance_type, id_type, time_partition_interval, infer_filters) + table_name, num_dimensions, distance_type, id_type, time_partition_interval, infer_filters, schema_name) self.service_url = service_url self.pool = None self.max_db_connections = max_db_connections