# Patch summary (commentary only; this text sits before the first "diff --git" header).
#
# What this patch changes:
#   * .github/workflows/check.yml: appends a "test" job (matrix version 15, ubuntu-latest)
#     that installs PostgreSQL packages from apt.postgresql.org, installs cargo-pgrx from
#     the tensorchord/pgrx git repository at the rev read out of Cargo.toml, installs
#     sqllogictest-bin, runs "cargo pgrx install --release", restarts postgresql, loads
#     tests/init.sql and runs sqllogictest over './tests/**/*.slt' as the postgres user.
#   * Dockerfile: the build step calls ./pg.sh instead of ./docker.sh.
#   * tests/README.md (new): how to run the suite and how to reset state with init.sql.
#   * tests/init.sql (new): drops and recreates the "vectors" extension, drops table t.
#   * tests/sqllogictest/cast.slt (new): string/array <-> vector casts, parser error cases,
#     NaN/Infinity/overflow values, a 16001-element array, and vector[] values.
#   * tests/sqllogictest/operator.slt (new): + - = <> < <= > >= and the three distance
#     operators <-> <#> <=> on small literal vectors.
#   * tests/sqllogictest/{flat,hnsw,ivf,quantization,vamana}.slt (new): each scenario
#     creates t (val vector(3)), inserts 1000 random rows, creates a "vectors" index with
#     l2_ops and capacity = 2000 for the named algorithm, inserts one more row, then checks
#     that ORDER BY <->, <=> and <#> with LIMIT 10 each yield a count of 10.
#
# Maintenance notes:
#   * The "@@ -a,b +c,d @@" hunk headers carry line counts and the "index" lines carry blob
#     hashes of the payload; if any added line is edited, regenerate the patch rather than
#     hand-editing it.
#   * "\ No newline at end of file" markers show that the workflow hunk and every added
#     file end without a trailing newline.
#
# NOTE(review): the expected-error text "Bad charactor" is a pattern matched against the
#   error raised at runtime; the spelling presumably mirrors the extension's own message,
#   so confirm against the extension source before changing it in the tests.
# NOTE(review): ivf.slt issues "DROP TABLE IF EXISTS t;" twice in a row, and cast.slt runs
#   the '[]'::vector query twice; both look redundant rather than intentional - confirm.
# NOTE(review): the '[3.4e38, -3.4e38, 3.5e38, -3.5e38]' query in cast.slt has no trailing
#   semicolon, unlike the other statements in that file.
# NOTE(review): the index tests only build l2_ops indexes and only assert a row count, so
#   they do not show whether the <=> and <#> queries use the index or return the nearest
#   rows - TODO confirm this coverage is what is intended.
# NOTE(review): hnsw.slt carries a TODO about index creation failing with an
#   Option::unwrap() error when a previous table contained nulls; that is recorded in the
#   test file as a known issue, not fixed by this patch.
diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index 87babfb14..def641b72 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -67,3 +67,27 @@ jobs: env: RUST_BACKTRACE: 1 run: cargo test --all --no-default-features --features "pg${{ matrix.version }} pg_test" -- --nocapture + + test: + strategy: + matrix: + version: [15] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Prepare + run: | + sudo sh -c 'echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list' + wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add - + sudo apt-get update + sudo apt-get -y install libpq-dev postgresql-${{ matrix.version }} postgresql-server-dev-${{ matrix.version }} + cargo install cargo-pgrx --git https://github.com/tensorchord/pgrx.git --rev $(cat Cargo.toml | grep "pgrx =" | awk -F'rev = "' '{print $2}' | cut -d'"' -f1) + cargo pgrx init --pg${{ matrix.version }}=/usr/lib/postgresql/${{ matrix.version }}/bin/pg_config + cargo install sqllogictest-bin + - name: Build && Install + run: cargo pgrx install --release + - name: Sqllogictest + run: | + sudo systemctl restart postgresql + sudo -u postgres psql -f tests/init.sql + sudo -u postgres sqllogictest './tests/**/*.slt' \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 212420261..ec732068c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,4 +3,4 @@ FROM postgres:15 ARG TAG=latest COPY . /tmp/build -RUN (cd /tmp/build && ./docker.sh) +RUN (cd /tmp/build && ./pg.sh) diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 000000000..7ec61c8c9 --- /dev/null +++ b/tests/README.md @@ -0,0 +1,13 @@ +## Tests for pgvecto.rs + +We use [sqllogictest-rs](https://github.com/risinglightdb/sqllogictest-rs) to test the SQL queries. 
+ +To run all tests, use the following command: +```shell +sqllogictest './tests/**/*.slt' +``` + +Each time you modify the source code, you can run the following command to clean up the test data and reload the extension: +```shell +psql -f ./tests/init.sql +``` \ No newline at end of file diff --git a/tests/init.sql b/tests/init.sql new file mode 100644 index 000000000..cf36e4127 --- /dev/null +++ b/tests/init.sql @@ -0,0 +1,3 @@ +DROP EXTENSION IF EXISTS vectors CASCADE; +CREATE EXTENSION vectors; +DROP TABLE IF EXISTS t; \ No newline at end of file diff --git a/tests/sqllogictest/cast.slt b/tests/sqllogictest/cast.slt new file mode 100644 index 000000000..56c5cdfd9 --- /dev/null +++ b/tests/sqllogictest/cast.slt @@ -0,0 +1,115 @@ +# cast string to vector +query I +SELECT '[1,2,3]'::vector; +---- +[1, 2, 3] + +statement error Bad charactor +SELECT '{1,2,3}'::vector; + +# cast array to vector +query I +SELECT '{1,2,3}'::real[]::vector; +---- +[1, 2, 3] + +statement error cannot cast type double precision\[\] to vector +SELECT '{1,2,3}'::float[]::vector; + +statement error cannot cast type integer\[\] to vector +SELECT ARRAY[1,2,3]::vector; + +statement error cannot cast type numeric\[\] to vector +SELECT ARRAY[1.,2.,3.]::vector; + +# cast vector to array +query I +SELECT '[1,2,3]'::vector::real[]; +---- +{1,2,3} + +statement error cannot cast type vector to double precision\[\] +SELECT '[1,2,3]'::vector::float[]; + +statement error cannot cast type vector to integer\[\] +SELECT '[1,2,3]'::vector::int[]; + +statement error cannot cast type vector to numeric\[\] +SELECT '[1,2,3]'::vector::numeric[]; + +# cast unusual value to vector +statement error assertion failed: !array.contains_nulls() +SELECT '{NULL}'::real[]::vector; + +query I +SELECT '{NaN, Infinity, -Infinity}'::real[]::vector; +---- +[NaN, inf, -inf] + +query I +SELECT '[3.4e38, -3.4e38, 3.5e38, -3.5e38]'::vector +---- +[340000000000000000000000000000000000000, -340000000000000000000000000000000000000, 
inf, -inf] + +statement error assertion failed: !array.is_empty() +SELECT '{}'::real[]::vector; + +# TODO: inconsistent behavior with empty array casting +query I +SELECT '[]'::vector; +---- +[] + +# parse all kinds of string +statement error Bad sequence +SELECT '[1,2,3'::vector; + +statement error Bad charactor +SELECT '[1,2,3]9'::vector; + +statement error Bad charactor +SELECT '1,2,3'::vector; + +statement error Bad sequence +SELECT ''::vector; + +statement error Bad sequence +SELECT '['::vector; + +statement error Expect a number +SELECT '[,'::vector; + +query I +SELECT '[]'::vector; +---- +[] + +query I +SELECT '[1,]'::vector; +---- +[1] + +statement error Bad charactor +SELECT '[1a]'::vector; + +statement error Expect a number +SELECT '[1,,3]'::vector; + +statement error Expect a number +SELECT '[1, ,3]'::vector; + +# cast large array to vector +statement ok +SELECT array_agg(n)::real[]::vector FROM generate_series(1, 16001) n; + +# vector array +query I +SELECT unnest('{"[1,2,3]", "[4,5,6]"}'::vector[]); +---- +[1, 2, 3] +[4, 5, 6] + +query I +SELECT '{"[1,2,3]"}'::vector[]; +---- +{"[1, 2, 3]"} \ No newline at end of file diff --git a/tests/sqllogictest/flat.slt b/tests/sqllogictest/flat.slt new file mode 100644 index 000000000..4a4420637 --- /dev/null +++ b/tests/sqllogictest/flat.slt @@ -0,0 +1,36 @@ +statement ok +DROP TABLE IF EXISTS t; + +statement ok +CREATE TABLE t (val vector(3)); + +statement ok +INSERT INTO t (val) SELECT ARRAY[random(), random(), random()]::real[] FROM generate_series(1, 1000); + +statement ok +CREATE INDEX ON t USING vectors (val l2_ops) +WITH (options = $$ +capacity = 2000 +[algorithm.flat] +$$); + +statement ok +INSERT INTO t (val) VALUES ('[0.6,0.6,0.6]'); + +query I +SELECT COUNT(1) FROM (SELECT 1 FROM t ORDER BY val <-> '[0.5,0.5,0.5]' limit 10) t2; +---- +10 + +query I +SELECT COUNT(1) FROM (SELECT 1 FROM t ORDER BY val <=> '[0.5,0.5,0.5]' limit 10) t2; +---- +10 + +query I +SELECT COUNT(1) FROM (SELECT 1 FROM t ORDER BY 
val <#> '[0.5,0.5,0.5]' limit 10) t2; +---- +10 + +statement ok +DROP TABLE t; \ No newline at end of file diff --git a/tests/sqllogictest/hnsw.slt b/tests/sqllogictest/hnsw.slt new file mode 100644 index 000000000..5e96b8614 --- /dev/null +++ b/tests/sqllogictest/hnsw.slt @@ -0,0 +1,39 @@ +statement ok +DROP TABLE IF EXISTS t; + +statement ok +CREATE TABLE t (val vector(3)); + +statement ok +INSERT INTO t (val) SELECT ARRAY[random(), random(), random()]::real[] FROM generate_series(1, 1000); + +# TODO: if previous table has nulls, then this statement fails with 'ERROR: called `Option::unwrap()` on a `None` value'. +# And because of borrow checker, we can't remove this table before restarting the postgres. +# Maybe we need better error handling. +statement ok +CREATE INDEX ON t USING vectors (val l2_ops) +WITH (options = $$ +capacity = 2000 +[algorithm.hnsw] +$$); + +statement ok +INSERT INTO t (val) VALUES ('[0.6,0.6,0.6]'); + +query I +SELECT COUNT(1) FROM (SELECT 1 FROM t ORDER BY val <-> '[0.5,0.5,0.5]' limit 10) t2; +---- +10 + +query I +SELECT COUNT(1) FROM (SELECT 1 FROM t ORDER BY val <=> '[0.5,0.5,0.5]' limit 10) t2; +---- +10 + +query I +SELECT COUNT(1) FROM (SELECT 1 FROM t ORDER BY val <#> '[0.5,0.5,0.5]' limit 10) t2; +---- +10 + +statement ok +DROP TABLE t; \ No newline at end of file diff --git a/tests/sqllogictest/ivf.slt b/tests/sqllogictest/ivf.slt new file mode 100644 index 000000000..fca979c69 --- /dev/null +++ b/tests/sqllogictest/ivf.slt @@ -0,0 +1,78 @@ +statement ok +DROP TABLE IF EXISTS t; + +# ivf native +statement ok +DROP TABLE IF EXISTS t; + +statement ok +CREATE TABLE t (val vector(3)); + +statement ok +INSERT INTO t (val) SELECT ARRAY[random(), random(), random()]::real[] FROM generate_series(1, 1000); + +statement ok +CREATE INDEX ON t USING vectors (val l2_ops) +WITH (options = $$ +capacity = 2000 +[algorithm.ivf] +$$); + +statement ok +INSERT INTO t (val) VALUES ('[0.6,0.6,0.6]'); + +query I +SELECT COUNT(1) FROM (SELECT 1 FROM t 
ORDER BY val <-> '[0.5,0.5,0.5]' limit 10) t2; +---- +10 + +query I +SELECT COUNT(1) FROM (SELECT 1 FROM t ORDER BY val <=> '[0.5,0.5,0.5]' limit 10) t2; +---- +10 + +query I +SELECT COUNT(1) FROM (SELECT 1 FROM t ORDER BY val <#> '[0.5,0.5,0.5]' limit 10) t2; +---- +10 + +statement ok +DROP TABLE t; + +# ivfpq +statement ok +DROP TABLE IF EXISTS t; + +statement ok +CREATE TABLE t (val vector(3)); + +statement ok +INSERT INTO t (val) SELECT ARRAY[random(), random(), random()]::real[] FROM generate_series(1, 1000); + +statement ok +CREATE INDEX ON t USING vectors (val l2_ops) +WITH (options = $$ +capacity = 2000 +[algorithm.ivf.quantization.product] +$$); + +statement ok +INSERT INTO t (val) VALUES ('[0.6,0.6,0.6]'); + +query I +SELECT COUNT(1) FROM (SELECT 1 FROM t ORDER BY val <-> '[0.5,0.5,0.5]' limit 10) t2; +---- +10 + +query I +SELECT COUNT(1) FROM (SELECT 1 FROM t ORDER BY val <=> '[0.5,0.5,0.5]' limit 10) t2; +---- +10 + +query I +SELECT COUNT(1) FROM (SELECT 1 FROM t ORDER BY val <#> '[0.5,0.5,0.5]' limit 10) t2; +---- +10 + +statement ok +DROP TABLE t; \ No newline at end of file diff --git a/tests/sqllogictest/operator.slt b/tests/sqllogictest/operator.slt new file mode 100644 index 000000000..a51e4ce03 --- /dev/null +++ b/tests/sqllogictest/operator.slt @@ -0,0 +1,65 @@ +# basic + - = <> < <= > >= +query I +SELECT '[1,2.3,4e5]'::vector + '[6,7.8,9e10]'; +---- +[7, 10.1, 90000400000] + +query I +SELECT '[1,2.3,4e5]'::vector - '[6,7.8,9e10]'; +---- +[-5, -5.5, -89999600000] + +query I +SELECT '[1,2,3]'::vector = '[1,2,3]'; +---- +t + +query I +SELECT '[1,2,3]'::vector <> '[1,2,3]'; +---- +f + +query I +SELECT '[1,2]'::vector < '[2,2]'; +---- +t + +query I +SELECT '[1,2]'::vector < '[1,3]'; +---- +t + +# TODO: may need better error message +statement error assertion failed: `\(left == right\)` +SELECT '[1,2]'::vector < '[1,2,3]'; + +query I +SELECT '[1,2]'::vector <= '[2,2]'; +---- +t + +query I +SELECT '[1,2]'::vector > '[2,2]'; +---- +f + +query I +SELECT 
'[1,2]'::vector >= '[2,2]'; +---- +f + +# basic <->(squared Euclidean distance) <#>(negative dot product distance) <=>(negative cosine distance) +query I +SELECT '[1,2]'::vector <-> '[3,4]'; +---- +8 + +query I +SELECT '[1,2]'::vector <#> '[3,4]'; +---- +-11 + +query I +SELECT '[1,2]'::vector <=> '[3,4]'; +---- +-0.98386997 \ No newline at end of file diff --git a/tests/sqllogictest/quantization.slt b/tests/sqllogictest/quantization.slt new file mode 100644 index 000000000..c4b673dec --- /dev/null +++ b/tests/sqllogictest/quantization.slt @@ -0,0 +1,72 @@ +statement ok +DROP TABLE IF EXISTS t; + +# product quantization +statement ok +CREATE TABLE t (val vector(3)); + +statement ok +INSERT INTO t (val) SELECT ARRAY[random(), random(), random()]::real[] FROM generate_series(1, 1000); + +statement ok +CREATE INDEX ON t USING vectors (val l2_ops) +WITH (options = $$ +capacity = 2000 +[algorithm.hnsw.quantization.product] +$$); + +statement ok +INSERT INTO t (val) VALUES ('[0.6,0.6,0.6]'); + +query I +SELECT COUNT(1) FROM (SELECT 1 FROM t ORDER BY val <-> '[0.5,0.5,0.5]' limit 10) t2; +---- +10 + +query I +SELECT COUNT(1) FROM (SELECT 1 FROM t ORDER BY val <=> '[0.5,0.5,0.5]' limit 10) t2; +---- +10 + +query I +SELECT COUNT(1) FROM (SELECT 1 FROM t ORDER BY val <#> '[0.5,0.5,0.5]' limit 10) t2; +---- +10 + +statement ok +DROP TABLE t; + +# scalar quantization +statement ok +CREATE TABLE t (val vector(3)); + +statement ok +INSERT INTO t (val) SELECT ARRAY[random(), random(), random()]::real[] FROM generate_series(1, 1000); + +statement ok +CREATE INDEX ON t USING vectors (val l2_ops) +WITH (options = $$ +capacity = 2000 +[algorithm.hnsw.quantization.scalar] +$$); + +statement ok +INSERT INTO t (val) VALUES ('[0.6,0.6,0.6]'); + +query I +SELECT COUNT(1) FROM (SELECT 1 FROM t ORDER BY val <-> '[0.5,0.5,0.5]' limit 10) t2; +---- +10 + +query I +SELECT COUNT(1) FROM (SELECT 1 FROM t ORDER BY val <=> '[0.5,0.5,0.5]' limit 10) t2; +---- +10 + +query I +SELECT COUNT(1) FROM 
(SELECT 1 FROM t ORDER BY val <#> '[0.5,0.5,0.5]' limit 10) t2; +---- +10 + +statement ok +DROP TABLE t; \ No newline at end of file diff --git a/tests/sqllogictest/vamana.slt b/tests/sqllogictest/vamana.slt new file mode 100644 index 000000000..28b64eee8 --- /dev/null +++ b/tests/sqllogictest/vamana.slt @@ -0,0 +1,36 @@ +statement ok +DROP TABLE IF EXISTS t; + +statement ok +CREATE TABLE t (val vector(3)); + +statement ok +INSERT INTO t (val) SELECT ARRAY[random(), random(), random()]::real[] FROM generate_series(1, 1000); + +statement ok +CREATE INDEX ON t USING vectors (val l2_ops) +WITH (options = $$ +capacity = 2000 +[algorithm.vamana] +$$); + +statement ok +INSERT INTO t (val) VALUES ('[0.6,0.6,0.6]'); + +query I +SELECT COUNT(1) FROM (SELECT 1 FROM t ORDER BY val <-> '[0.5,0.5,0.5]' limit 10) t2; +---- +10 + +query I +SELECT COUNT(1) FROM (SELECT 1 FROM t ORDER BY val <=> '[0.5,0.5,0.5]' limit 10) t2; +---- +10 + +query I +SELECT COUNT(1) FROM (SELECT 1 FROM t ORDER BY val <#> '[0.5,0.5,0.5]' limit 10) t2; +---- +10 + +statement ok +DROP TABLE t; \ No newline at end of file