diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index a990e582..0beeba05 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -19,7 +19,7 @@ concurrency: jobs: analyze: - name: "Analyze with SQLAlchemy ${{ matrix.sqla-version }}" + name: "Analyze Python code" runs-on: ubuntu-latest permissions: actions: read @@ -28,9 +28,7 @@ jobs: strategy: fail-fast: false - matrix: - language: [ python ] - sqla-version: ['<1.4', '<1.5', '<2.1'] + language: [ python ] steps: - name: Checkout @@ -57,10 +55,7 @@ jobs: - name: Install project run: | - pip install --editable=.[sqlalchemy,test] - pip install "sqlalchemy${{ matrix.sqla-version }}" --upgrade --pre + pip install --editable=.[test] - name: Perform CodeQL Analysis uses: github/codeql-action/analyze@v3 - with: - category: "/language:${{ matrix.language }}/sqla-version:${{ matrix.sqla-version }}" diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 1d1dbbfc..ccb65d9d 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -9,7 +9,6 @@ on: jobs: nightly: name: "Python: ${{ matrix.python-version }} - SQLA: ${{ matrix.sqla-version }} CrateDB: ${{ matrix.cratedb-version }} on ${{ matrix.os }}" runs-on: ${{ matrix.os }} @@ -18,23 +17,11 @@ jobs: os: ['ubuntu-latest'] python-version: ['3.7', '3.8', '3.9', '3.10', '3.11', '3.12'] cratedb-version: ['nightly'] - sqla-version: ['latest'] - pip-allow-prerelease: ['false'] - - # Another CI test matrix slot to test against prerelease versions of Python packages. 
- include: - - os: 'ubuntu-latest' - python-version: '3.12' - cratedb-version: 'nightly' - sqla-version: 'latest' - pip-allow-prerelease: 'true' fail-fast: false env: CRATEDB_VERSION: ${{ matrix.cratedb-version }} - SQLALCHEMY_VERSION: ${{ matrix.sqla-version }} - PIP_ALLOW_PRERELEASE: ${{ matrix.pip-allow-prerelease }} steps: - uses: actions/checkout@v4 @@ -55,11 +42,10 @@ jobs: source bootstrap.sh # Report about the test matrix slot. - echo "Invoking tests with CrateDB ${CRATEDB_VERSION} and SQLAlchemy ${SQLALCHEMY_VERSION}" + echo "Invoking tests with CrateDB ${CRATEDB_VERSION}" # Run linter. flake8 src bin # Run tests. - export SQLALCHEMY_WARN_20=1 bin/test -vvv diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 672d07f4..3edd14be 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -13,7 +13,6 @@ concurrency: jobs: test: name: "Python: ${{ matrix.python-version }} - SQLA: ${{ matrix.sqla-version }} on ${{ matrix.os }}" runs-on: ${{ matrix.os }} strategy: @@ -21,8 +20,6 @@ jobs: os: ['ubuntu-latest', 'macos-latest'] python-version: ['3.7', '3.8', '3.9', '3.10', '3.11', '3.12'] cratedb-version: ['5.4.5'] - sqla-version: ['<1.4', '<1.5', '<2.1'] - pip-allow-prerelease: ['false'] # To save resources, only use the most recent Python versions on macOS. exclude: @@ -35,20 +32,10 @@ jobs: - os: 'macos-latest' python-version: '3.10' - # Another CI test matrix slot to test against prerelease versions of Python packages. - include: - - os: 'ubuntu-latest' - python-version: '3.12' - cratedb-version: '5.4.5' - sqla-version: 'latest' - pip-allow-prerelease: 'true' - fail-fast: false env: CRATEDB_VERSION: ${{ matrix.cratedb-version }} - SQLALCHEMY_VERSION: ${{ matrix.sqla-version }} - PIP_ALLOW_PRERELEASE: ${{ matrix.pip-allow-prerelease }} CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} steps: @@ -70,13 +57,12 @@ jobs: source bootstrap.sh # Report about the test matrix slot. 
- echo "Invoking tests with CrateDB ${CRATEDB_VERSION} and SQLAlchemy ${SQLALCHEMY_VERSION}" + echo "Invoking tests with CrateDB ${CRATEDB_VERSION}" # Run linter. flake8 src bin # Run tests. - export SQLALCHEMY_WARN_20=1 coverage run bin/test -vvv # Set the stage for uploading the coverage report. diff --git a/CHANGES.txt b/CHANGES.txt index ecce63d1..8a0b9bf3 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -5,6 +5,13 @@ Changes for crate Unreleased ========== +- The SQLAlchemy dialect has been split off into the `sqlalchemy-cratedb`_ + package. See `Migrate from crate.client to sqlalchemy-cratedb`_ to learn + about necessary migration steps. + +.. _Migrate from crate.client to sqlalchemy-cratedb: https://cratedb.com/docs/sqlalchemy-cratedb/migrate-from-crate-client.html +.. _sqlalchemy-cratedb: https://pypi.org/project/sqlalchemy-cratedb/ + 2024/01/18 0.35.2 ================= diff --git a/DEVELOP.rst b/DEVELOP.rst index b8fcaeae..41373f18 100644 --- a/DEVELOP.rst +++ b/DEVELOP.rst @@ -30,9 +30,7 @@ Run all tests:: Run specific tests:: - ./bin/test -vvvv -t SqlAlchemyCompilerTest ./bin/test -vvvv -t test_score - ./bin/test -vvvv -t sqlalchemy Ignore specific test directories:: diff --git a/bootstrap.sh b/bootstrap.sh index d5b6f500..733c39a0 100644 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -18,7 +18,6 @@ # Default variables. CRATEDB_VERSION=${CRATEDB_VERSION:-5.2.2} -SQLALCHEMY_VERSION=${SQLALCHEMY_VERSION:-<2.1} function print_header() { @@ -71,16 +70,7 @@ function setup_package() { fi # Install package in editable mode. - pip install ${PIP_OPTIONS} --editable='.[sqlalchemy,test]' - - # Install designated SQLAlchemy version. 
- if [ -n "${SQLALCHEMY_VERSION}" ]; then - if [ "${SQLALCHEMY_VERSION}" = "latest" ]; then - pip install ${PIP_OPTIONS} --upgrade "sqlalchemy" - else - pip install ${PIP_OPTIONS} --upgrade "sqlalchemy${SQLALCHEMY_VERSION}" - fi - fi + pip install ${PIP_OPTIONS} --editable='.[test]' } @@ -93,8 +83,6 @@ function finalize() { # Some steps before dropping into the activated virtualenv. echo echo "Sandbox environment ready" - echo -n "Using SQLAlchemy version: " - python -c 'import sqlalchemy; print(sqlalchemy.__version__)' echo } diff --git a/docs/by-example/index.rst b/docs/by-example/index.rst index 39c503e4..d6c0d2ec 100644 --- a/docs/by-example/index.rst +++ b/docs/by-example/index.rst @@ -5,11 +5,8 @@ By example ########## This part of the documentation enumerates different kinds of examples how to -use the CrateDB Python client. - - -DB API, HTTP, and BLOB interfaces -================================= +use the CrateDB Python DBAPI HTTP client for standards-based database +conversations, and the proprietary BLOB interfaces. The examples in this section are all about CrateDB's `Python DB API`_ interface, the plain HTTP API interface, and a convenience interface for working with @@ -27,24 +24,4 @@ methods, and behaviors of the ``Connection`` and ``Cursor`` objects. blob -.. _sqlalchemy-by-example: - -SQLAlchemy by example -===================== - -The examples in this section are all about CrateDB's `SQLAlchemy`_ dialect, and -its corresponding API interfaces, see also :ref:`sqlalchemy-support`. - -.. toctree:: - :maxdepth: 1 - - sqlalchemy/getting-started - sqlalchemy/crud - sqlalchemy/working-with-types - sqlalchemy/advanced-querying - sqlalchemy/inspection-reflection - sqlalchemy/dataframe - - .. _Python DB API: https://peps.python.org/pep-0249/ -.. 
_SQLAlchemy: https://www.sqlalchemy.org/ diff --git a/docs/by-example/sqlalchemy/advanced-querying.rst b/docs/by-example/sqlalchemy/advanced-querying.rst deleted file mode 100644 index 7c4d6781..00000000 --- a/docs/by-example/sqlalchemy/advanced-querying.rst +++ /dev/null @@ -1,335 +0,0 @@ -.. _sqlalchemy-advanced-querying: - -============================= -SQLAlchemy: Advanced querying -============================= - -This section of the documentation demonstrates running queries using a fulltext -index with an analyzer, queries using counting and aggregations, and support for -the ``INSERT...FROM SELECT`` and ``INSERT...RETURNING`` constructs, all using the -CrateDB SQLAlchemy dialect. - - -.. rubric:: Table of Contents - -.. contents:: - :local: - - -Introduction -============ - -Import the relevant symbols: - - >>> import sqlalchemy as sa - >>> from sqlalchemy.orm import sessionmaker - >>> try: - ... from sqlalchemy.orm import declarative_base - ... except ImportError: - ... from sqlalchemy.ext.declarative import declarative_base - >>> from uuid import uuid4 - -Establish a connection to the database, see also :ref:`sa:engines_toplevel` -and :ref:`connect`: - - >>> engine = sa.create_engine(f"crate://{crate_host}") - >>> connection = engine.connect() - -Create an SQLAlchemy :doc:`Session `: - - >>> session = sessionmaker(bind=engine)() - >>> Base = declarative_base() - - -Introduction to fulltext indexes -================================ - -:ref:`crate-reference:fulltext-indices` take the contents of one or more fields -and split it up into tokens that are used for fulltext-search. The -transformation from a text to separate tokens is done by an analyzer. In order -to conduct fulltext search queries, we need to create a table with a -:ref:`fulltext index with an analyzer `. - -.. 
code-block:: sql - - CREATE TABLE characters ( - id STRING PRIMARY KEY, - name STRING, - quote STRING, - INDEX name_ft USING fulltext(name) WITH (analyzer = 'english'), - INDEX quote_ft USING fulltext(quote) WITH (analyzer = 'english') - ) - -We have to create this table using SQL because it is currently not possible to -create ``INDEX`` fields using SQLAlchemy's :ref:`sa:orm_declarative_mapping`. -However, we can define the table to use all other operations: - - >>> def gen_key(): - ... return str(uuid4()) - - >>> class Character(Base): - ... __tablename__ = 'characters' - ... id = sa.Column(sa.String, primary_key=True, default=gen_key) - ... name = sa.Column(sa.String) - ... quote = sa.Column(sa.String) - ... name_ft = sa.Column(sa.String) - ... quote_ft = sa.Column(sa.String) - ... __mapper_args__ = { - ... 'exclude_properties': ['name_ft', 'quote_ft'] - ... } - -We define ``name_ft`` and ``quote_ft`` as regular columns, but add them under -``__mapper_args__.exclude_properties`` to ensure they're excluded from insert -or update operations. - -In order to support fulltext query operations, the CrateDB SQLAlchemy dialect -provides the :ref:`crate-reference:predicates_match` through its ``match`` -function. - -Let's add two records we use for testing. - - >>> arthur = Character(name='Arthur Dent') - >>> arthur.quote = "Let's go somewhere." - >>> session.add(arthur) - - >>> trillian = Character(name='Tricia McMillan') - >>> trillian.quote = "We're on a space ship Arthur. In space." - >>> session.add(trillian) - - >>> session.commit() - -After ``INSERT`` statements are submitted to the database, the newly inserted -records aren't immediately available for retrieval, because the index is only -updated periodically (default: each second). 
In order to synchronize that, -explicitly refresh the table: - - >>> _ = connection.execute(sa.text("REFRESH TABLE characters")) - - -Fulltext search with MATCH predicate -==================================== - -Fulltext search in CrateDB is performed using :ref:`crate-reference:predicates_match`. -The CrateDB SQLAlchemy dialect comes with a ``match`` function, which can be used to -search on one or multiple fields. - - >>> from crate.client.sqlalchemy.predicates import match - - >>> session.query(Character.name) \ - ... .filter(match(Character.name_ft, 'Arthur')) \ - ... .all() - [('Arthur Dent',)] - -To get the relevance of a matching row, you can select the ``_score`` system -column. It is a numeric value which is relative to the other rows. -The higher the score value, the more relevant the row. - -In most cases, ``_score`` is not part of the SQLAlchemy table definition, -so it must be passed as a verbatim string, using ``literal_column``: - - >>> session.query(Character.name, sa.literal_column('_score')) \ - ... .filter(match(Character.quote_ft, 'space')) \ - ... .all() - [('Tricia McMillan', ...)] - -To search multiple columns, use a dictionary where the keys are the columns and -the values are a ``boost``. A ``boost`` is a factor that increases the relevance -of a column in respect to the other columns: - - >>> session.query(Character.name) \ - ... .filter(match({Character.name_ft: 1.5, Character.quote_ft: 0.1}, - ... 'Arthur')) \ - ... .order_by(sa.desc(sa.literal_column('_score'))) \ - ... .all() - [('Arthur Dent',), ('Tricia McMillan',)] - -The ``match_type`` argument determines how a single ``query_term`` is applied, -and how the resulting ``_score`` is computed. Thus, it influences which -documents are considered more relevant. The default selection is ``best_fields``. -For more information, see :ref:`crate-reference:predicates_match_types`. - -If you want to sort the results by ``_score``, you can use the ``order_by()`` -function. 
- - >>> session.query(Character.name) \ - ... .filter( - ... match(Character.name_ft, 'Arth', - ... match_type='phrase', - ... options={'fuzziness': 3}) - ... ) \ - ... .all() - [('Arthur Dent',)] - -It is not possible to specify options without the ``match_type`` argument: - - >>> session.query(Character.name) \ - ... .filter( - ... match(Character.name_ft, 'Arth', - ... options={'fuzziness': 3}) - ... ) \ - ... .all() - Traceback (most recent call last): - ValueError: missing match_type. It's not allowed to specify options without match_type - - -Aggregates: Counting and grouping -================================= - -SQLAlchemy supports different approaches to issue a query with a count -aggregate function. Take a look at the `count result rows`_ documentation -for a full overview. - -CrateDB currently does not support all variants as it can not handle the -sub-queries yet. - -This means that queries using ``count()`` have to be written in one of the -following ways: - - >>> session.query(sa.func.count(Character.id)).scalar() - 2 - - >>> session.query(sa.func.count('*')).select_from(Character).scalar() - 2 - -Using the ``group_by`` clause is similar: - - >>> session.query(sa.func.count(Character.id), Character.name) \ - ... .group_by(Character.name) \ - ... .order_by(sa.desc(sa.func.count(Character.id))) \ - ... .order_by(Character.name).all() - [(1, 'Arthur Dent'), (1, 'Tricia McMillan')] - - -``INSERT...FROM SELECT`` -======================== - -In SQLAlchemy, the ``insert().from_select()`` function returns a new ``Insert`` -construct, which represents an ``INSERT...FROM SELECT`` statement. This -functionality is supported by the CrateDB client library. Here is an example -that uses ``insert().from_select()``. - -First, let's define and create the tables: - - >>> from sqlalchemy import select, insert - - >>> class Todos(Base): - ... __tablename__ = 'todos' - ... __table_args__ = { - ... 'crate_number_of_replicas': '0' - ... } - ... 
id = sa.Column(sa.String, primary_key=True, default=gen_key) - ... content = sa.Column(sa.String) - ... status = sa.Column(sa.String) - - >>> class ArchivedTasks(Base): - ... __tablename__ = 'archived_tasks' - ... __table_args__ = { - ... 'crate_number_of_replicas': '0' - ... } - ... id = sa.Column(sa.String, primary_key=True) - ... content = sa.Column(sa.String) - - >>> Base.metadata.create_all(bind=engine) - -Let's add a task to the ``Todo`` table: - - >>> task = Todos(content='Write Tests', status='done') - >>> session.add(task) - >>> session.commit() - >>> _ = connection.execute(sa.text("REFRESH TABLE todos")) - -Now, let's use ``insert().from_select()`` to archive the task into the -``ArchivedTasks`` table: - - >>> sel = select(Todos.id, Todos.content).where(Todos.status == "done") - >>> ins = insert(ArchivedTasks).from_select(['id', 'content'], sel) - >>> result = session.execute(ins) - >>> session.commit() - -This will emit the following ``INSERT`` statement to the database: - - INSERT INTO archived_tasks (id, content) - (SELECT todos.id, todos.content FROM todos WHERE todos.status = 'done') - -Now, verify that the data is present in the database: - - >>> _ = connection.execute(sa.text("REFRESH TABLE archived_tasks")) - >>> pprint([str(r) for r in session.execute(sa.text("SELECT content FROM archived_tasks"))]) - ["('Write Tests',)"] - - -``INSERT...RETURNING`` -====================== - -The ``RETURNING`` clause can be used to retrieve the result rows of an ``INSERT`` -operation. It may be specified using the ``Insert.returning()`` method. - -The first step is to define the table: - - >>> from sqlalchemy import insert - - >>> class User(Base): - ... __tablename__ = 'user' - ... __table_args__ = { - ... 'crate_number_of_replicas': '0' - ... } - ... id = sa.Column(sa.String, primary_key=True, default=gen_key) - ... username = sa.Column(sa.String) - ... 
email = sa.Column(sa.String) - - >>> Base.metadata.create_all(bind=engine) - -Now, let's use the returning clause on our insert to retrieve the values inserted: - - >>> stmt = insert(User).values(username='Crate', email='crate@crate.io').returning(User.username, User.email) - >>> result = session.execute(stmt) - >>> session.commit() - >>> print([str(r) for r in result]) - ["('Crate', 'crate@crate.io')"] - -The following ``INSERT...RETURNING`` statement was issued to the database:: - - INSERT INTO user (id, username, email) - VALUES (:id, :username, :email) - RETURNING user.id, user.username, user.email - -``UPDATE...RETURNING`` - -The ``RETURNING`` clause can also be used with an ``UPDATE`` operation to return -specified rows to be returned on execution. It can be specified using the -``Update.returning()`` method. - - -We can reuse the user table previously created in the ``INSERT...RETURNING`` section. - -Insert a user and get the user id: - - >>> from sqlalchemy import insert, update - - >>> stmt = insert(User).values(username='Arthur Dent', email='arthur_dent@crate.io').returning(User.id, User.username, User.email) - >>> result = session.execute(stmt) - >>> session.commit() - >>> uid = [r[0] for r in result][0] - -Now let's update the user: - - >>> stmt = update(User).where(User.id == uid).values(username='Tricia McMillan', email='tricia_mcmillan@crate.io').returning(User.username, User.email) - >>> res = session.execute(stmt) - >>> session.commit() - >>> print([str(r) for r in res]) - ["('Tricia McMillan', 'tricia_mcmillan@crate.io')"] - -The following ``UPDATE...RETURNING`` statement was issued to the database:: - - UPDATE user SET username=:username, email=:email - WHERE user.id = :id_1 - RETURNING user.username, user.email - -.. hidden: Disconnect from database - - >>> session.close() - >>> connection.close() - >>> engine.dispose() - - -.. 
_count result rows: https://docs.sqlalchemy.org/en/14/orm/tutorial.html#counting diff --git a/docs/by-example/sqlalchemy/crud.rst b/docs/by-example/sqlalchemy/crud.rst deleted file mode 100644 index 5a62df40..00000000 --- a/docs/by-example/sqlalchemy/crud.rst +++ /dev/null @@ -1,301 +0,0 @@ -.. _sqlalchemy-crud: - -================================================ -SQLAlchemy: Create, retrieve, update, and delete -================================================ - -This section of the documentation shows how to query, insert, update and delete -records using CrateDB's SQLAlchemy integration, it includes common scenarios -like: - -- Filtering records -- Limiting result sets -- Inserts and updates with default values - - -.. rubric:: Table of Contents - -.. contents:: - :local: - - -Introduction -============ - -Import the relevant symbols: - - >>> import sqlalchemy as sa - >>> from datetime import datetime - >>> from sqlalchemy import delete, func, text - >>> from sqlalchemy.orm import sessionmaker - >>> try: - ... from sqlalchemy.orm import declarative_base - ... except ImportError: - ... from sqlalchemy.ext.declarative import declarative_base - >>> from crate.client.sqlalchemy.types import ObjectArray - -Establish a connection to the database, see also :ref:`sa:engines_toplevel` -and :ref:`connect`: - - >>> engine = sa.create_engine(f"crate://{crate_host}") - >>> connection = engine.connect() - -Define the ORM schema for the ``Location`` entity using SQLAlchemy's -:ref:`sa:orm_declarative_mapping`: - - >>> Base = declarative_base() - - >>> class Location(Base): - ... __tablename__ = 'locations' - ... name = sa.Column(sa.String, primary_key=True) - ... kind = sa.Column(sa.String) - ... date = sa.Column(sa.Date, default=lambda: datetime.utcnow().date()) - ... datetime_tz = sa.Column(sa.DateTime, default=datetime.utcnow) - ... datetime_notz = sa.Column(sa.DateTime, default=datetime.utcnow) - ... nullable_datetime = sa.Column(sa.DateTime) - ... 
nullable_date = sa.Column(sa.Date) - ... flag = sa.Column(sa.Boolean) - ... details = sa.Column(ObjectArray) - -Create an SQLAlchemy :doc:`Session `: - - >>> session = sessionmaker(bind=engine)() - - -Create -====== - -Insert a new location: - - >>> location = Location() - >>> location.name = 'Earth' - >>> location.kind = 'Planet' - >>> location.flag = True - - >>> session.add(location) - >>> session.flush() - -Refresh "locations" table: - - >>> _ = connection.execute(text("REFRESH TABLE locations")) - -Inserted location is available: - - >>> location = session.query(Location).filter_by(name='Earth').one() - >>> location.name - 'Earth' - -Retrieve the location from the database: - - >>> session.refresh(location) - >>> location.name - 'Earth' - -Three ``date``/``datetime`` columns are defined with default values, so -creating a new record will automatically set them: - - >>> type(location.date) - - - >>> type(location.datetime_tz) - - - >>> type(location.datetime_notz) - - -The location instance also has other ``date`` and ``datetime`` attributes which -are nullable. Because there is no default value defined in the ORM schema for -them, they are not set when the record is inserted: - - >>> location.nullable_datetime is None - True - - >>> location.nullable_date is None - True - -.. 
hidden: - - >>> from datetime import datetime, timedelta - >>> now = datetime.utcnow() - - >>> (now - location.datetime_tz).seconds < 4 - True - - >>> (now.date() - location.date) == timedelta(0) - True - - -Retrieve -======== - -Using the connection to execute a select statement: - - >>> result = connection.execute(text('select name from locations order by name')) - >>> result.rowcount - 14 - - >>> result.first() - ('Aldebaran',) - -Using the ORM to query the locations: - - >>> locations = session.query(Location).order_by('name') - >>> [l.name for l in locations if l is not None][:2] - ['Aldebaran', 'Algol'] - -With limit and offset: - - >>> locations = session.query(Location).order_by('name').offset(1).limit(2) - >>> [l.name for l in locations if l is not None] - ['Algol', 'Allosimanius Syneca'] - -With filter: - - >>> location = session.query(Location).filter_by(name='Algol').one() - >>> location.name - 'Algol' - -Order by: - - >>> locations = session.query(Location).filter(Location.name is not None).order_by(sa.desc(Location.name)) - >>> locations = locations.limit(2) - >>> [l.name for l in locations] - ['Outer Eastern Rim', 'North West Ripple'] - - -Update -====== - -Back to our original object ``Location(Earth)``. - - >>> location = session.query(Location).filter_by(name='Earth').one() - -The datetime and date can be set using an update statement: - - >>> location.nullable_date = datetime.utcnow().date() - >>> location.nullable_datetime = datetime.utcnow() - >>> session.flush() - -Refresh "locations" table: - - >>> _ = connection.execute(text("REFRESH TABLE locations")) - -Boolean values get set natively: - - >>> location.flag - True - -Reload the object from the database: - - >>> session.refresh(location) - -And verify that the date and datetime was persisted: - - >>> location.nullable_datetime is not None - True - - >>> location.nullable_date is not None - True - -Update a record using SQL: - - >>> with engine.begin() as conn: - ... 
result = conn.execute(text("update locations set kind='Heimat' where name='Earth'")) - ... result.rowcount - 1 - -Update multiple records: - - >>> for x in range(10): - ... loc = Location() - ... loc.name = 'Ort %d' % x - ... loc.kind = 'Update' - ... session.add(loc) - >>> session.flush() - -Refresh table: - - >>> _ = connection.execute(text("REFRESH TABLE locations")) - -Update multiple records using SQL: - - >>> with engine.begin() as conn: - ... result = conn.execute(text("update locations set flag=true where kind='Update'")) - ... result.rowcount - 10 - -Update all records using SQL, and check that the number of documents affected -of an update without ``where-clause`` matches the number of all documents in -the table: - - >>> with engine.begin() as conn: - ... result = conn.execute(text(u"update locations set kind='Überall'")) - ... result.rowcount == conn.execute(text("select * from locations limit 100")).rowcount - True - - >>> session.commit() - -Refresh "locations" table: - - >>> _ = connection.execute(text("REFRESH TABLE locations")) - -Objects can be used within lists, too: - - >>> location = session.query(Location).filter_by(name='Folfanga').one() - >>> location.details = [{'size': 'huge'}, {'clima': 'cold'}] - - >>> session.commit() - >>> session.refresh(location) - - >>> location.details - [{'size': 'huge'}, {'clima': 'cold'}] - -Update the record: - - >>> location.details[1] = {'clima': 'hot'} - - >>> session.commit() - >>> session.refresh(location) - - >>> location.details - [{'size': 'huge'}, {'clima': 'hot'}] - -Reset the record: - - >>> location.details = [] - >>> session.commit() - >>> session.refresh(location) - - >>> location.details - [] - -.. seealso:: - - The documentation section :ref:`sqlalchemy-working-with-types` has more - details about this topic. - - -Delete -====== - -Deleting a record with SQLAlchemy works like this. 
- - >>> session.query(Location).count() - 24 - - >>> location = session.query(Location).first() - >>> session.delete(location) - >>> session.commit() - >>> session.flush() - - >>> _ = connection.execute(text("REFRESH TABLE locations")) - - >>> session.query(Location).count() - 23 - - -.. hidden: Disconnect from database - - >>> session.close() - >>> connection.close() - >>> engine.dispose() diff --git a/docs/by-example/sqlalchemy/dataframe.rst b/docs/by-example/sqlalchemy/dataframe.rst deleted file mode 100644 index 60c49d1d..00000000 --- a/docs/by-example/sqlalchemy/dataframe.rst +++ /dev/null @@ -1,258 +0,0 @@ -.. _sqlalchemy-pandas: -.. _sqlalchemy-dataframe: - -================================ -SQLAlchemy: DataFrame operations -================================ - -.. rubric:: Table of Contents - -.. contents:: - :local: - - -About -===== - -This section of the documentation demonstrates support for efficient batch/bulk -``INSERT`` operations with `pandas`_ and `Dask`_, using the CrateDB SQLAlchemy dialect. - -Efficient bulk operations are needed for typical `ETL`_ batch processing and -data streaming workloads, for example to move data in and out of OLAP data -warehouses, as contrasted to interactive online transaction processing (OLTP) -applications. The strategies of `batching`_ together series of records for -improving performance are also referred to as `chunking`_. - - -Introduction -============ - -pandas ------- -The :ref:`pandas DataFrame ` is a structure that contains -two-dimensional data and its corresponding labels. DataFrames are widely used -in data science, machine learning, scientific computing, and many other -data-intensive fields. - -DataFrames are similar to SQL tables or the spreadsheets that you work with in -Excel or Calc. In many cases, DataFrames are faster, easier to use, and more -powerful than tables or spreadsheets because they are an integral part of the -`Python`_ and `NumPy`_ ecosystems. 
- -The :ref:`pandas I/O subsystem ` for `relational databases`_ -using `SQL`_ is based on `SQLAlchemy`_. - -Dask ----- -`Dask`_ is a flexible library for parallel computing in Python, which scales -Python code from multi-core local machines to large distributed clusters in -the cloud. Dask provides a familiar user interface by mirroring the APIs of -other libraries in the PyData ecosystem, including `pandas`_, `scikit-learn`_, -and `NumPy`_. - -A :doc:`dask:dataframe` is a large parallel DataFrame composed of many smaller -pandas DataFrames, split along the index. These pandas DataFrames may live on -disk for larger-than-memory computing on a single machine, or on many different -machines in a cluster. One Dask DataFrame operation triggers many operations on -the constituent pandas DataFrames. - - -Compatibility notes -=================== - -.. NOTE:: - - Please note that DataFrame support for pandas and Dask is only validated - with Python 3.8 and higher, and SQLAlchemy 1.4 and higher. We recommend - to use the most recent versions of those libraries. - - -Efficient ``INSERT`` operations with pandas -=========================================== - -The package provides a ``bulk_insert`` function to use the -:meth:`pandas:pandas.DataFrame.to_sql` method more efficiently, based on the -`CrateDB bulk operations`_ endpoint. It will effectively split your insert -workload across multiple batches, using a defined chunk size. - - >>> import sqlalchemy as sa - >>> from crate.client.sqlalchemy.support import insert_bulk - >>> from pueblo.testing.pandas import makeTimeDataFrame - ... - >>> # Define number of records, and chunk size. - >>> INSERT_RECORDS = 42 - >>> CHUNK_SIZE = 8 - ... - >>> # Create a pandas DataFrame, and connect to CrateDB. - >>> df = makeTimeDataFrame(nper=INSERT_RECORDS, freq="S") - >>> engine = sa.create_engine(f"crate://{crate_host}") - ... - >>> # Insert content of DataFrame using batches of records. - >>> # Effectively, it's six. 42 / 8 = 5.25. 
- >>> df.to_sql( - ... name="test-testdrive", - ... con=engine, - ... if_exists="replace", - ... index=False, - ... chunksize=CHUNK_SIZE, - ... method=insert_bulk, - ... ) - -.. TIP:: - - You will observe that the optimal chunk size highly depends on the shape of - your data, specifically the width of each record, i.e. the number of columns - and their individual sizes, which will in the end determine the total size of - each batch/chunk. - - A few details should be taken into consideration when determining the optimal - chunk size for a specific dataset. We are outlining the two major ones. - - - First, when working with data larger than the main memory available on your - machine, each chunk should be small enough to fit into the memory, but large - enough to minimize the overhead of a single data insert operation. Depending - on whether you are running other workloads on the same machine, you should - also account for the total share of heap memory you will assign to each domain, - to prevent overloading the system as a whole. - - - Second, as each batch is submitted using HTTP, you should know about the request - size limits and other constraints of your HTTP infrastructure, which may include - any types of HTTP intermediaries relaying information between your database client - application and your CrateDB cluster. For example, HTTP proxy servers or load - balancers not optimally configured for performance, or web application firewalls - and intrusion prevention systems may hamper HTTP communication, sometimes in - subtle ways, for example based on request size constraints, or throttling - mechanisms. If you are working with very busy systems, and hosting it on shared - infrastructure, details like `SNAT port exhaustion`_ may also come into play. - - You will need to determine a good chunk size by running corresponding experiments - on your own behalf. For that purpose, you can use the `insert_pandas.py`_ program - as a blueprint. 
- - It is a good idea to start your explorations with a chunk size of 5_000, and - then see if performance improves when you increase or decrease that figure. - People are reporting that 10_000-20_000 is their optimal setting, but if you - process, for example, just three "small" columns, you may also experiment with - `leveling up to 200_000`_, because `the chunksize should not be too small`_. - If it is too small, the I/O cost will be too high to overcome the benefit of - batching. - - In order to learn more about what wide- vs. long-form (tidy, stacked, narrow) - data means in the context of `DataFrame computing`_, let us refer you to `a - general introduction `_, the corresponding section in - the `Data Computing book `_, and a `pandas - tutorial `_ about the same topic. - - -Efficient ``INSERT`` operations with Dask -========================================= - -The same ``bulk_insert`` function presented in the previous section will also -be used in the context of `Dask`_, in order to make the -:func:`dask:dask.dataframe.to_sql` method more efficiently, based on the -`CrateDB bulk operations`_ endpoint. - -The example below will partition your insert workload into equal-sized parts, and -schedule it to be executed on Dask cluster resources, using a defined number of -compute partitions. Each worker instance will then insert its partition's records -in a batched/chunked manner, using a defined chunk size, effectively using the -pandas implementation introduced in the previous section. - - >>> import dask.dataframe as dd - >>> from crate.client.sqlalchemy.support import insert_bulk - >>> from pueblo.testing.pandas import makeTimeDataFrame - ... - >>> # Define the number of records, the number of computing partitions, - >>> # and the chunk size of each database insert operation. - >>> INSERT_RECORDS = 100 - >>> NPARTITIONS = 4 - >>> CHUNK_SIZE = 25 - ... - >>> # Create a Dask DataFrame. 
- >>> df = makeTimeDataFrame(nper=INSERT_RECORDS, freq="S") - >>> ddf = dd.from_pandas(df, npartitions=NPARTITIONS) - ... - >>> # Insert content of DataFrame using multiple workers on a - >>> # compute cluster, transferred using batches of records. - >>> ddf.to_sql( - ... name="test-testdrive", - ... uri=f"crate://{crate_host}", - ... if_exists="replace", - ... index=False, - ... chunksize=CHUNK_SIZE, - ... method=insert_bulk, - ... parallel=True, - ... ) - - -.. TIP:: - - You will observe that optimizing your workload will now also involve determining a - good value for the ``NPARTITIONS`` argument, based on the capacity and topology of - the available compute resources, and based on workload characteristics or policies - like peak- vs. balanced- vs. shared-usage. For example, on a machine or cluster fully - dedicated to the problem at hand, you may want to use all available processor cores, - while on a shared system, this strategy may not be appropriate. - - If you want to dedicate all available compute resources on your machine, you may want - to use the number of CPU cores as a value to the ``NPARTITIONS`` argument. You can find - out about the available CPU cores on your machine, for example by running the ``nproc`` - command in your terminal. - - Depending on the implementation and runtime behavior of the compute task, the optimal - number of worker processes, determined by the ``NPARTITIONS`` argument, also needs to be - figured out by running a few test iterations. For that purpose, you can use the - `insert_dask.py`_ program as a blueprint. - - Adjusting this value in both directions is perfectly fine: If you observe that you are - overloading the machine, maybe because there are workloads scheduled other than the one - you are running, try to reduce the value. 
If fragments/steps of your implementation - involve waiting for network or disk I/O, you may want to increase the number of workers - beyond the number of available CPU cores, to increase utilization. On the other hand, - you should be wary about not over-committing resources too much, as it may slow your - system down. - - Before getting more serious with Dask, you are welcome to read and watch the excellent - :doc:`dask:best-practices` and :ref:`dask:dataframe.performance` resources, in order to - learn about things to avoid, and beyond. For finding out if your compute workload - scheduling is healthy, you can, for example, use Dask's :doc:`dask:dashboard`. - -.. WARNING:: - - Because the settings assigned in the example above fit together well, the ``to_sql()`` - instruction will effectively run four insert operations, executed in parallel, and - scheduled optimally on the available cluster resources. - - However, not using those settings sensibly, you can easily misconfigure the resource - scheduling system, and overload the underlying hardware or operating system, virtualized - or not. This is why experimenting with different parameters, and a real dataset, is crucial. - - - -.. hidden: Disconnect from database - - >>> engine.dispose() - - -.. _batching: https://en.wikipedia.org/wiki/Batch_processing#Common_batch_processing_usage -.. _chunking: https://en.wikipedia.org/wiki/Chunking_(computing) -.. _CrateDB bulk operations: https://crate.io/docs/crate/reference/en/latest/interfaces/http.html#bulk-operations -.. _Dask: https://en.wikipedia.org/wiki/Dask_(software) -.. _DataFrame computing: https://realpython.com/pandas-dataframe/ -.. _ETL: https://en.wikipedia.org/wiki/Extract,_transform,_load -.. _insert_dask.py: https://github.com/crate/cratedb-examples/blob/main/by-language/python-sqlalchemy/insert_dask.py -.. _insert_pandas.py: https://github.com/crate/cratedb-examples/blob/main/by-language/python-sqlalchemy/insert_pandas.py -.. 
_leveling up to 200_000: https://acepor.github.io/2017/08/03/using-chunksize/ -.. _NumPy: https://en.wikipedia.org/wiki/NumPy -.. _pandas: https://en.wikipedia.org/wiki/Pandas_(software) -.. _pandas DataFrame: https://pandas.pydata.org/pandas-docs/stable/reference/frame.html -.. _Python: https://en.wikipedia.org/wiki/Python_(programming_language) -.. _relational databases: https://en.wikipedia.org/wiki/Relational_database -.. _scikit-learn: https://en.wikipedia.org/wiki/Scikit-learn -.. _SNAT port exhaustion: https://learn.microsoft.com/en-us/azure/load-balancer/troubleshoot-outbound-connection -.. _SQL: https://en.wikipedia.org/wiki/SQL -.. _SQLAlchemy: https://aosabook.org/en/v2/sqlalchemy.html -.. _the chunksize should not be too small: https://acepor.github.io/2017/08/03/using-chunksize/ -.. _wide-narrow-general: https://en.wikipedia.org/wiki/Wide_and_narrow_data -.. _wide-narrow-data-computing: https://dtkaplan.github.io/DataComputingEbook/chap-wide-vs-narrow.html#chap:wide-vs-narrow -.. _wide-narrow-pandas-tutorial: https://anvil.works/blog/tidy-data diff --git a/docs/by-example/sqlalchemy/getting-started.rst b/docs/by-example/sqlalchemy/getting-started.rst deleted file mode 100644 index 33e8f75d..00000000 --- a/docs/by-example/sqlalchemy/getting-started.rst +++ /dev/null @@ -1,211 +0,0 @@ -.. _sqlalchemy-getting-started: - -=========================== -SQLAlchemy: Getting started -=========================== - -This section of the documentation shows how to connect to CrateDB using its -SQLAlchemy dialect, and how to run basic DDL statements based on an SQLAlchemy -ORM schema definition. - -Subsequent sections of the documentation will cover: - -- :ref:`sqlalchemy-crud` -- :ref:`sqlalchemy-working-with-types` -- :ref:`sqlalchemy-advanced-querying` -- :ref:`sqlalchemy-inspection-reflection` - - -.. rubric:: Table of Contents - -.. 
contents:: - :local: - - -Introduction -============ - -Import the relevant symbols: - - >>> import sqlalchemy as sa - >>> from sqlalchemy.orm import sessionmaker - >>> try: - ... from sqlalchemy.orm import declarative_base - ... except ImportError: - ... from sqlalchemy.ext.declarative import declarative_base - -Establish a connection to the database, see also :ref:`sa:engines_toplevel` -and :ref:`connect`: - - >>> engine = sa.create_engine(f"crate://{crate_host}") - >>> connection = engine.connect() - -Create an SQLAlchemy :doc:`Session `: - - >>> session = sessionmaker(bind=engine)() - >>> Base = declarative_base() - - -Connect -======= - -In SQLAlchemy, a connection is established using the ``create_engine`` function. -This function takes a connection string, actually an `URL`_, that varies from -database to database. - -In order to connect to a CrateDB cluster, the following connection strings are -valid: - - >>> sa.create_engine('crate://') - Engine(crate://) - -This will connect to the default server ('127.0.0.1:4200'). In order to connect -to a different server the following syntax can be used: - - >>> sa.create_engine('crate://otherserver:4200') - Engine(crate://otherserver:4200) - -Multiple Hosts --------------- -Because CrateDB is a clustered database running on multiple servers, it is -recommended to connect to all of them. This enables the DB-API layer to -use round-robin to distribute the load and skip a server if it becomes -unavailable. In order to make the driver aware of multiple servers, use -the ``connect_args`` parameter like so: - - >>> sa.create_engine('crate://', connect_args={ - ... 'servers': ['host1:4200', 'host2:4200'] - ... }) - Engine(crate://) - -TLS Options ------------ -As defined in :ref:`https_connection`, the client validates SSL server -certificates by default. To configure this further, use e.g. the ``ca_cert`` -attribute within the ``connect_args``, like: - - >>> ssl_engine = sa.create_engine( - ... 'crate://', - ... 
connect_args={ - ... 'servers': ['https://host1:4200'], - ... 'ca_cert': '/path/to/cacert.pem', - ... }) - -In order to disable SSL verification, use ``verify_ssl_cert = False``, like: - - >>> ssl_engine = sa.create_engine( - ... 'crate://', - ... connect_args={ - ... 'servers': ['https://host1:4200'], - ... 'verify_ssl_cert': False, - ... }) - -Timeout Options ---------------- -In order to configure TCP timeout options, use the ``timeout`` parameter within -``connect_args``, - - >>> timeout_engine = sa.create_engine('crate://localhost/', connect_args={'timeout': 42.42}) - >>> timeout_engine.raw_connection().driver_connection.client._pool_kw["timeout"] - 42.42 - -or use the ``timeout`` URL parameter within the database connection URL. - - >>> timeout_engine = sa.create_engine('crate://localhost/?timeout=42.42') - >>> timeout_engine.raw_connection().driver_connection.client._pool_kw["timeout"] - 42.42 - -Pool Size ---------- - -In order to configure the database connection pool size, use the ``pool_size`` -parameter within ``connect_args``, - - >>> timeout_engine = sa.create_engine('crate://localhost/', connect_args={'pool_size': 20}) - >>> timeout_engine.raw_connection().driver_connection.client._pool_kw["maxsize"] - 20 - -or use the ``pool_size`` URL parameter within the database connection URL. - - >>> timeout_engine = sa.create_engine('crate://localhost/?pool_size=20') - >>> timeout_engine.raw_connection().driver_connection.client._pool_kw["maxsize"] - 20 - - -Basic DDL operations -==================== - -.. note:: - - CrateDB currently does not know about different "databases". Instead, - tables can be created in different *schemas*. Schemas are created - implicitly on table creation and cannot be created explicitly. If a schema - does not exist yet, it will be created. - - The default CrateDB schema is ``doc``, and if you do not specify a schema, - this is what will be used. 
- - See also :ref:`schema-selection` and :ref:`crate-reference:ddl-create-table-schemas`. - - -Create tables -------------- - -First, define the table as a class, using SQLAlchemy's :ref:`sa:orm_declarative_mapping`: - - >>> class Department(Base): - ... __tablename__ = 'departments' - ... __table_args__ = { - ... 'crate_number_of_replicas': '0' - ... } - ... id = sa.Column(sa.String, primary_key=True) - ... name = sa.Column(sa.String) - ... code = sa.Column(sa.Integer) - -As seen below, the table doesn't exist yet: - - >>> engine.dialect.has_table(connection, table_name='departments') - False - -In order to create all missing tables, the ``create_all`` method can be used: - - >>> Base.metadata.create_all(bind=engine) - -With that, the table has been created: - - >>> engine.dialect.has_table(connection, table_name='departments') - True - -Let's also verify that by querying the ``information_schema.columns`` table: - - >>> stmt = ("select table_name, column_name, ordinal_position, data_type " - ... "from information_schema.columns " - ... "where table_name = 'departments' " - ... "order by column_name") - >>> pprint([str(r) for r in connection.execute(sa.text(stmt))]) - ["('departments', 'code', 3, 'integer')", - "('departments', 'id', 1, 'text')", - "('departments', 'name', 2, 'text')"] - - -Drop tables ------------ - -In order to delete all tables referenced within the ORM schema, invoke -``Base.metadata.drop_all()``. To delete a single table, use -``drop(...)``, as shown below: - - >>> Base.metadata.tables['departments'].drop(engine) - - >>> engine.dialect.has_table(connection, table_name='departments') - False - - -.. hidden: Disconnect from database - - >>> session.close() - >>> connection.close() - >>> engine.dispose() - - -..
_URL: https://en.wikipedia.org/wiki/Uniform_Resource_Locator diff --git a/docs/by-example/sqlalchemy/inspection-reflection.rst b/docs/by-example/sqlalchemy/inspection-reflection.rst deleted file mode 100644 index bb291157..00000000 --- a/docs/by-example/sqlalchemy/inspection-reflection.rst +++ /dev/null @@ -1,126 +0,0 @@ -.. _sqlalchemy-inspection-reflection: - -===================================================== -SQLAlchemy: Database schema inspection and reflection -===================================================== - -This section shows you how to inspect the schema of a database using CrateDB's -SQLAlchemy integration. - - -Introduction -============ - -The CrateDB SQLAlchemy integration provides different ways to inspect the -database. - -1) The :ref:`runtime inspection API ` allows you to get - an ``Inspector`` instance that can be used to fetch schema names, table names - and other information. - -2) Reflection capabilities allow you to create ``Table`` instances from - existing tables to inspect their columns and constraints. - -3) A ``CrateDialect`` allows you to get connection information and it contains - low-level functions to check the existence of schemas and tables. - -All approaches require an ``Engine`` instance, which you can create like this: - - >>> import sqlalchemy as sa - >>> engine = sa.create_engine(f"crate://{crate_host}") - -This effectively establishes a connection to the database, see also -:ref:`sa:engines_toplevel` and :ref:`connect`. - - -Inspector -========= - -The :ref:`SQLAlchemy inspector ` is a low -level interface which provides a backend-agnostic system of loading lists of -schema, table, column, and constraint descriptions from a given database.
-You can create an inspector like this: - - >>> inspector = sa.inspect(engine) - -List all schemas: - - >>> inspector.get_schema_names() - ['blob', 'doc', 'information_schema', 'pg_catalog', 'sys'] - -List all tables: - - >>> set(['characters', 'cities', 'locations']).issubset(inspector.get_table_names()) - True - - >>> set(['checks', 'cluster', 'jobs', 'jobs_log']).issubset(inspector.get_table_names(schema='sys')) - True - -List all views: - - >>> inspector.get_view_names() - ['characters_view'] - -Get default schema name: - - >>> inspector.default_schema_name - 'doc' - - -Schema-supported reflection -=========================== - -A ``Table`` object can load its own schema information from the corresponding -table in the database. This process is called *reflection*, see -:ref:`sa:metadata_reflection`. - -In the most simple case you need only specify the table name, a ``MetaData`` -object, and the ``autoload_with`` argument. - -Create a SQLAlchemy table object: - - >>> meta = sa.MetaData() - >>> table = sa.Table( - ... "characters", meta, - ... autoload_with=engine) - -Reflect column data types from the table metadata: - - >>> table.columns.get('name') - Column('name', String(), table=) - - >>> table.primary_key - PrimaryKeyConstraint(Column('id', String(), table=, primary_key=True... - - -CrateDialect -============ - -After initializing the dialect instance with a connection instance, - - >>> from crate.client.sqlalchemy.dialect import CrateDialect - >>> dialect = CrateDialect() - - >>> connection = engine.connect() - >>> dialect.initialize(connection) - -the database server version and default schema name can be inquired. - - >>> dialect.server_version_info >= (1, 0, 0) - True - -Check if a schema exists: - - >>> dialect.has_schema(connection, 'doc') - True - -Check if a table exists: - - >>> dialect.has_table(connection, 'locations') - True - - -.. 
hidden: Disconnect from database - - >>> connection.close() - >>> engine.dispose() diff --git a/docs/by-example/sqlalchemy/working-with-types.rst b/docs/by-example/sqlalchemy/working-with-types.rst deleted file mode 100644 index 169acede..00000000 --- a/docs/by-example/sqlalchemy/working-with-types.rst +++ /dev/null @@ -1,265 +0,0 @@ -.. _sqlalchemy-working-with-types: - -============================================== -SQLAlchemy: Working with special CrateDB types -============================================== - -This section of the documentation shows how to work with special data types -from the CrateDB SQLAlchemy dialect. Currently, these are: - -- Container types ``ObjectType`` and ``ObjectArray``. -- Geospatial types ``Geopoint`` and ``Geoshape``. - - -.. rubric:: Table of Contents - -.. contents:: - :local: - - -Introduction -============ - -Import the relevant symbols: - - >>> import sqlalchemy as sa - >>> from datetime import datetime - >>> from geojson import Point, Polygon - >>> from sqlalchemy import delete, func, text - >>> from sqlalchemy.orm import sessionmaker - >>> from sqlalchemy.sql import operators - >>> try: - ... from sqlalchemy.orm import declarative_base - ... except ImportError: - ... from sqlalchemy.ext.declarative import declarative_base - >>> from uuid import uuid4 - >>> from crate.client.sqlalchemy.types import ObjectType, ObjectArray - >>> from crate.client.sqlalchemy.types import Geopoint, Geoshape - -Establish a connection to the database, see also :ref:`sa:engines_toplevel` -and :ref:`connect`: - - >>> engine = sa.create_engine(f"crate://{crate_host}") - >>> connection = engine.connect() - -Create an SQLAlchemy :doc:`Session `: - - >>> session = sessionmaker(bind=engine)() - >>> Base = declarative_base() - - -Introduction to container types -=============================== - -In a document oriented database, it is a common pattern to store objects within -a single field. 
For such cases, the CrateDB SQLAlchemy dialect provides the -``ObjectType`` and ``ObjectArray`` types. - -The ``ObjectType`` type effectively implements a dictionary- or map-like type. The -``ObjectArray`` type maps to a Python list of dictionaries. - -For exercising those features, let's define a schema using SQLAlchemy's -:ref:`sa:orm_declarative_mapping`: - - >>> def gen_key(): - ... return str(uuid4()) - - >>> class Character(Base): - ... __tablename__ = 'characters' - ... id = sa.Column(sa.String, primary_key=True, default=gen_key) - ... name = sa.Column(sa.String) - ... quote = sa.Column(sa.String) - ... details = sa.Column(ObjectType) - ... more_details = sa.Column(ObjectArray) - -In CrateDB's SQL dialect, those container types map to :ref:`crate-reference:type-object` -and :ref:`crate-reference:type-array`. - - -``ObjectType`` -============== - -Let's add two records which have additional items within the ``details`` field. -Note that item keys have not been defined in the DDL schema, effectively -demonstrating the :ref:`DYNAMIC column policy `. - - >>> arthur = Character(name='Arthur Dent') - >>> arthur.details = {} - >>> arthur.details['gender'] = 'male' - >>> arthur.details['species'] = 'human' - >>> session.add(arthur) - - >>> trillian = Character(name='Tricia McMillan') - >>> trillian.details = {} - >>> trillian.quote = "We're on a space ship Arthur. In space." - >>> trillian.details['gender'] = 'female' - >>> trillian.details['species'] = 'human' - >>> trillian.details['female_only_attribute'] = 1 - >>> session.add(trillian) - - >>> session.commit() - -After ``INSERT`` statements are submitted to the database, the newly inserted -records aren't immediately available for retrieval because the index is only -updated periodically (default: each second). 
In order to synchronize that, -refresh the table: - - >>> _ = connection.execute(text("REFRESH TABLE characters")) - -A subsequent select query will see all the records: - - >>> query = session.query(Character).order_by(Character.name) - >>> [(c.name, c.details['gender']) for c in query] - [('Arthur Dent', 'male'), ('Tricia McMillan', 'female')] - -It is also possible to just select a part of the document, even inside the -``ObjectType`` type: - - >>> sorted(session.query(Character.details['gender']).all()) - [('female',), ('male',)] - -In addition, filtering on the attributes inside the ``details`` column is also -possible: - - >>> query = session.query(Character.name) - >>> query.filter(Character.details['gender'] == 'male').all() - [('Arthur Dent',)] - -Update dictionary ------------------ - -The SQLAlchemy CrateDB dialect supports change tracking deep down the nested -levels of a ``ObjectType`` type field. For example, the following query will only -update the ``gender`` key. The ``species`` key which is on the same level will -be left untouched. 
- - >>> char = session.query(Character).filter_by(name='Arthur Dent').one() - >>> char.details['gender'] = 'manly man' - >>> session.commit() - >>> session.refresh(char) - - >>> char.details['gender'] - 'manly man' - - >>> char.details['species'] - 'human' - -Update nested dictionary ------------------------- - - >>> char_nested = Character(id='1234id') - >>> char_nested.details = {"name": {"first": "Arthur", "last": "Dent"}} - >>> session.add(char_nested) - >>> session.commit() - - >>> char_nested = session.query(Character).filter_by(id='1234id').one() - >>> char_nested.details['name']['first'] = 'Trillian' - >>> char_nested.details['size'] = 45 - >>> session.commit() - -Refresh and query the "characters" table: - - >>> _ = connection.execute(text("REFRESH TABLE characters")) - >>> session.refresh(char_nested) - - >>> char_nested = session.query(Character).filter_by(id='1234id').one() - >>> pprint(char_nested.details) - {'name': {'first': 'Trillian', 'last': 'Dent'}, 'size': 45} - - -``ObjectArray`` -=============== - -Note that, as opposed to the ``ObjectType`` type, the ``ObjectArray`` type isn't smart -and doesn't have intelligent change tracking. Therefore, the generated -``UPDATE`` statement will affect the whole list: - - >>> char.more_details = [{'foo': 1, 'bar': 10}, {'foo': 2}] - >>> session.commit() - - >>> char.more_details.append({'foo': 3}) - >>> session.commit() - -This will generate an ``UPDATE`` statement which looks roughly like this:: - - "UPDATE characters SET more_details = ? ...", ([{'foo': 1, 'bar': 10}, {'foo': 2}, {'foo': 3}],) - -.. hidden: - - >>> _ = connection.execute(text("REFRESH TABLE characters")) - >>> session.refresh(char) - -To run queries against fields of ``ObjectArray`` types, use the -``.any(value, operator=operators.eq)`` method on a subscript, because accessing -fields of object arrays (e.g. ``Character.more_details['foo']``) returns an -array of the field type.
- -Only one of the objects inside the array has to match in order for the result -to be returned: - - >>> query = session.query(Character.name) - >>> query.filter(Character.more_details['foo'].any(1, operator=operators.eq)).all() - [('Arthur Dent',)] - -Querying a field of an object array will result in an array of -all values of that field of all objects in that object array: - - >>> query = session.query(Character.more_details['foo']).order_by(Character.name) - >>> query.all() - [([1, 2, 3],), (None,), (None,)] - - -Geospatial types -================ - -CrateDB's geospatial types, such as :ref:`crate-reference:type-geo_point` -and :ref:`crate-reference:type-geo_shape`, can also be used within an -SQLAlchemy declarative schema: - - >>> class City(Base): - ... __tablename__ = 'cities' - ... name = sa.Column(sa.String, primary_key=True) - ... coordinate = sa.Column(Geopoint) - ... area = sa.Column(Geoshape) - -One way of inserting these types is using the `geojson`_ library, to create -points or shapes: - - >>> area = Polygon( - ... [ - ... [ - ... (139.806, 35.515), - ... (139.919, 35.703), - ... (139.768, 35.817), - ... (139.575, 35.760), - ... (139.584, 35.619), - ... (139.806, 35.515), - ... ] - ... ] - ... ) - >>> point = Point(coordinates=(139.76, 35.68)) - -These two objects can then be added to an SQLAlchemy model and added to the -session: - - >>> tokyo = City(coordinate=point, area=area, name='Tokyo') - >>> session.add(tokyo) - >>> session.commit() - >>> _ = connection.execute(text("REFRESH TABLE cities")) - -When reading them back, they are retrieved as the corresponding `geojson`_ -objects: - - >>> query = session.query(City.name, City.coordinate, City.area) - >>> query.all() - [('Tokyo', (139.75999999791384, 35.67999996710569), {"coordinates": [[[139.806, 35.515], [139.919, 35.703], [139.768, 35.817], [139.575, 35.76], [139.584, 35.619], [139.806, 35.515]]], "type": "Polygon"})] - - -.. 
hidden: Disconnect from database - - >>> session.close() - >>> connection.close() - >>> engine.dispose() - - -.. _geojson: https://pypi.org/project/geojson/ diff --git a/docs/conf.py b/docs/conf.py index 12a6d625..01351068 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -11,15 +11,12 @@ intersphinx_mapping.update({ 'py': ('https://docs.python.org/3/', None), - 'sa': ('https://docs.sqlalchemy.org/en/20/', None), 'urllib3': ('https://urllib3.readthedocs.io/en/1.26.13/', None), - 'dask': ('https://docs.dask.org/en/stable/', None), - 'pandas': ('https://pandas.pydata.org/docs/', None), }) linkcheck_anchors = True -linkcheck_ignore = [r"https://github.com/crate/cratedb-examples/blob/main/by-language/python-sqlalchemy/.*"] +linkcheck_ignore = [] # Disable version chooser. html_context.update({ diff --git a/docs/data-types.rst b/docs/data-types.rst index 2c55e7a7..146bf5b3 100644 --- a/docs/data-types.rst +++ b/docs/data-types.rst @@ -4,9 +4,7 @@ Data types ========== -The :ref:`Database API client ` and the :ref:`SQLAlchemy dialect -` use different Python data types. Consult the corresponding -section for further information. +This section documents the data types of the :ref:`CrateDB Database API client `. .. rubric:: Table of contents @@ -109,65 +107,4 @@ __ https://crate.io/docs/crate/reference/en/latest/general/ddl/data-types.html#c preserved. If you need to store it, you will need to use a separate column. -.. _data-types-sqlalchemy: - -SQLAlchemy -========== - -This section documents data types for the CrateDB :ref:`SQLAlchemy dialect -`. - -..
_sqlalchemy-type-map: - -Type map --------- - -The CrateDB dialect maps between data types like so: - -================= ========================================= -CrateDB SQLAlchemy -================= ========================================= -`boolean`__ `Boolean`__ -`byte`__ `SmallInteger`__ -`short`__ `SmallInteger`__ -`integer`__ `Integer`__ -`long`__ `NUMERIC`__ -`float`__ `Float`__ -`double`__ `DECIMAL`__ -`timestamp`__ `TIMESTAMP`__ -`string`__ `String`__ -`array`__ `ARRAY`__ -`object`__ :ref:`object` |nbsp| (extension type) -`array(object)`__ :ref:`objectarray` |nbsp| (extension type) -`geo_point`__ :ref:`geopoint` |nbsp| (extension type) -`geo_shape`__ :ref:`geoshape` |nbsp| (extension type) -================= ========================================= - - -__ https://crate.io/docs/crate/reference/en/latest/general/ddl/data-types.html#boolean -__ http://docs.sqlalchemy.org/en/latest/core/type_basics.html#sqlalchemy.types.Boolean -__ https://crate.io/docs/crate/reference/en/latest/general/ddl/data-types.html#numeric-data -__ http://docs.sqlalchemy.org/en/latest/core/type_basics.html#sqlalchemy.types.SmallInteger -__ https://crate.io/docs/crate/reference/en/latest/general/ddl/data-types.html#numeric-data -__ http://docs.sqlalchemy.org/en/latest/core/type_basics.html#sqlalchemy.types.SmallInteger -__ https://crate.io/docs/crate/reference/en/latest/general/ddl/data-types.html#numeric-data -__ http://docs.sqlalchemy.org/en/latest/core/type_basics.html#sqlalchemy.types.Integer -__ https://crate.io/docs/crate/reference/en/latest/general/ddl/data-types.html#numeric-data -__ http://docs.sqlalchemy.org/en/latest/core/type_basics.html#sqlalchemy.types.NUMERIC -__ https://crate.io/docs/crate/reference/en/latest/general/ddl/data-types.html#numeric-data -__ http://docs.sqlalchemy.org/en/latest/core/type_basics.html#sqlalchemy.types.Float -__ https://crate.io/docs/crate/reference/en/latest/general/ddl/data-types.html#numeric-data -__ 
http://docs.sqlalchemy.org/en/latest/core/type_basics.html#sqlalchemy.types.DECIMAL -__ https://crate.io/docs/crate/reference/en/latest/general/ddl/data-types.html#dates-and-times -__ http://docs.sqlalchemy.org/en/latest/core/type_basics.html#sqlalchemy.types.TIMESTAMP -__ https://crate.io/docs/crate/reference/en/latest/general/ddl/data-types.html#character-data -__ http://docs.sqlalchemy.org/en/latest/core/type_basics.html#sqlalchemy.types.String -__ https://crate.io/docs/crate/reference/en/latest/general/ddl/data-types.html#array -__ http://docs.sqlalchemy.org/en/latest/core/type_basics.html#sqlalchemy.types.ARRAY -__ https://crate.io/docs/crate/reference/en/latest/general/ddl/data-types.html#object -__ https://crate.io/docs/crate/reference/en/latest/general/ddl/data-types.html#array -__ https://crate.io/docs/crate/reference/en/latest/general/ddl/data-types.html#geo-point -__ https://crate.io/docs/crate/reference/en/latest/general/ddl/data-types.html#geo-shape - - .. _Unix time: https://en.wikipedia.org/wiki/Unix_time diff --git a/docs/getting-started.rst b/docs/getting-started.rst index a0ae8d09..a2847a41 100644 --- a/docs/getting-started.rst +++ b/docs/getting-started.rst @@ -19,10 +19,9 @@ Install The CrateDB Python client is available as package `crate`_ on `PyPI`_. -To install the most recent driver version, including the SQLAlchemy dialect -extension, run:: +To install the most recent driver version, run:: - pip install "crate[sqlalchemy]" --upgrade + pip install --upgrade crate After that is done, you can import the library, like so: diff --git a/docs/index.rst b/docs/index.rst index 27e4752e..6b941347 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -16,8 +16,7 @@ Introduction ************ The Python client library for `CrateDB`_ implements the Python Database API -Specification v2.0 (`PEP 249`_), and also includes the :ref:`CrateDB dialect -` for `SQLAlchemy`_. +Specification v2.0 (`PEP 249`_). 
The Python driver can be used to connect to both `CrateDB`_ and `CrateDB Cloud`_, and is verified to work on Linux, macOS, and Windows. It is used by @@ -28,14 +27,15 @@ it has also been tested successfully with `PyPy`_. Please make sure to also visit the section about :ref:`other-options`, using the :ref:`crate-reference:interface-postgresql` interface of `CrateDB`_. +The :ref:`CrateDB dialect ` for `SQLAlchemy`_ is provided +by the ``sqlalchemy-cratedb`` package. + ************* Documentation ************* -For general help about the Python Database API, or SQLAlchemy, please consult -`PEP 249`_, the `SQLAlchemy tutorial`_, and the general `SQLAlchemy -documentation`_. +For general help about the Python Database API, please consult `PEP 249`_. For more detailed information about how to install the client driver, how to connect to a CrateDB cluster, and how to run queries, consult the resources referenced below. @@ -86,77 +86,12 @@ Connect to `CrateDB Cloud`_. password="") -SQLAlchemy -========== - -The CrateDB dialect for `SQLAlchemy`_ offers convenient ORM access and supports -CrateDB's ``OBJECT``, ``ARRAY``, and geospatial data types using `GeoJSON`_, -supporting different kinds of `GeoJSON geometry objects`_. - -.. toctree:: - :maxdepth: 2 - - sqlalchemy - -Install package from PyPI with DB API and SQLAlchemy support. - -.. code-block:: shell - - pip install 'crate[sqlalchemy]' pandas - -Connect to CrateDB instance running on ``localhost``. - -.. code-block:: python - - # Connect using SQLAlchemy Core. - import pkg_resources - import sqlalchemy as sa - from pprint import pp - - pkg_resources.require("sqlalchemy>=2.0") - - dburi = "crate://localhost:4200" - query = "SELECT country, mountain, coordinates, height FROM sys.summits ORDER BY country;" - - engine = sa.create_engine(dburi, echo=True) - with engine.connect() as connection: - with connection.execute(sa.text(query)) as result: - pp(result.mappings().fetchall()) - -Connect to `CrateDB Cloud`_. - -.. 
code-block:: python - - # Connect using SQLAlchemy Core. - import sqlalchemy as sa - dburi = "crate://admin:@example.aks1.westeurope.azure.cratedb.net:4200?ssl=true" - engine = sa.create_engine(dburi, echo=True) - -Load results into `pandas`_ DataFrame. - -.. code-block:: python - - # Connect using SQLAlchemy Core and pandas. - import pandas as pd - import sqlalchemy as sa - - dburi = "crate://localhost:4200" - query = "SELECT * FROM sys.summits ORDER BY country;" - - engine = sa.create_engine(dburi, echo=True) - with engine.connect() as connection: - df = pd.read_sql(sql=sa.text(query), con=connection) - df.info() - print(df) - - Data types ========== -The DB API driver and the SQLAlchemy dialect support :ref:`CrateDB's data types +The DB API driver supports :ref:`CrateDB's data types ` to different degrees. For more information, -please consult the :ref:`data-types` and :ref:`SQLAlchemy extension types -` documentation pages. +please consult the :ref:`data-types` documentation page. .. toctree:: :maxdepth: 2 @@ -168,11 +103,13 @@ Examples - The :ref:`by-example` section enumerates concise examples demonstrating the different API interfaces of the CrateDB Python client library. Those are - DB API, HTTP, and BLOB interfaces, and the SQLAlchemy dialect. + DB API, HTTP, and BLOB interfaces. - Executable code examples are maintained within the `cratedb-examples repository`_. - The `sample application`_ and the corresponding `sample application documentation`_ demonstrate the use of the driver on behalf of an example "guestbook" application. +- ``sqlalchemy-cratedb`` has relevant code snippets about how to + connect to CrateDB using `SQLAlchemy`_, `pandas`_, and `Dask`_. - `Use CrateDB with pandas`_ has corresponding code snippets about how to connect to CrateDB using `pandas`_, and how to load and export data. - The `Apache Superset`_ and `FIWARE QuantumLeap data historian`_ projects. 
@@ -223,6 +160,7 @@ The project is licensed under the terms of the Apache 2.0 license, like .. _CrateDB Cloud: https://console.cratedb.cloud/ .. _CrateDB source: https://github.com/crate/crate .. _Create an issue: https://github.com/crate/crate-python/issues +.. _Dask: https://en.wikipedia.org/wiki/Dask_(software) .. _development sandbox: https://github.com/crate/crate-python/blob/master/DEVELOP.rst .. _cratedb-examples repository: https://github.com/crate/cratedb-examples/tree/main/by-language .. _FIWARE QuantumLeap data historian: https://github.com/orchestracities/ngsi-timeseries-api @@ -230,12 +168,10 @@ The project is licensed under the terms of the Apache 2.0 license, like .. _GeoJSON geometry objects: https://tools.ietf.org/html/rfc7946#section-3.1 .. _LICENSE: https://github.com/crate/crate-python/blob/master/LICENSE .. _managed on GitHub: https://github.com/crate/crate-python -.. _pandas: https://pandas.pydata.org/ +.. _pandas: https://en.wikipedia.org/wiki/Pandas_(software) .. _PEP 249: https://peps.python.org/pep-0249/ .. _PyPy: https://www.pypy.org/ .. _sample application: https://github.com/crate/crate-sample-apps/tree/main/python-flask .. _sample application documentation: https://github.com/crate/crate-sample-apps/blob/main/python-flask/documentation.md -.. _SQLAlchemy: https://www.sqlalchemy.org/ -.. _SQLAlchemy documentation: https://docs.sqlalchemy.org/ -.. _SQLAlchemy tutorial: https://docs.sqlalchemy.org/en/latest/tutorial/ +.. _SQLAlchemy: https://en.wikipedia.org/wiki/Sqlalchemy .. _Use CrateDB with pandas: https://github.com/crate/crate-qa/pull/246 diff --git a/docs/sqlalchemy.rst b/docs/sqlalchemy.rst index 8c399a5c..caf5ca8d 100644 --- a/docs/sqlalchemy.rst +++ b/docs/sqlalchemy.rst @@ -5,715 +5,13 @@ SQLAlchemy support ================== -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 2 - - -Introduction -============ - `SQLAlchemy`_ is the most popular `Object-Relational Mapping`_ (ORM) library for Python. 
-The CrateDB Python client library provides support for SQLAlchemy. An -:ref:`SQLAlchemy dialect ` for CrateDB is registered at -installation time and can be used without further configuration. - -The CrateDB SQLAlchemy dialect is validated to work with SQLAlchemy versions -``1.3``, ``1.4``, and ``2.0``. - -.. SEEALSO:: - - For general help using SQLAlchemy, consult the :ref:`SQLAlchemy tutorial - ` or the `SQLAlchemy library`_. - - Supplementary information about the CrateDB SQLAlchemy dialect can be found - in the :ref:`data types appendix `. - - Code examples for using the CrateDB SQLAlchemy dialect can be found at - :ref:`sqlalchemy-by-example`. - - -.. _connecting: - -Connecting -========== - -.. _database-urls: - -Database URLs -------------- - -In an SQLAlchemy context, database addresses are represented by *Uniform Resource -Locators* (URL_) called :ref:`sa:database_urls`. - -The simplest database URL for CrateDB looks like this:: - - crate:///[?option=value] - -Here, ```` is the node *host string*. After the host, additional query -parameters can be specified to adjust some connection settings. - -A host string looks like this:: - - [:@]: - -Here, ```` is the hostname or IP address of the CrateDB node and -```` is a valid :ref:`crate-reference:psql.port` number. - -When authentication is needed, the credentials can be optionally supplied using -``:@``. For connecting to an SSL-secured HTTP endpoint, you -can add the query parameter ``?ssl=true`` to the database URI. - -Example database URIs: - -- ``crate://localhost:4200`` -- ``crate://crate-1.vm.example.com:4200`` -- ``crate://username:password@crate-2.vm.example.com:4200/?ssl=true`` -- ``crate://198.51.100.1:4200`` - -.. TIP:: - - If ```` is blank (i.e. the database URI is just ``crate://``), then - ``localhost:4200`` will be assumed. - -Getting a connection --------------------- - -Create an engine -................ - -You can connect to CrateDB using the ``create_engine`` method. 
This method -takes a :ref:`database URL `. - -Import the ``sa`` module, like so: - - >>> import sqlalchemy as sa - -To connect to ``localhost:4200``, you can do this: - - >>> engine = sa.create_engine('crate://') - -To connect to ``crate-1.vm.example.com:4200``, you would do this: - - >>> engine = sa.create_engine('crate://crate-1.vm.example.com:4200') - -If your CrateDB cluster has multiple nodes, however, we recommend that you -configure all of them. You can do that by specifying the ``crate://`` database -URL and passing in a list of :ref:`host strings ` passed using -the ``connect_args`` argument, like so: - - >>> engine = sa.create_engine('crate://', connect_args={ - ... 'servers': ['198.51.100.1:4200', '198.51.100.2:4200'] - ... }) - -When you do this, the Database API layer will use its :ref:`round-robin -` implementation. - -The client validates :ref:`SSL server certificates ` -by default. For further adjusting this behaviour, SSL verification options can -be passed in by using the ``connect_args`` dictionary. - -For example, use ``ca_cert`` for providing a path to the CA certificate used -for signing the server certificate: - - >>> engine = sa.create_engine( - ... 'crate://', - ... connect_args={ - ... 'servers': ['198.51.100.1:4200', '198.51.100.2:4200'], - ... 'ca_cert': '', - ... } - ... ) - -In order to disable SSL verification, use ``verify_ssl_cert = False``, like: - - >>> engine = sa.create_engine( - ... 'crate://', - ... connect_args={ - ... 'servers': ['198.51.100.1:4200', '198.51.100.2:4200'], - ... 'verify_ssl_cert': False, - ... } - ... ) - - -Get a session -............. - -Once you have an CrateDB ``engine`` set up, you can create and use an SQLAlchemy -``Session`` object to execute queries: - - >>> from sqlalchemy.orm import sessionmaker - - >>> Session = sessionmaker(bind=engine) - >>> session = Session() - -.. SEEALSO:: - - SQLAlchemy has more documentation about this topic on :doc:`sa:orm/session_basics`. - - -.. 
_cloud-connect: - -Connecting to CrateDB Cloud -........................... - -Connecting to `CrateDB Cloud`_ works like this. Please note the ``?ssl=true`` -query parameter at the end of the database URI. - - >>> import sqlalchemy as sa - >>> dburi = "crate://admin:@example.aks1.westeurope.azure.cratedb.net:4200?ssl=true" - >>> engine = sa.create_engine(dburi, echo=True) - - -.. _tables: - -Tables -====== - -.. _table-definition: - -Table definition ----------------- - -Here is an example SQLAlchemy table definition using the :ref:`declarative -system `: - - >>> from sqlalchemy.ext import declarative - >>> from crate.client.sqlalchemy import types - >>> from uuid import uuid4 - - >>> def gen_key(): - ... return str(uuid4()) - - >>> Base = declarative.declarative_base(bind=engine) - - >>> class Character(Base): - ... - ... __tablename__ = 'characters' - ... __table_args__ = { - ... 'crate_number_of_shards': 3 - ... } - ... - ... id = sa.Column(sa.String, primary_key=True, default=gen_key) - ... name = sa.Column(sa.String, crate_index=False) - ... name_normalized = sa.Column(sa.String, sa.Computed("lower(name)")) - ... quote = sa.Column(sa.String, nullable=False) - ... details = sa.Column(types.ObjectType) - ... more_details = sa.Column(types.ObjectArray) - ... name_ft = sa.Column(sa.String) - ... quote_ft = sa.Column(sa.String) - ... even_more_details = sa.Column(sa.String, crate_columnstore=False) - ... created_at = sa.Column(sa.DateTime, server_default=sa.func.now()) - ... - ... __mapper_args__ = { - ... 'exclude_properties': ['name_ft', 'quote_ft'] - ... 
} - -In this example, we: - -- Define a ``gen_key`` function that produces :py:mod:`UUIDs ` -- Set up a ``Base`` class for the table -- Create the ``Characters`` class for the ``characters`` table -- Use the ``gen_key`` function to provide a default value for the ``id`` column - (which is also the primary key) -- Use standard SQLAlchemy types for the ``id``, ``name``, and ``quote`` columns -- Use ``nullable=False`` to define a ``NOT NULL`` constraint -- Disable indexing of the ``name`` column using ``crate_index=False`` -- Define a computed column ``name_normalized`` (based on ``name``) that - translates into a generated column -- Use the `ObjectType`_ extension type for the ``details`` column -- Use the `ObjectArray`_ extension type for the ``more_details`` column -- Set up the ``name_ft`` and ``quote_ft`` fulltext indexes, but exclude them from - the mapping (so SQLAlchemy doesn't try to update them as if they were columns) -- Disable the columnstore of the ``even_more_details`` column using ``crate_columnstore=False`` -- Add a ``created_at`` column whose default value is set by CrateDB's ``now()`` function. - -.. TIP:: - - This example table is used throughout the rest of this document. - -.. SEEALSO:: - - The SQLAlchemy documentation has more information about - :ref:`sa:metadata_describing`. - - -Additional ``__table_args__`` -............................. - - -The example also shows the optional usage of ``__table_args__`` to configure -table-wide attributes. The following attributes can optionally be configured: - -- ``crate_number_of_shards``: The number of primary shards the table will be - split into -- ``crate_clustered_by``: The routing column to use for sharding -- ``crate_number_of_replicas``: The number of replicas to allocate for each - primary shard -- ``crate_partitioned_by``: One or more columns to use as a partition key - -.. SEEALSO:: - - The :ref:`CREATE TABLE ` documentation - contains more information on each of the attributes. 
- - -``_id`` as primary key -...................... - -As with version 4.2 CrateDB supports the ``RETURNING`` clause, which makes it -possible to use the ``_id`` column as fetched value for the ``PRIMARY KEY`` -constraint, since the SQLAlchemy ORM always **requires** a primary key. - -A table schema like this - -.. code-block:: sql - - CREATE TABLE "doc"."logs" ( - "ts" TIMESTAMP WITH TIME ZONE NOT NULL, - "level" TEXT, - "message" TEXT - ) - -would translate into the following declarative model: - - >>> from sqlalchemy.schema import FetchedValue - - >>> class Log(Base): - ... - ... __tablename__ = 'logs' - ... __mapper_args__ = { - ... 'exclude_properties': ['id'] - ... } - ... - ... id = sa.Column("_id", sa.String, server_default=FetchedValue(), primary_key=True) - ... ts = sa.Column(sa.DateTime, server_default=sa.func.current_timestamp()) - ... level = sa.Column(sa.String) - ... message = sa.Column(sa.String) - - >>> log = Log(level="info", message="Hello World") - >>> session.add(log) - >>> session.commit() - >>> log.id - ... - - -Auto-generated primary key -.......................... - -CrateDB 4.5.0 added the :ref:`gen_random_text_uuid() ` -scalar function, which can also be used within an SQL DDL statement, in order to automatically -assign random identifiers to newly inserted records on the server side. - -In this spirit, it is suitable to be used as a ``PRIMARY KEY`` constraint for SQLAlchemy. - -A table schema like this - -.. code-block:: sql - - CREATE TABLE "doc"."items" ( - "id" STRING DEFAULT gen_random_text_uuid() NOT NULL PRIMARY KEY, - "name" STRING - ) - -would translate into the following declarative model: - - >>> class Item(Base): - ... - ... __tablename__ = 'items' - ... - ... id = sa.Column("id", sa.String, server_default=func.gen_random_text_uuid(), primary_key=True) - ... name = sa.Column("name", sa.String) - - >>> item = Item(name="Foobar") - >>> session.add(item) - >>> session.commit() - >>> item.id - ... - - -.. 
_using-extension-types: - -Extension types ---------------- - -In the :ref:`example SQLAlchemy table definition ` above, we -are making use of the two extension data types that the CrateDB SQLAlchemy -dialect provides. - -.. SEEALSO:: - - The appendix has a full :ref:`data types reference `. - -.. _object: -.. _objecttype: - -``ObjectType`` -.............. - -Objects are a common, and useful, data type when using CrateDB, so the CrateDB -SQLAlchemy dialect provides a custom ``Object`` type extension for working with -these values. - -Here's how you use the :doc:`SQLAlchemy Session ` to -insert two records: - - >>> # use the crate engine from earlier examples - >>> Session = sessionmaker(bind=crate) - >>> session = Session() - - >>> arthur = Character(name='Arthur Dent') - >>> arthur.details = {} - >>> arthur.details['gender'] = 'male' - >>> arthur.details['species'] = 'human' - >>> session.add(arthur) - - >>> trillian = Character(name='Tricia McMillan') - >>> trillian.details = {} - >>> trillian.quote = "We're on a space ship Arthur. In space." - >>> trillian.details['gender'] = 'female' - >>> trillian.details['species'] = 'human' - >>> trillian.details['female_only_attribute'] = 1 - >>> session.add(trillian) - >>> session.commit() - -.. NOTE:: - - The information we supply via the ``details`` column isn't defined in the - :ref:`original SQLAlchemy table definition ` schema. - These details can be specified as *object column policy* when you create - the column in CrateDB, you can either use the :ref:`STRICT column policy - `, or the :ref:`DYNAMIC column - policy `. - -.. NOTE:: - - Behind the scenes, if you update an ``ObjectType`` property, and ``commit`` that - change, the :ref:`UPDATE ` statement sent - to CrateDB will only include the data necessary to update the changed - sub-columns. - -.. _objectarray: - -``ObjectArray`` -............... 
- -In addition to the `ObjectType`_ type, the CrateDB SQLAlchemy dialect also provides -an ``ObjectArray`` type, which is structured as a :class:`py:list` of -:class:`dictionaries `. - -Here's how you might set the value of an ``ObjectArray`` column: - - >>> arthur.more_details = [{'foo': 1, 'bar': 10}, {'foo': 2}] - >>> session.commit() - -If you append an object, like this: - - >>> arthur.more_details.append({'foo': 3}) - >>> session.commit() - -The resulting object will look like this: - - >>> arthur.more_details - [{'foo': 1, 'bar': 10}, {'foo': 2}, {'foo': 3}] - -.. CAUTION:: - - Behind the scenes, if you update an ``ObjectArray``, and ``commit`` that - change, the :ref:`UPDATE ` statement - sent to CrateDB will include all of the ``ObjectArray`` data. - -.. _geopoint: -.. _geoshape: - -``Geopoint`` and ``Geoshape`` -............................. - -The CrateDB SQLAlchemy dialect provides two geospatial types: - -- ``Geopoint``, which represents a longitude and latitude coordinate -- ``Geoshape``, which is used to store geometric `GeoJSON geometry objects`_ - -To use these types, you can create columns, like so: - - >>> class City(Base): - ... - ... __tablename__ = 'cities' - ... name = sa.Column(sa.String, primary_key=True) - ... coordinate = sa.Column(types.Geopoint) - ... area = sa.Column(types.Geoshape) - -A geopoint can be created in multiple ways. Firstly, you can define it as a -:py:class:`py:tuple` of ``(longitude, latitude)``: - - >>> point = (139.76, 35.68) - -Secondly, you can define it as a geojson ``Point`` object: - - >>> from geojson import Point - >>> point = Point(coordinates=(139.76, 35.68)) - -To create a geoshape, you can use a geojson shape object, such as a ``Polygon``: - - >>> from geojson import Point, Polygon - >>> area = Polygon( - ... [ - ... [ - ... (139.806, 35.515), - ... (139.919, 35.703), - ... (139.768, 35.817), - ... (139.575, 35.760), - ... (139.584, 35.619), - ... (139.806, 35.515), - ... ] - ... ] - ... 
) - -You can then set the values of the ``Geopoint`` and ``Geoshape`` columns: - - >>> tokyo = City(name="Tokyo", coordinate=point, area=area) - >>> session.add(tokyo) - >>> session.commit() - -Querying -======== - -When the ``commit`` method is called, two ``INSERT`` statements are sent to -CrateDB. However, the newly inserted rows aren't immediately available for -querying because the table index is only updated periodically (one second, by -default, which is a short time for me and you, but a long time for your code). - -You can request a :ref:`table refresh ` to update -the index manually: - - >>> connection = engine.connect() - >>> _ = connection.execute(text("REFRESH TABLE characters")) - -.. NOTE:: - - Newly inserted rows can still be queried immediately if a lookup by primary - key is done. - -Here's what a regular select might look like: - - >>> query = session.query(Character).order_by(Character.name) - >>> [(c.name, c.details['gender']) for c in query] - [('Arthur Dent', 'male'), ('Tricia McMillan', 'female')] - -You can also select a portion of each record, and this even works inside -`ObjectType`_ columns: - - >>> sorted(session.query(Character.details['gender']).all()) - [('female',), ('male',)] - -You can also filter on attributes inside the `ObjectType`_ column: - - >>> query = session.query(Character.name) - >>> query.filter(Character.details['gender'] == 'male').all() - [('Arthur Dent',)] - -To filter on an `ObjectArray`_, you have to do something like this: - - >>> from sqlalchemy.sql import operators - - >>> query = session.query(Character.name) - >>> query.filter(Character.more_details['foo'].any(1, operator=operators.eq)).all() - [(u'Arthur Dent',)] - -Here, we're using SQLAlchemy's :py:meth:`any ` -method along with Python's :py:func:`py:operator.eq` function, in order to -match the value ``1`` against the key ``foo`` of any dictionary in the -``more_details`` list. - -Only one of the keys has to match for the row to be returned. 
- -This works, because ``ObjectArray`` keys return a list of all values for that -key, like so: - - >>> arthur.more_details['foo'] - [1, 2, 3] - -Querying a key of an ``ObjectArray`` column will return all values for that key -for all matching rows: - - >>> query = session.query(Character.more_details['foo']).order_by(Character.name) - >>> query.all() - [([1, 2, 3],), (None,)] - -.. _aggregate-functions: - -Aggregate functions -------------------- - -SQLAlchemy supports different ways to `count result rows`_. However, because -CrateDB doesn't support subqueries, counts must be written in one of the -following two ways. - -This counts the number of character records by counting the number of ``id`` -values in the table: - - >>> session.query(sa.func.count(Character.id)).scalar() - 2 - -.. NOTE:: - - If you're doing it like this, the column you select must be the primary - key. - -And this counts the number of character records by selecting all columns, and -then counting the number of rows: - - >>> session.query(sa.func.count('*')).select_from(Character).scalar() - 2 - -You can layer in calls to ``group_by`` and ``order_by`` when you use one of -these methods, like so: - - >>> session.query(sa.func.count(Character.id), Character.name) \ - ... .group_by(Character.name) \ - ... .order_by(sa.desc(sa.func.count(Character.id))) \ - ... .order_by(Character.name).all() - [(1, u'Arthur Dent'), (1, u'Tricia McMillan')] - -Fulltext search ---------------- - -Matching -........ - -Fulltext Search in CrateDB is done with the :ref:`crate-reference:predicates_match`. - -The CrateDB SQLAlchemy dialect provides a ``match`` function in the -``predicates`` module, which can be used to search one or multiple fields. - -Here's an example use of the ``match`` function: - - >>> from crate.client.sqlalchemy.predicates import match - - >>> session.query(Character.name) \ - ... .filter(match(Character.name_ft, 'Arthur')) \ - ... 
.all() - [('Arthur Dent',)] - -In this example, we're selecting character ``name`` values, and returning all -rows where the ``name_ft`` index matches the string ``Arthur``. - -.. NOTE:: - - To use fulltext searches on a column, an explicit fulltext index with an - analyzer must be created on the column. Consult the documentation about - :ref:`crate-reference:fulltext-indices` for more information. - -The ``match`` function takes the following options:: - - match(column, term, match_type=None, options=None) - -:``column``: - - A reference to a column or an index:: - - match(Character.name_ft, 'Trillian') - - Or a subcolumn:: - - match(Character.details['name']['first'], 'Trillian') - - Or a dictionary of the same, with `boost values`_:: - - match({Character.name_ft: 0.5, - Character.details['name']['first']: 0.8, - Character.details['name']['last']: 0.2}, - 'Trillian') - - .. SEEALSO:: - - The `arguments reference`_ of the :ref:`crate-reference:predicates_match` - has more in-depth information. - -:``term``: - - The term to match against. - - This string is analyzed and the resulting tokens are compared to the index. - -:``match_type``: *(optional)* - - The :ref:`crate-reference:predicates_match_types`. - - Determine how the ``term`` is applied and the :ref:`_score - ` gets calculated. - See also `score usage`_. - - Here's an example:: - - match({Character.name_ft: 0.5, - Character.details['name']['first']: 0.8, - Character.details['name']['last']: 0.2}, - 'Trillian', - match_type='phrase') - -:``options``: *(optional)* - - The `match options`_. - - Specify match type behaviour. (Not possible without a specified match type.) - - Match options must be supplied as a dictionary:: - - match({Character.name_ft: 0.5, - Character.details['name']['first']: 0.8, - Character.details['name']['last']: 0.2}, - 'Trillian', - match_type='phrase' - options={ - 'fuzziness': 3, - 'analyzer': 'english'}) - -Relevance -......... 
- -To get the relevance of a matching row, the row :ref:`_score -` can be used. -See also `score usage`_. - -The score is relative to other result rows produced by your query. The higher -the score, the more relevant the result row. - - .. COMMENT - - Keep this anonymous link in place so it doesn't get lost. We have to use - this link format because of the leading underscore. - -The score is made available via the ``_score`` column, which is a virtual -column, meaning that it doesn't exist on the source table, and in most cases, -should not be included in your :ref:`table definition `. - -You can select ``_score`` as part of a query, like this: - - >>> session.query(Character.name, '_score') \ - ... .filter(match(Character.quote_ft, 'space')) \ - ... .all() - [('Tricia McMillan', ...)] - -Here, we're matching the term ``space`` against the ``quote_ft`` fulltext -index. And we're selecting the ``name`` column of the character by using the -table definition But notice that we select the associated score by passing in -the virtual column name as a string (``_score``) instead of using a defined -column on the ``Character`` class. +The `SQLAlchemy`_ CrateDB dialect is provided by the `sqlalchemy-cratedb`_ +package. -.. _arguments reference: https://crate.io/docs/crate/reference/en/latest/general/dql/fulltext.html#arguments -.. _boost values: https://crate.io/docs/crate/reference/en/latest/general/dql/fulltext.html#arguments -.. _count result rows: https://docs.sqlalchemy.org/en/14/orm/tutorial.html#counting -.. _CrateDB Cloud: https://console.cratedb.cloud/ -.. _Database API: https://www.python.org/dev/peps/pep-0249/ -.. _geojson geometry objects: https://www.rfc-editor.org/rfc/rfc7946#section-3.1 -.. _match options: https://crate.io/docs/crate/reference/en/latest/general/dql/fulltext.html#options .. _Object-Relational Mapping: https://en.wikipedia.org/wiki/Object-relational_mapping -.. 
_score usage: https://crate.io/docs/crate/reference/en/latest/general/dql/fulltext.html#usage .. _SQLAlchemy: https://www.sqlalchemy.org/ -.. _SQLAlchemy library: https://www.sqlalchemy.org/library.html -.. _URL: https://en.wikipedia.org/wiki/Uniform_Resource_Locator +.. _sqlalchemy-cratedb: https://github.com/crate-workbench/sqlalchemy-cratedb diff --git a/setup.py b/setup.py index 3ecbf9c1..63f75a93 100644 --- a/setup.py +++ b/setup.py @@ -50,26 +50,19 @@ def read(path): long_description_content_type='text/x-rst', platforms=['any'], license='Apache License 2.0', - keywords='crate db api sqlalchemy', + keywords='cratedb db api dbapi database sql http rdbms olap', packages=find_packages('src'), namespace_packages=['crate'], - entry_points={ - 'sqlalchemy.dialects': [ - 'crate = crate.client.sqlalchemy:CrateDialect' - ] - }, install_requires=[ 'urllib3<2.3', 'verlib2==0.2.0', ], extras_require=dict( - sqlalchemy=['sqlalchemy>=1.0,<2.1', - 'geojson>=2.5.0,<4', - 'backports.zoneinfo<1; python_version<"3.9"'], test=['tox>=3,<5', 'zope.testing>=4,<6', 'zope.testrunner>=5,<7', 'zc.customdoctests>=1.0.1,<2', + 'backports.zoneinfo<1; python_version<"3.9"', 'certifi', 'createcoverage>=1,<2', 'dask[dataframe]', diff --git a/src/crate/client/sqlalchemy/__init__.py b/src/crate/client/sqlalchemy/__init__.py deleted file mode 100644 index 41104f4b..00000000 --- a/src/crate/client/sqlalchemy/__init__.py +++ /dev/null @@ -1,50 +0,0 @@ -# -*- coding: utf-8; -*- -# -# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor -# license agreements. See the NOTICE file distributed with this work for -# additional information regarding copyright ownership. Crate licenses -# this file to you under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
You may -# obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# However, if you have executed another commercial license agreement -# with Crate these terms will supersede the license and you may use the -# software solely pursuant to the terms of the relevant commercial agreement. - -from .compat.api13 import monkeypatch_add_exec_driver_sql -from .dialect import CrateDialect -from .sa_version import SA_1_4, SA_2_0, SA_VERSION # noqa: F401 - - -if SA_VERSION < SA_1_4: - import textwrap - import warnings - - # SQLAlchemy 1.3 is effectively EOL. - SA13_DEPRECATION_WARNING = textwrap.dedent(""" - WARNING: SQLAlchemy 1.3 is effectively EOL. - - SQLAlchemy 1.3 is EOL since 2023-01-27. - Future versions of the CrateDB SQLAlchemy dialect will drop support for SQLAlchemy 1.3. - It is recommended that you transition to using SQLAlchemy 1.4 or 2.0: - - - https://docs.sqlalchemy.org/en/14/changelog/migration_14.html - - https://docs.sqlalchemy.org/en/20/changelog/migration_20.html - """.lstrip("\n")) - warnings.warn(message=SA13_DEPRECATION_WARNING, category=DeprecationWarning) - - # SQLAlchemy 1.3 does not have the `exec_driver_sql` method, so add it. 
- monkeypatch_add_exec_driver_sql() - - -__all__ = [ - CrateDialect, -] diff --git a/src/crate/client/sqlalchemy/compat/__init__.py b/src/crate/client/sqlalchemy/compat/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/crate/client/sqlalchemy/compat/api13.py b/src/crate/client/sqlalchemy/compat/api13.py deleted file mode 100644 index bcd2a6ed..00000000 --- a/src/crate/client/sqlalchemy/compat/api13.py +++ /dev/null @@ -1,156 +0,0 @@ -# -*- coding: utf-8; -*- -# -# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor -# license agreements. See the NOTICE file distributed with this work for -# additional information regarding copyright ownership. Crate licenses -# this file to you under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. You may -# obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# However, if you have executed another commercial license agreement -# with Crate these terms will supersede the license and you may use the -# software solely pursuant to the terms of the relevant commercial agreement. - -""" -Compatibility module for running a subset of SQLAlchemy 2.0 programs on -SQLAlchemy 1.3. By using monkey-patching, it can do two things: - -1. Add the `exec_driver_sql` method to SA's `Connection` and `Engine`. -2. Amend the `sql.select` function to accept the calling semantics of - the modern variant. - -Reason: `exec_driver_sql` gets used within the CrateDB dialect already, -and the new calling semantics of `sql.select` already get used within -many of the test cases already. 
Please note that the patch for -`sql.select` is only applied when running the test suite. -""" - -import collections.abc as collections_abc - -from sqlalchemy import exc -from sqlalchemy.sql import Select -from sqlalchemy.sql import select as original_select -from sqlalchemy.util import immutabledict - - -# `_distill_params_20` copied from SA14's `sqlalchemy.engine.{base,util}`. -_no_tuple = () -_no_kw = immutabledict() - - -def _distill_params_20(params): - if params is None: - return _no_tuple, _no_kw - elif isinstance(params, list): - # collections_abc.MutableSequence): # avoid abc.__instancecheck__ - if params and not isinstance(params[0], (collections_abc.Mapping, tuple)): - raise exc.ArgumentError( - "List argument must consist only of tuples or dictionaries" - ) - - return (params,), _no_kw - elif isinstance( - params, - (tuple, dict, immutabledict), - # only do abc.__instancecheck__ for Mapping after we've checked - # for plain dictionaries and would otherwise raise - ) or isinstance(params, collections_abc.Mapping): - return (params,), _no_kw - else: - raise exc.ArgumentError("mapping or sequence expected for parameters") - - -def exec_driver_sql(self, statement, parameters=None, execution_options=None): - """ - Adapter for `exec_driver_sql`, which is available since SA14, for SA13. - """ - if execution_options is not None: - raise ValueError( - "SA13 backward-compatibility: " - "`exec_driver_sql` does not support `execution_options`" - ) - args_10style, kwargs_10style = _distill_params_20(parameters) - return self.execute(statement, *args_10style, **kwargs_10style) - - -def monkeypatch_add_exec_driver_sql(): - """ - Transparently add SA14's `exec_driver_sql()` method to SA13. - - AttributeError: 'Connection' object has no attribute 'exec_driver_sql' - AttributeError: 'Engine' object has no attribute 'exec_driver_sql' - """ - from sqlalchemy.engine.base import Connection, Engine - - # Add `exec_driver_sql` method to SA's `Connection` and `Engine` classes. 
- Connection.exec_driver_sql = exec_driver_sql - Engine.exec_driver_sql = exec_driver_sql - - -def select_sa14(*columns, **kw) -> Select: - """ - Adapt SA14/SA20's calling semantics of `sql.select()` to SA13. - - With SA20, `select()` no longer accepts varied constructor arguments, only - the "generative" style of `select()` will be supported. The list of columns - / tables to select from should be passed positionally. - - Derived from https://github.com/sqlalchemy/alembic/blob/b1fad6b6/alembic/util/sqla_compat.py#L557-L558 - - sqlalchemy.exc.ArgumentError: columns argument to select() must be a Python list or other iterable - """ - if isinstance(columns, tuple) and isinstance(columns[0], list): - if "whereclause" in kw: - raise ValueError( - "SA13 backward-compatibility: " - "`whereclause` is both in kwargs and columns tuple" - ) - columns, whereclause = columns - kw["whereclause"] = whereclause - return original_select(columns, **kw) - - -def monkeypatch_amend_select_sa14(): - """ - Make SA13's `sql.select()` transparently accept calling semantics of SA14 - and SA20, by swapping in the newer variant of `select_sa14()`. - - This supports the test suite of `crate-python`, because it already uses the - modern calling semantics. - """ - import sqlalchemy - - sqlalchemy.select = select_sa14 - sqlalchemy.sql.select = select_sa14 - sqlalchemy.sql.expression.select = select_sa14 - - -@property -def connectionfairy_driver_connection_sa14(self): - """The connection object as returned by the driver after a connect. - - .. versionadded:: 1.4.24 - - .. 
seealso:: - - :attr:`._ConnectionFairy.dbapi_connection` - - :attr:`._ConnectionRecord.driver_connection` - - :ref:`faq_dbapi_connection` - - """ - return self.connection - - -def monkeypatch_add_connectionfairy_driver_connection(): - import sqlalchemy.pool.base - sqlalchemy.pool.base._ConnectionFairy.driver_connection = connectionfairy_driver_connection_sa14 diff --git a/src/crate/client/sqlalchemy/compat/core10.py b/src/crate/client/sqlalchemy/compat/core10.py deleted file mode 100644 index 92c62dd8..00000000 --- a/src/crate/client/sqlalchemy/compat/core10.py +++ /dev/null @@ -1,264 +0,0 @@ -# -*- coding: utf-8; -*- -# -# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor -# license agreements. See the NOTICE file distributed with this work for -# additional information regarding copyright ownership. Crate licenses -# this file to you under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. You may -# obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# However, if you have executed another commercial license agreement -# with Crate these terms will supersede the license and you may use the -# software solely pursuant to the terms of the relevant commercial agreement. 
- -import sqlalchemy as sa -from sqlalchemy.dialects.postgresql.base import PGCompiler -from sqlalchemy.sql.crud import (REQUIRED, _create_bind_param, - _extend_values_for_multiparams, - _get_multitable_params, - _get_stmt_parameters_params, - _key_getters_for_crud_column, _scan_cols, - _scan_insert_from_select_cols) - -from crate.client.sqlalchemy.compiler import CrateCompiler - - -class CrateCompilerSA10(CrateCompiler): - - def returning_clause(self, stmt, returning_cols): - """ - Generate RETURNING clause, PostgreSQL-compatible. - """ - return PGCompiler.returning_clause(self, stmt, returning_cols) - - def visit_update(self, update_stmt, **kw): - """ - used to compile expressions - Parts are taken from the SQLCompiler base class. - """ - - # [10] CrateDB patch. - if not update_stmt.parameters and \ - not hasattr(update_stmt, '_crate_specific'): - return super().visit_update(update_stmt, **kw) - - self.isupdate = True - - extra_froms = update_stmt._extra_froms - - text = 'UPDATE ' - - if update_stmt._prefixes: - text += self._generate_prefixes(update_stmt, - update_stmt._prefixes, **kw) - - table_text = self.update_tables_clause(update_stmt, update_stmt.table, - extra_froms, **kw) - - dialect_hints = None - if update_stmt._hints: - dialect_hints, table_text = self._setup_crud_hints( - update_stmt, table_text - ) - - # [10] CrateDB patch. - crud_params = _get_crud_params(self, update_stmt, **kw) - - text += table_text - - text += ' SET ' - - # [10] CrateDB patch begin. - include_table = \ - extra_froms and self.render_table_with_column_in_update_from - - set_clauses = [] - - for k, v in crud_params: - clause = k._compiler_dispatch(self, - include_table=include_table) + \ - ' = ' + v - set_clauses.append(clause) - - for k, v in update_stmt.parameters.items(): - if isinstance(k, str) and '[' in k: - bindparam = sa.sql.bindparam(k, v) - set_clauses.append(k + ' = ' + self.process(bindparam)) - - text += ', '.join(set_clauses) - # [10] CrateDB patch end. 
- - if self.returning or update_stmt._returning: - if not self.returning: - self.returning = update_stmt._returning - if self.returning_precedes_values: - text += " " + self.returning_clause( - update_stmt, self.returning) - - if extra_froms: - extra_from_text = self.update_from_clause( - update_stmt, - update_stmt.table, - extra_froms, - dialect_hints, - **kw) - if extra_from_text: - text += " " + extra_from_text - - if update_stmt._whereclause is not None: - t = self.process(update_stmt._whereclause) - if t: - text += " WHERE " + t - - limit_clause = self.update_limit_clause(update_stmt) - if limit_clause: - text += " " + limit_clause - - if self.returning and not self.returning_precedes_values: - text += " " + self.returning_clause( - update_stmt, self.returning) - - return text - - -def _get_crud_params(compiler, stmt, **kw): - """create a set of tuples representing column/string pairs for use - in an INSERT or UPDATE statement. - - Also generates the Compiled object's postfetch, prefetch, and - returning column collections, used for default handling and ultimately - populating the ResultProxy's prefetch_cols() and postfetch_cols() - collections. 
- - """ - - compiler.postfetch = [] - compiler.insert_prefetch = [] - compiler.update_prefetch = [] - compiler.returning = [] - - # no parameters in the statement, no parameters in the - # compiled params - return binds for all columns - if compiler.column_keys is None and stmt.parameters is None: - return [ - (c, _create_bind_param(compiler, c, None, required=True)) - for c in stmt.table.columns - ] - - if stmt._has_multi_parameters: - stmt_parameters = stmt.parameters[0] - else: - stmt_parameters = stmt.parameters - - # getters - these are normally just column.key, - # but in the case of mysql multi-table update, the rules for - # .key must conditionally take tablename into account - ( - _column_as_key, - _getattr_col_key, - _col_bind_name, - ) = _key_getters_for_crud_column(compiler, stmt) - - # if we have statement parameters - set defaults in the - # compiled params - if compiler.column_keys is None: - parameters = {} - else: - parameters = dict( - (_column_as_key(key), REQUIRED) - for key in compiler.column_keys - if not stmt_parameters or key not in stmt_parameters - ) - - # create a list of column assignment clauses as tuples - values = [] - - if stmt_parameters is not None: - _get_stmt_parameters_params( - compiler, parameters, stmt_parameters, _column_as_key, values, kw - ) - - check_columns = {} - - # special logic that only occurs for multi-table UPDATE - # statements - if compiler.isupdate and stmt._extra_froms and stmt_parameters: - _get_multitable_params( - compiler, - stmt, - stmt_parameters, - check_columns, - _col_bind_name, - _getattr_col_key, - values, - kw, - ) - - if compiler.isinsert and stmt.select_names: - _scan_insert_from_select_cols( - compiler, - stmt, - parameters, - _getattr_col_key, - _column_as_key, - _col_bind_name, - check_columns, - values, - kw, - ) - else: - _scan_cols( - compiler, - stmt, - parameters, - _getattr_col_key, - _column_as_key, - _col_bind_name, - check_columns, - values, - kw, - ) - - # [10] CrateDB patch. 
- # - # This sanity check performed by SQLAlchemy currently needs to be - # deactivated in order to satisfy the rewriting logic of the CrateDB - # dialect in `rewrite_update` and `visit_update`. - # - # It can be quickly reproduced by activating this section and running the - # test cases:: - # - # ./bin/test -vvvv -t dict_test - # - # That croaks like:: - # - # sqlalchemy.exc.CompileError: Unconsumed column names: characters_name, data['nested'] - # - # TODO: Investigate why this is actually happening and eventually mitigate - # the root cause. - """ - if parameters and stmt_parameters: - check = ( - set(parameters) - .intersection(_column_as_key(k) for k in stmt_parameters) - .difference(check_columns) - ) - if check: - raise exc.CompileError( - "Unconsumed column names: %s" - % (", ".join("%s" % c for c in check)) - ) - """ - - if stmt._has_multi_parameters: - values = _extend_values_for_multiparams(compiler, stmt, values, kw) - - return values diff --git a/src/crate/client/sqlalchemy/compat/core14.py b/src/crate/client/sqlalchemy/compat/core14.py deleted file mode 100644 index 2dd6670a..00000000 --- a/src/crate/client/sqlalchemy/compat/core14.py +++ /dev/null @@ -1,359 +0,0 @@ -# -*- coding: utf-8; -*- -# -# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor -# license agreements. See the NOTICE file distributed with this work for -# additional information regarding copyright ownership. Crate licenses -# this file to you under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. You may -# obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the -# License for the specific language governing permissions and limitations -# under the License. -# -# However, if you have executed another commercial license agreement -# with Crate these terms will supersede the license and you may use the -# software solely pursuant to the terms of the relevant commercial agreement. - -import sqlalchemy as sa -from sqlalchemy.dialects.postgresql.base import PGCompiler -from sqlalchemy.sql import selectable -from sqlalchemy.sql.crud import (REQUIRED, _create_bind_param, - _extend_values_for_multiparams, - _get_stmt_parameter_tuples_params, - _get_update_multitable_params, - _key_getters_for_crud_column, _scan_cols, - _scan_insert_from_select_cols) - -from crate.client.sqlalchemy.compiler import CrateCompiler - - -class CrateCompilerSA14(CrateCompiler): - - def returning_clause(self, stmt, returning_cols): - """ - Generate RETURNING clause, PostgreSQL-compatible. - """ - return PGCompiler.returning_clause(self, stmt, returning_cols) - - def visit_update(self, update_stmt, **kw): - - compile_state = update_stmt._compile_state_factory( - update_stmt, self, **kw - ) - update_stmt = compile_state.statement - - # [14] CrateDB patch. 
- if not compile_state._dict_parameters and \ - not hasattr(update_stmt, '_crate_specific'): - return super().visit_update(update_stmt, **kw) - - toplevel = not self.stack - if toplevel: - self.isupdate = True - if not self.compile_state: - self.compile_state = compile_state - - extra_froms = compile_state._extra_froms - is_multitable = bool(extra_froms) - - if is_multitable: - # main table might be a JOIN - main_froms = set(selectable._from_objects(update_stmt.table)) - render_extra_froms = [ - f for f in extra_froms if f not in main_froms - ] - correlate_froms = main_froms.union(extra_froms) - else: - render_extra_froms = [] - correlate_froms = {update_stmt.table} - - self.stack.append( - { - "correlate_froms": correlate_froms, - "asfrom_froms": correlate_froms, - "selectable": update_stmt, - } - ) - - text = "UPDATE " - - if update_stmt._prefixes: - text += self._generate_prefixes( - update_stmt, update_stmt._prefixes, **kw - ) - - table_text = self.update_tables_clause( - update_stmt, update_stmt.table, render_extra_froms, **kw - ) - - # [14] CrateDB patch. - crud_params = _get_crud_params( - self, update_stmt, compile_state, **kw - ) - - if update_stmt._hints: - dialect_hints, table_text = self._setup_crud_hints( - update_stmt, table_text - ) - else: - dialect_hints = None - - if update_stmt._independent_ctes: - for cte in update_stmt._independent_ctes: - cte._compiler_dispatch(self, **kw) - - text += table_text - - text += " SET " - - # [14] CrateDB patch begin. 
- include_table = \ - extra_froms and self.render_table_with_column_in_update_from - - set_clauses = [] - - for c, expr, value in crud_params: - key = c._compiler_dispatch(self, include_table=include_table) - clause = key + ' = ' + value - set_clauses.append(clause) - - for k, v in compile_state._dict_parameters.items(): - if isinstance(k, str) and '[' in k: - bindparam = sa.sql.bindparam(k, v) - clause = k + ' = ' + self.process(bindparam) - set_clauses.append(clause) - - text += ', '.join(set_clauses) - # [14] CrateDB patch end. - - if self.returning or update_stmt._returning: - if self.returning_precedes_values: - text += " " + self.returning_clause( - update_stmt, self.returning or update_stmt._returning - ) - - if extra_froms: - extra_from_text = self.update_from_clause( - update_stmt, - update_stmt.table, - render_extra_froms, - dialect_hints, - **kw - ) - if extra_from_text: - text += " " + extra_from_text - - if update_stmt._where_criteria: - t = self._generate_delimited_and_list( - update_stmt._where_criteria, **kw - ) - if t: - text += " WHERE " + t - - limit_clause = self.update_limit_clause(update_stmt) - if limit_clause: - text += " " + limit_clause - - if ( - self.returning or update_stmt._returning - ) and not self.returning_precedes_values: - text += " " + self.returning_clause( - update_stmt, self.returning or update_stmt._returning - ) - - if self.ctes: - nesting_level = len(self.stack) if not toplevel else None - text = self._render_cte_clause(nesting_level=nesting_level) + text - - self.stack.pop(-1) - - return text - - -def _get_crud_params(compiler, stmt, compile_state, **kw): - """create a set of tuples representing column/string pairs for use - in an INSERT or UPDATE statement. - - Also generates the Compiled object's postfetch, prefetch, and - returning column collections, used for default handling and ultimately - populating the CursorResult's prefetch_cols() and postfetch_cols() - collections. 
- - """ - - compiler.postfetch = [] - compiler.insert_prefetch = [] - compiler.update_prefetch = [] - compiler.returning = [] - - # getters - these are normally just column.key, - # but in the case of mysql multi-table update, the rules for - # .key must conditionally take tablename into account - ( - _column_as_key, - _getattr_col_key, - _col_bind_name, - ) = getters = _key_getters_for_crud_column(compiler, stmt, compile_state) - - compiler._key_getters_for_crud_column = getters - - # no parameters in the statement, no parameters in the - # compiled params - return binds for all columns - if compiler.column_keys is None and compile_state._no_parameters: - return [ - ( - c, - compiler.preparer.format_column(c), - _create_bind_param(compiler, c, None, required=True), - ) - for c in stmt.table.columns - ] - - if compile_state._has_multi_parameters: - spd = compile_state._multi_parameters[0] - stmt_parameter_tuples = list(spd.items()) - elif compile_state._ordered_values: - spd = compile_state._dict_parameters - stmt_parameter_tuples = compile_state._ordered_values - elif compile_state._dict_parameters: - spd = compile_state._dict_parameters - stmt_parameter_tuples = list(spd.items()) - else: - stmt_parameter_tuples = spd = None - - # if we have statement parameters - set defaults in the - # compiled params - if compiler.column_keys is None: - parameters = {} - elif stmt_parameter_tuples: - parameters = dict( - (_column_as_key(key), REQUIRED) - for key in compiler.column_keys - if key not in spd - ) - else: - parameters = dict( - (_column_as_key(key), REQUIRED) for key in compiler.column_keys - ) - - # create a list of column assignment clauses as tuples - values = [] - - if stmt_parameter_tuples is not None: - _get_stmt_parameter_tuples_params( - compiler, - compile_state, - parameters, - stmt_parameter_tuples, - _column_as_key, - values, - kw, - ) - - check_columns = {} - - # special logic that only occurs for multi-table UPDATE - # statements - if 
compile_state.isupdate and compile_state.is_multitable: - _get_update_multitable_params( - compiler, - stmt, - compile_state, - stmt_parameter_tuples, - check_columns, - _col_bind_name, - _getattr_col_key, - values, - kw, - ) - - if compile_state.isinsert and stmt._select_names: - _scan_insert_from_select_cols( - compiler, - stmt, - compile_state, - parameters, - _getattr_col_key, - _column_as_key, - _col_bind_name, - check_columns, - values, - kw, - ) - else: - _scan_cols( - compiler, - stmt, - compile_state, - parameters, - _getattr_col_key, - _column_as_key, - _col_bind_name, - check_columns, - values, - kw, - ) - - # [14] CrateDB patch. - # - # This sanity check performed by SQLAlchemy currently needs to be - # deactivated in order to satisfy the rewriting logic of the CrateDB - # dialect in `rewrite_update` and `visit_update`. - # - # It can be quickly reproduced by activating this section and running the - # test cases:: - # - # ./bin/test -vvvv -t dict_test - # - # That croaks like:: - # - # sqlalchemy.exc.CompileError: Unconsumed column names: characters_name, data['nested'] - # - # TODO: Investigate why this is actually happening and eventually mitigate - # the root cause. - """ - if parameters and stmt_parameter_tuples: - check = ( - set(parameters) - .intersection(_column_as_key(k) for k, v in stmt_parameter_tuples) - .difference(check_columns) - ) - if check: - raise exc.CompileError( - "Unconsumed column names: %s" - % (", ".join("%s" % (c,) for c in check)) - ) - """ - - if compile_state._has_multi_parameters: - values = _extend_values_for_multiparams( - compiler, - stmt, - compile_state, - values, - _column_as_key, - kw, - ) - elif ( - not values - and compiler.for_executemany # noqa: W503 - and compiler.dialect.supports_default_metavalue # noqa: W503 - ): - # convert an "INSERT DEFAULT VALUES" - # into INSERT (firstcol) VALUES (DEFAULT) which can be turned - # into an in-place multi values. 
This supports - # insert_executemany_returning mode :) - values = [ - ( - stmt.table.columns[0], - compiler.preparer.format_column(stmt.table.columns[0]), - "DEFAULT", - ) - ] - - return values diff --git a/src/crate/client/sqlalchemy/compat/core20.py b/src/crate/client/sqlalchemy/compat/core20.py deleted file mode 100644 index 6f128876..00000000 --- a/src/crate/client/sqlalchemy/compat/core20.py +++ /dev/null @@ -1,447 +0,0 @@ -# -*- coding: utf-8; -*- -# -# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor -# license agreements. See the NOTICE file distributed with this work for -# additional information regarding copyright ownership. Crate licenses -# this file to you under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. You may -# obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# However, if you have executed another commercial license agreement -# with Crate these terms will supersede the license and you may use the -# software solely pursuant to the terms of the relevant commercial agreement. 
- -from typing import Any, Dict, List, MutableMapping, Optional, Tuple, Union - -import sqlalchemy as sa -from sqlalchemy import ColumnClause, ValuesBase, cast, exc -from sqlalchemy.sql import dml -from sqlalchemy.sql.base import _from_objects -from sqlalchemy.sql.compiler import SQLCompiler -from sqlalchemy.sql.crud import (REQUIRED, _as_dml_column, _create_bind_param, - _CrudParamElement, _CrudParams, - _extend_values_for_multiparams, - _get_stmt_parameter_tuples_params, - _get_update_multitable_params, - _key_getters_for_crud_column, _scan_cols, - _scan_insert_from_select_cols, - _setup_delete_return_defaults) -from sqlalchemy.sql.dml import DMLState, _DMLColumnElement -from sqlalchemy.sql.dml import isinsert as _compile_state_isinsert - -from crate.client.sqlalchemy.compiler import CrateCompiler - - -class CrateCompilerSA20(CrateCompiler): - - def visit_update(self, update_stmt, **kw): - compile_state = update_stmt._compile_state_factory( - update_stmt, self, **kw - ) - update_stmt = compile_state.statement - - # [20] CrateDB patch. 
- if not compile_state._dict_parameters and \ - not hasattr(update_stmt, '_crate_specific'): - return super().visit_update(update_stmt, **kw) - - toplevel = not self.stack - if toplevel: - self.isupdate = True - if not self.dml_compile_state: - self.dml_compile_state = compile_state - if not self.compile_state: - self.compile_state = compile_state - - extra_froms = compile_state._extra_froms - is_multitable = bool(extra_froms) - - if is_multitable: - # main table might be a JOIN - main_froms = set(_from_objects(update_stmt.table)) - render_extra_froms = [ - f for f in extra_froms if f not in main_froms - ] - correlate_froms = main_froms.union(extra_froms) - else: - render_extra_froms = [] - correlate_froms = {update_stmt.table} - - self.stack.append( - { - "correlate_froms": correlate_froms, - "asfrom_froms": correlate_froms, - "selectable": update_stmt, - } - ) - - text = "UPDATE " - - if update_stmt._prefixes: - text += self._generate_prefixes( - update_stmt, update_stmt._prefixes, **kw - ) - - table_text = self.update_tables_clause( - update_stmt, update_stmt.table, render_extra_froms, **kw - ) - # [20] CrateDB patch. - crud_params_struct = _get_crud_params( - self, update_stmt, compile_state, toplevel, **kw - ) - crud_params = crud_params_struct.single_params - - if update_stmt._hints: - dialect_hints, table_text = self._setup_crud_hints( - update_stmt, table_text - ) - else: - dialect_hints = None - - if update_stmt._independent_ctes: - self._dispatch_independent_ctes(update_stmt, kw) - - text += table_text - - text += " SET " - - # [20] CrateDB patch begin. 
- include_table = extra_froms and \ - self.render_table_with_column_in_update_from - - set_clauses = [] - - for c, expr, value, _ in crud_params: - key = c._compiler_dispatch(self, include_table=include_table) - clause = key + ' = ' + value - set_clauses.append(clause) - - for k, v in compile_state._dict_parameters.items(): - if isinstance(k, str) and '[' in k: - bindparam = sa.sql.bindparam(k, v) - clause = k + ' = ' + self.process(bindparam) - set_clauses.append(clause) - - text += ', '.join(set_clauses) - # [20] CrateDB patch end. - - if self.implicit_returning or update_stmt._returning: - if self.returning_precedes_values: - text += " " + self.returning_clause( - update_stmt, - self.implicit_returning or update_stmt._returning, - populate_result_map=toplevel, - ) - - if extra_froms: - extra_from_text = self.update_from_clause( - update_stmt, - update_stmt.table, - render_extra_froms, - dialect_hints, - **kw, - ) - if extra_from_text: - text += " " + extra_from_text - - if update_stmt._where_criteria: - t = self._generate_delimited_and_list( - update_stmt._where_criteria, **kw - ) - if t: - text += " WHERE " + t - - limit_clause = self.update_limit_clause(update_stmt) - if limit_clause: - text += " " + limit_clause - - if ( - self.implicit_returning or update_stmt._returning - ) and not self.returning_precedes_values: - text += " " + self.returning_clause( - update_stmt, - self.implicit_returning or update_stmt._returning, - populate_result_map=toplevel, - ) - - if self.ctes: - nesting_level = len(self.stack) if not toplevel else None - text = self._render_cte_clause(nesting_level=nesting_level) + text - - self.stack.pop(-1) - - return text - - -def _get_crud_params( - compiler: SQLCompiler, - stmt: ValuesBase, - compile_state: DMLState, - toplevel: bool, - **kw: Any, -) -> _CrudParams: - """create a set of tuples representing column/string pairs for use - in an INSERT or UPDATE statement. 
- - Also generates the Compiled object's postfetch, prefetch, and - returning column collections, used for default handling and ultimately - populating the CursorResult's prefetch_cols() and postfetch_cols() - collections. - - """ - - # note: the _get_crud_params() system was written with the notion in mind - # that INSERT, UPDATE, DELETE are always the top level statement and - # that there is only one of them. With the addition of CTEs that can - # make use of DML, this assumption is no longer accurate; the DML - # statement is not necessarily the top-level "row returning" thing - # and it is also theoretically possible (fortunately nobody has asked yet) - # to have a single statement with multiple DMLs inside of it via CTEs. - - # the current _get_crud_params() design doesn't accommodate these cases - # right now. It "just works" for a CTE that has a single DML inside of - # it, and for a CTE with multiple DML, it's not clear what would happen. - - # overall, the "compiler.XYZ" collections here would need to be in a - # per-DML structure of some kind, and DefaultDialect would need to - # navigate these collections on a per-statement basis, with additional - # emphasis on the "toplevel returning data" statement. However we - # still need to run through _get_crud_params() for all DML as we have - # Python / SQL generated column defaults that need to be rendered. - - # if there is user need for this kind of thing, it's likely a post 2.0 - # kind of change as it would require deep changes to DefaultDialect - # as well as here. 
- - compiler.postfetch = [] - compiler.insert_prefetch = [] - compiler.update_prefetch = [] - compiler.implicit_returning = [] - - # getters - these are normally just column.key, - # but in the case of mysql multi-table update, the rules for - # .key must conditionally take tablename into account - ( - _column_as_key, - _getattr_col_key, - _col_bind_name, - ) = _key_getters_for_crud_column(compiler, stmt, compile_state) - - compiler._get_bind_name_for_col = _col_bind_name - - if stmt._returning and stmt._return_defaults: - raise exc.CompileError( - "Can't compile statement that includes returning() and " - "return_defaults() simultaneously" - ) - - if compile_state.isdelete: - _setup_delete_return_defaults( - compiler, - stmt, - compile_state, - (), - _getattr_col_key, - _column_as_key, - _col_bind_name, - (), - (), - toplevel, - kw, - ) - return _CrudParams([], []) - - # no parameters in the statement, no parameters in the - # compiled params - return binds for all columns - if compiler.column_keys is None and compile_state._no_parameters: - return _CrudParams( - [ - ( - c, - compiler.preparer.format_column(c), - _create_bind_param(compiler, c, None, required=True), - (c.key,), - ) - for c in stmt.table.columns - ], - [], - ) - - stmt_parameter_tuples: Optional[ - List[Tuple[Union[str, ColumnClause[Any]], Any]] - ] - spd: Optional[MutableMapping[_DMLColumnElement, Any]] - - if ( - _compile_state_isinsert(compile_state) - and compile_state._has_multi_parameters - ): - mp = compile_state._multi_parameters - assert mp is not None - spd = mp[0] - stmt_parameter_tuples = list(spd.items()) - elif compile_state._ordered_values: - spd = compile_state._dict_parameters - stmt_parameter_tuples = compile_state._ordered_values - elif compile_state._dict_parameters: - spd = compile_state._dict_parameters - stmt_parameter_tuples = list(spd.items()) - else: - stmt_parameter_tuples = spd = None - - # if we have statement parameters - set defaults in the - # compiled params - if 
compiler.column_keys is None: - parameters = {} - elif stmt_parameter_tuples: - assert spd is not None - parameters = { - _column_as_key(key): REQUIRED - for key in compiler.column_keys - if key not in spd - } - else: - parameters = { - _column_as_key(key): REQUIRED for key in compiler.column_keys - } - - # create a list of column assignment clauses as tuples - values: List[_CrudParamElement] = [] - - if stmt_parameter_tuples is not None: - _get_stmt_parameter_tuples_params( - compiler, - compile_state, - parameters, - stmt_parameter_tuples, - _column_as_key, - values, - kw, - ) - - check_columns: Dict[str, ColumnClause[Any]] = {} - - # special logic that only occurs for multi-table UPDATE - # statements - if dml.isupdate(compile_state) and compile_state.is_multitable: - _get_update_multitable_params( - compiler, - stmt, - compile_state, - stmt_parameter_tuples, - check_columns, - _col_bind_name, - _getattr_col_key, - values, - kw, - ) - - if _compile_state_isinsert(compile_state) and stmt._select_names: - # is an insert from select, is not a multiparams - - assert not compile_state._has_multi_parameters - - _scan_insert_from_select_cols( - compiler, - stmt, - compile_state, - parameters, - _getattr_col_key, - _column_as_key, - _col_bind_name, - check_columns, - values, - toplevel, - kw, - ) - else: - _scan_cols( - compiler, - stmt, - compile_state, - parameters, - _getattr_col_key, - _column_as_key, - _col_bind_name, - check_columns, - values, - toplevel, - kw, - ) - - # [20] CrateDB patch. - # - # This sanity check performed by SQLAlchemy currently needs to be - # deactivated in order to satisfy the rewriting logic of the CrateDB - # dialect in `rewrite_update` and `visit_update`. 
- # - # It can be quickly reproduced by activating this section and running the - # test cases:: - # - # ./bin/test -vvvv -t dict_test - # - # That croaks like:: - # - # sqlalchemy.exc.CompileError: Unconsumed column names: characters_name - # - # TODO: Investigate why this is actually happening and eventually mitigate - # the root cause. - """ - if parameters and stmt_parameter_tuples: - check = ( - set(parameters) - .intersection(_column_as_key(k) for k, v in stmt_parameter_tuples) - .difference(check_columns) - ) - if check: - raise exc.CompileError( - "Unconsumed column names: %s" - % (", ".join("%s" % (c,) for c in check)) - ) - """ - - if ( - _compile_state_isinsert(compile_state) - and compile_state._has_multi_parameters - ): - # is a multiparams, is not an insert from a select - assert not stmt._select_names - multi_extended_values = _extend_values_for_multiparams( - compiler, - stmt, - compile_state, - cast( - "Sequence[_CrudParamElementStr]", - values, - ), - cast("Callable[..., str]", _column_as_key), - kw, - ) - return _CrudParams(values, multi_extended_values) - elif ( - not values - and compiler.for_executemany - and compiler.dialect.supports_default_metavalue - ): - # convert an "INSERT DEFAULT VALUES" - # into INSERT (firstcol) VALUES (DEFAULT) which can be turned - # into an in-place multi values. This supports - # insert_executemany_returning mode :) - values = [ - ( - _as_dml_column(stmt.table.columns[0]), - compiler.preparer.format_column(stmt.table.columns[0]), - compiler.dialect.default_metavalue_token, - (), - ) - ] - - return _CrudParams(values, []) diff --git a/src/crate/client/sqlalchemy/compiler.py b/src/crate/client/sqlalchemy/compiler.py deleted file mode 100644 index 767ad638..00000000 --- a/src/crate/client/sqlalchemy/compiler.py +++ /dev/null @@ -1,318 +0,0 @@ -# -*- coding: utf-8; -*- -# -# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor -# license agreements. 
See the NOTICE file distributed with this work for -# additional information regarding copyright ownership. Crate licenses -# this file to you under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. You may -# obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# However, if you have executed another commercial license agreement -# with Crate these terms will supersede the license and you may use the -# software solely pursuant to the terms of the relevant commercial agreement. - -import string -import warnings -from collections import defaultdict - -import sqlalchemy as sa -from sqlalchemy.dialects.postgresql.base import PGCompiler -from sqlalchemy.sql import compiler -from sqlalchemy.types import String -from .types import MutableDict, ObjectTypeImpl, Geopoint, Geoshape -from .sa_version import SA_VERSION, SA_1_4 - - -def rewrite_update(clauseelement, multiparams, params): - """ change the params to enable partial updates - - sqlalchemy by default only supports updates of complex types in the form of - - "col = ?", ({"x": 1, "y": 2} - - but crate supports - - "col['x'] = ?, col['y'] = ?", (1, 2) - - by using the `ObjectType` (`MutableDict`) type. - The update statement is only rewritten if an item of the MutableDict was - changed. 
- """ - newmultiparams = [] - _multiparams = multiparams[0] - if len(_multiparams) == 0: - return clauseelement, multiparams, params - for _params in _multiparams: - newparams = {} - for key, val in _params.items(): - if ( - not isinstance(val, MutableDict) or - (not any(val._changed_keys) and not any(val._deleted_keys)) - ): - newparams[key] = val - continue - - for subkey, subval in val.items(): - if subkey in val._changed_keys: - newparams["{0}['{1}']".format(key, subkey)] = subval - for subkey in val._deleted_keys: - newparams["{0}['{1}']".format(key, subkey)] = None - newmultiparams.append(newparams) - _multiparams = (newmultiparams, ) - clause = clauseelement.values(newmultiparams[0]) - clause._crate_specific = True - return clause, _multiparams, params - - -@sa.event.listens_for(sa.engine.Engine, "before_execute", retval=True) -def crate_before_execute(conn, clauseelement, multiparams, params, *args, **kwargs): - is_crate = type(conn.dialect).__name__ == 'CrateDialect' - if is_crate and isinstance(clauseelement, sa.sql.expression.Update): - if SA_VERSION >= SA_1_4: - if params is None: - multiparams = ([],) - else: - multiparams = ([params],) - params = {} - - clauseelement, multiparams, params = rewrite_update(clauseelement, multiparams, params) - - if SA_VERSION >= SA_1_4: - if multiparams[0]: - params = multiparams[0][0] - else: - params = multiparams[0] - multiparams = [] - - return clauseelement, multiparams, params - - -class CrateDDLCompiler(compiler.DDLCompiler): - - __special_opts_tmpl = { - 'PARTITIONED_BY': ' PARTITIONED BY ({0})' - } - __clustered_opts_tmpl = { - 'NUMBER_OF_SHARDS': ' INTO {0} SHARDS', - 'CLUSTERED_BY': ' BY ({0})', - } - __clustered_opt_tmpl = ' CLUSTERED{CLUSTERED_BY}{NUMBER_OF_SHARDS}' - - def get_column_specification(self, column, **kwargs): - colspec = self.preparer.format_column(column) + " " + \ - self.dialect.type_compiler.process(column.type) - - default = self.get_column_default_string(column) - if default is not None: 
- colspec += " DEFAULT " + default - - if column.computed is not None: - colspec += " " + self.process(column.computed) - - if column.nullable is False: - colspec += " NOT NULL" - elif column.nullable and column.primary_key: - raise sa.exc.CompileError( - "Primary key columns cannot be nullable" - ) - - if column.dialect_options['crate'].get('index') is False: - if isinstance(column.type, (Geopoint, Geoshape, ObjectTypeImpl)): - raise sa.exc.CompileError( - "Disabling indexing is not supported for column " - "types OBJECT, GEO_POINT, and GEO_SHAPE" - ) - - colspec += " INDEX OFF" - - if column.dialect_options['crate'].get('columnstore') is False: - if not isinstance(column.type, (String, )): - raise sa.exc.CompileError( - "Controlling the columnstore is only allowed for STRING columns" - ) - - colspec += " STORAGE WITH (columnstore = false)" - - return colspec - - def visit_computed_column(self, generated): - if generated.persisted is False: - raise sa.exc.CompileError( - "Virtual computed columns are not supported, set " - "'persisted' to None or True" - ) - - return "GENERATED ALWAYS AS (%s)" % self.sql_compiler.process( - generated.sqltext, include_table=False, literal_binds=True - ) - - def post_create_table(self, table): - special_options = '' - clustered_options = defaultdict(str) - table_opts = [] - - opts = dict( - (k[len(self.dialect.name) + 1:].upper(), v) - for k, v, in table.kwargs.items() - if k.startswith('%s_' % self.dialect.name) - ) - for k, v in opts.items(): - if k in self.__special_opts_tmpl: - special_options += self.__special_opts_tmpl[k].format(v) - elif k in self.__clustered_opts_tmpl: - clustered_options[k] = self.__clustered_opts_tmpl[k].format(v) - else: - table_opts.append('{0} = {1}'.format(k, v)) - if clustered_options: - special_options += string.Formatter().vformat( - self.__clustered_opt_tmpl, (), clustered_options) - if table_opts: - return special_options + ' WITH ({0})'.format( - ', '.join(sorted(table_opts))) - return 
special_options - - def visit_foreign_key_constraint(self, constraint, **kw): - """ - CrateDB does not support foreign key constraints. - """ - warnings.warn("CrateDB does not support foreign key constraints, " - "they will be omitted when generating DDL statements.") - return None - - def visit_unique_constraint(self, constraint, **kw): - """ - CrateDB does not support unique key constraints. - """ - warnings.warn("CrateDB does not support unique constraints, " - "they will be omitted when generating DDL statements.") - return None - - -class CrateTypeCompiler(compiler.GenericTypeCompiler): - - def visit_string(self, type_, **kw): - return 'STRING' - - def visit_unicode(self, type_, **kw): - return 'STRING' - - def visit_TEXT(self, type_, **kw): - return 'STRING' - - def visit_DECIMAL(self, type_, **kw): - return 'DOUBLE' - - def visit_BIGINT(self, type_, **kw): - return 'LONG' - - def visit_NUMERIC(self, type_, **kw): - return 'LONG' - - def visit_INTEGER(self, type_, **kw): - return 'INT' - - def visit_SMALLINT(self, type_, **kw): - return 'SHORT' - - def visit_datetime(self, type_, **kw): - return 'TIMESTAMP' - - def visit_date(self, type_, **kw): - return 'TIMESTAMP' - - def visit_ARRAY(self, type_, **kw): - if type_.dimensions is not None and type_.dimensions > 1: - raise NotImplementedError( - "CrateDB doesn't support multidimensional arrays") - return 'ARRAY({0})'.format(self.process(type_.item_type)) - - def visit_OBJECT(self, type_, **kw): - return "OBJECT" - - -class CrateCompiler(compiler.SQLCompiler): - - def visit_getitem_binary(self, binary, operator, **kw): - return "{0}['{1}']".format( - self.process(binary.left, **kw), - binary.right.value - ) - - def visit_json_getitem_op_binary( - self, binary, operator, _cast_applied=False, **kw - ): - return "{0}['{1}']".format( - self.process(binary.left, **kw), - binary.right.value - ) - - def visit_any(self, element, **kw): - return "%s%sANY (%s)" % ( - self.process(element.left, **kw), - 
compiler.OPERATORS[element.operator], - self.process(element.right, **kw) - ) - - def visit_ilike_case_insensitive_operand(self, element, **kw): - """ - Use native `ILIKE` operator, like PostgreSQL's `PGCompiler`. - """ - if self.dialect.has_ilike_operator(): - return element.element._compiler_dispatch(self, **kw) - else: - return super().visit_ilike_case_insensitive_operand(element, **kw) - - def visit_ilike_op_binary(self, binary, operator, **kw): - """ - Use native `ILIKE` operator, like PostgreSQL's `PGCompiler`. - - Do not implement the `ESCAPE` functionality, because it is not - supported by CrateDB. - """ - if binary.modifiers.get("escape", None) is not None: - raise NotImplementedError("Unsupported feature: ESCAPE is not supported") - if self.dialect.has_ilike_operator(): - return "%s ILIKE %s" % ( - self.process(binary.left, **kw), - self.process(binary.right, **kw), - ) - else: - return super().visit_ilike_op_binary(binary, operator, **kw) - - def visit_not_ilike_op_binary(self, binary, operator, **kw): - """ - Use native `ILIKE` operator, like PostgreSQL's `PGCompiler`. - - Do not implement the `ESCAPE` functionality, because it is not - supported by CrateDB. - """ - if binary.modifiers.get("escape", None) is not None: - raise NotImplementedError("Unsupported feature: ESCAPE is not supported") - if self.dialect.has_ilike_operator(): - return "%s NOT ILIKE %s" % ( - self.process(binary.left, **kw), - self.process(binary.right, **kw), - ) - else: - return super().visit_not_ilike_op_binary(binary, operator, **kw) - - def limit_clause(self, select, **kw): - """ - Generate OFFSET / LIMIT clause, PostgreSQL-compatible. - """ - return PGCompiler.limit_clause(self, select, **kw) - - def for_update_clause(self, select, **kw): - # CrateDB does not support the `INSERT ... FOR UPDATE` clause. - # See https://github.com/crate/crate-python/issues/577. - warnings.warn("CrateDB does not support the 'INSERT ... 
FOR UPDATE' clause, " - "it will be omitted when generating SQL statements.") - return '' diff --git a/src/crate/client/sqlalchemy/dialect.py b/src/crate/client/sqlalchemy/dialect.py deleted file mode 100644 index 3f1197df..00000000 --- a/src/crate/client/sqlalchemy/dialect.py +++ /dev/null @@ -1,369 +0,0 @@ -# -*- coding: utf-8; -*- -# -# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor -# license agreements. See the NOTICE file distributed with this work for -# additional information regarding copyright ownership. Crate licenses -# this file to you under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. You may -# obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# However, if you have executed another commercial license agreement -# with Crate these terms will supersede the license and you may use the -# software solely pursuant to the terms of the relevant commercial agreement. 
- -import logging -from datetime import datetime, date - -from sqlalchemy import types as sqltypes -from sqlalchemy.engine import default, reflection -from sqlalchemy.sql import functions -from sqlalchemy.util import asbool, to_list - -from .compiler import ( - CrateTypeCompiler, - CrateDDLCompiler -) -from crate.client.exceptions import TimezoneUnawareException -from .sa_version import SA_VERSION, SA_1_4, SA_2_0 -from .types import ObjectType, ObjectArray - -TYPES_MAP = { - "boolean": sqltypes.Boolean, - "short": sqltypes.SmallInteger, - "smallint": sqltypes.SmallInteger, - "timestamp": sqltypes.TIMESTAMP, - "timestamp with time zone": sqltypes.TIMESTAMP, - "object": ObjectType, - "integer": sqltypes.Integer, - "long": sqltypes.NUMERIC, - "bigint": sqltypes.NUMERIC, - "double": sqltypes.DECIMAL, - "double precision": sqltypes.DECIMAL, - "object_array": ObjectArray, - "float": sqltypes.Float, - "real": sqltypes.Float, - "string": sqltypes.String, - "text": sqltypes.String -} -try: - # SQLAlchemy >= 1.1 - from sqlalchemy.types import ARRAY - TYPES_MAP["integer_array"] = ARRAY(sqltypes.Integer) - TYPES_MAP["boolean_array"] = ARRAY(sqltypes.Boolean) - TYPES_MAP["short_array"] = ARRAY(sqltypes.SmallInteger) - TYPES_MAP["smallint_array"] = ARRAY(sqltypes.SmallInteger) - TYPES_MAP["timestamp_array"] = ARRAY(sqltypes.TIMESTAMP) - TYPES_MAP["timestamp with time zone_array"] = ARRAY(sqltypes.TIMESTAMP) - TYPES_MAP["long_array"] = ARRAY(sqltypes.NUMERIC) - TYPES_MAP["bigint_array"] = ARRAY(sqltypes.NUMERIC) - TYPES_MAP["double_array"] = ARRAY(sqltypes.DECIMAL) - TYPES_MAP["double precision_array"] = ARRAY(sqltypes.DECIMAL) - TYPES_MAP["float_array"] = ARRAY(sqltypes.Float) - TYPES_MAP["real_array"] = ARRAY(sqltypes.Float) - TYPES_MAP["string_array"] = ARRAY(sqltypes.String) - TYPES_MAP["text_array"] = ARRAY(sqltypes.String) -except Exception: - pass - - -log = logging.getLogger(__name__) - - -class Date(sqltypes.Date): - def bind_processor(self, dialect): - def 
process(value): - if value is not None: - assert isinstance(value, date) - return value.strftime('%Y-%m-%d') - return process - - def result_processor(self, dialect, coltype): - def process(value): - if not value: - return - try: - return datetime.utcfromtimestamp(value / 1e3).date() - except TypeError: - pass - - # Crate doesn't really have datetime or date types but a - # timestamp type. The "date" mapping (conversion to long) - # is only applied if the schema definition for the column exists - # and if the sql insert statement was used. - # In case of dynamic mapping or using the rest indexing endpoint - # the date will be returned in the format it was inserted. - log.warning( - "Received timestamp isn't a long value." - "Trying to parse as date string and then as datetime string") - try: - return datetime.strptime(value, '%Y-%m-%d').date() - except ValueError: - return datetime.strptime(value, '%Y-%m-%dT%H:%M:%S.%fZ').date() - return process - - -class DateTime(sqltypes.DateTime): - - TZ_ERROR_MSG = "Timezone aware datetime objects are not supported" - - def bind_processor(self, dialect): - def process(value): - if value is not None: - assert isinstance(value, datetime) - if value.tzinfo is not None: - raise TimezoneUnawareException(DateTime.TZ_ERROR_MSG) - return value.strftime('%Y-%m-%dT%H:%M:%S.%fZ') - return value - return process - - def result_processor(self, dialect, coltype): - def process(value): - if not value: - return - try: - return datetime.utcfromtimestamp(value / 1e3) - except TypeError: - pass - - # Crate doesn't really have datetime or date types but a - # timestamp type. The "date" mapping (conversion to long) - # is only applied if the schema definition for the column exists - # and if the sql insert statement was used. - # In case of dynamic mapping or using the rest indexing endpoint - # the date will be returned in the format it was inserted. - log.warning( - "Received timestamp isn't a long value." 
- "Trying to parse as datetime string and then as date string") - try: - return datetime.strptime(value, '%Y-%m-%dT%H:%M:%S.%fZ') - except ValueError: - return datetime.strptime(value, '%Y-%m-%d') - return process - - -colspecs = { - sqltypes.DateTime: DateTime, - sqltypes.Date: Date -} - - -if SA_VERSION >= SA_2_0: - from .compat.core20 import CrateCompilerSA20 - statement_compiler = CrateCompilerSA20 -elif SA_VERSION >= SA_1_4: - from .compat.core14 import CrateCompilerSA14 - statement_compiler = CrateCompilerSA14 -else: - from .compat.core10 import CrateCompilerSA10 - statement_compiler = CrateCompilerSA10 - - -class CrateDialect(default.DefaultDialect): - name = 'crate' - driver = 'crate-python' - default_paramstyle = 'qmark' - statement_compiler = statement_compiler - ddl_compiler = CrateDDLCompiler - type_compiler = CrateTypeCompiler - use_insertmanyvalues = True - use_insertmanyvalues_wo_returning = True - supports_multivalues_insert = True - supports_native_boolean = True - supports_statement_cache = True - colspecs = colspecs - implicit_returning = True - insert_returning = True - update_returning = True - - def __init__(self, **kwargs): - default.DefaultDialect.__init__(self, **kwargs) - - # CrateDB does not need `OBJECT` types to be serialized as JSON. - # Corresponding data is forwarded 1:1, and will get marshalled - # by the low-level driver. - self._json_deserializer = lambda x: x - self._json_serializer = lambda x: x - - # Currently, our SQL parser doesn't support unquoted column names that - # start with _. Adding it here causes sqlalchemy to quote such columns. 
- self.identifier_preparer.illegal_initial_characters.add('_') - - def initialize(self, connection): - # get lowest server version - self.server_version_info = \ - self._get_server_version_info(connection) - # get default schema name - self.default_schema_name = \ - self._get_default_schema_name(connection) - - def do_rollback(self, connection): - # if any exception is raised by the dbapi, sqlalchemy by default - # attempts to do a rollback crate doesn't support rollbacks. - # implementing this as noop seems to cause sqlalchemy to propagate the - # original exception to the user - pass - - def connect(self, host=None, port=None, *args, **kwargs): - server = None - if host: - server = '{0}:{1}'.format(host, port or '4200') - if 'servers' in kwargs: - server = kwargs.pop('servers') - servers = to_list(server) - if servers: - use_ssl = asbool(kwargs.pop("ssl", False)) - if use_ssl: - servers = ["https://" + server for server in servers] - return self.dbapi.connect(servers=servers, **kwargs) - return self.dbapi.connect(**kwargs) - - def _get_default_schema_name(self, connection): - return 'doc' - - def _get_server_version_info(self, connection): - return tuple(connection.connection.lowest_server_version.version) - - @classmethod - def import_dbapi(cls): - from crate import client - return client - - @classmethod - def dbapi(cls): - return cls.import_dbapi() - - def has_schema(self, connection, schema, **kw): - return schema in self.get_schema_names(connection, **kw) - - def has_table(self, connection, table_name, schema=None, **kw): - return table_name in self.get_table_names(connection, schema=schema, **kw) - - @reflection.cache - def get_schema_names(self, connection, **kw): - cursor = connection.exec_driver_sql( - "select schema_name " - "from information_schema.schemata " - "order by schema_name asc" - ) - return [row[0] for row in cursor.fetchall()] - - @reflection.cache - def get_table_names(self, connection, schema=None, **kw): - cursor = 
connection.exec_driver_sql( - "SELECT table_name FROM information_schema.tables " - "WHERE {0} = ? " - "AND table_type = 'BASE TABLE' " - "ORDER BY table_name ASC, {0} ASC".format(self.schema_column), - (schema or self.default_schema_name, ) - ) - return [row[0] for row in cursor.fetchall()] - - @reflection.cache - def get_view_names(self, connection, schema=None, **kw): - cursor = connection.exec_driver_sql( - "SELECT table_name FROM information_schema.views " - "ORDER BY table_name ASC, {0} ASC".format(self.schema_column), - (schema or self.default_schema_name, ) - ) - return [row[0] for row in cursor.fetchall()] - - @reflection.cache - def get_columns(self, connection, table_name, schema=None, **kw): - query = "SELECT column_name, data_type " \ - "FROM information_schema.columns " \ - "WHERE table_name = ? AND {0} = ? " \ - "AND column_name !~ ?" \ - .format(self.schema_column) - cursor = connection.exec_driver_sql( - query, - (table_name, - schema or self.default_schema_name, - r"(.*)\[\'(.*)\'\]") # regex to filter subscript - ) - return [self._create_column_info(row) for row in cursor.fetchall()] - - @reflection.cache - def get_pk_constraint(self, engine, table_name, schema=None, **kw): - if self.server_version_info >= (3, 0, 0): - query = """SELECT column_name - FROM information_schema.key_column_usage - WHERE table_name = ? AND table_schema = ?""" - - def result_fun(result): - rows = result.fetchall() - return set(map(lambda el: el[0], rows)) - - elif self.server_version_info >= (2, 3, 0): - query = """SELECT column_name - FROM information_schema.key_column_usage - WHERE table_name = ? AND table_catalog = ?""" - - def result_fun(result): - rows = result.fetchall() - return set(map(lambda el: el[0], rows)) - - else: - query = """SELECT constraint_name - FROM information_schema.table_constraints - WHERE table_name = ? AND {schema_col} = ? 
- AND constraint_type='PRIMARY_KEY' - """.format(schema_col=self.schema_column) - - def result_fun(result): - rows = result.fetchone() - return set(rows[0] if rows else []) - - pk_result = engine.exec_driver_sql( - query, - (table_name, schema or self.default_schema_name) - ) - pks = result_fun(pk_result) - return {'constrained_columns': pks, - 'name': 'PRIMARY KEY'} - - @reflection.cache - def get_foreign_keys(self, connection, table_name, schema=None, - postgresql_ignore_search_path=False, **kw): - # Crate doesn't support Foreign Keys, so this stays empty - return [] - - @reflection.cache - def get_indexes(self, connection, table_name, schema, **kw): - return [] - - @property - def schema_column(self): - return "table_schema" - - def _create_column_info(self, row): - return { - 'name': row[0], - 'type': self._resolve_type(row[1]), - # In Crate every column is nullable except PK - # Primary Key Constraints are not nullable anyway, no matter what - # we return here, so it's fine to return always `True` - 'nullable': True - } - - def _resolve_type(self, type_): - return TYPES_MAP.get(type_, sqltypes.UserDefinedType) - - def has_ilike_operator(self): - """ - Only CrateDB 4.1.0 and higher implements the `ILIKE` operator. - """ - server_version_info = self.server_version_info - return server_version_info is not None and server_version_info >= (4, 1, 0) - - -class DateTrunc(functions.GenericFunction): - name = "date_trunc" - type = sqltypes.TIMESTAMP diff --git a/src/crate/client/sqlalchemy/predicates/__init__.py b/src/crate/client/sqlalchemy/predicates/__init__.py deleted file mode 100644 index 4f974f92..00000000 --- a/src/crate/client/sqlalchemy/predicates/__init__.py +++ /dev/null @@ -1,99 +0,0 @@ -# -*- coding: utf-8; -*- -# -# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor -# license agreements. See the NOTICE file distributed with this work for -# additional information regarding copyright ownership. 
Crate licenses -# this file to you under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. You may -# obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# However, if you have executed another commercial license agreement -# with Crate these terms will supersede the license and you may use the -# software solely pursuant to the terms of the relevant commercial agreement. - -from sqlalchemy.sql.expression import ColumnElement, literal -from sqlalchemy.ext.compiler import compiles - - -class Match(ColumnElement): - inherit_cache = True - - def __init__(self, column, term, match_type=None, options=None): - super(Match, self).__init__() - self.column = column - self.term = term - self.match_type = match_type - self.options = options - - def compile_column(self, compiler): - if isinstance(self.column, dict): - column = ', '.join( - sorted(["{0} {1}".format(compiler.process(k), v) - for k, v in self.column.items()]) - ) - return "({0})".format(column) - else: - return "{0}".format(compiler.process(self.column)) - - def compile_term(self, compiler): - return compiler.process(literal(self.term)) - - def compile_using(self, compiler): - if self.match_type: - using = "using {0}".format(self.match_type) - with_clause = self.with_clause() - if with_clause: - using = ' '.join([using, with_clause]) - return using - if self.options: - raise ValueError("missing match_type. 
" + - "It's not allowed to specify options " + - "without match_type") - - def with_clause(self): - if self.options: - options = ', '.join( - sorted(["{0}={1}".format(k, v) - for k, v in self.options.items()]) - ) - - return "with ({0})".format(options) - - -def match(column, term, match_type=None, options=None): - """Generates match predicate for fulltext search - - :param column: A reference to a column or an index, or a subcolumn, or a - dictionary of subcolumns with boost values. - - :param term: The term to match against. This string is analyzed and the - resulting tokens are compared to the index. - - :param match_type (optional): The match type. Determine how the term is - applied and the score calculated. - - :param options (optional): The match options. Specify match type behaviour. - (Not possible without a specified match type.) Match options must be - supplied as a dictionary. - """ - return Match(column, term, match_type, options) - - -@compiles(Match) -def compile_match(match, compiler, **kwargs): - func = "match(%s, %s)" % ( - match.compile_column(compiler), - match.compile_term(compiler) - ) - using = match.compile_using(compiler) - if using: - func = ' '.join([func, using]) - return func diff --git a/src/crate/client/sqlalchemy/sa_version.py b/src/crate/client/sqlalchemy/sa_version.py deleted file mode 100644 index 6b45f8b8..00000000 --- a/src/crate/client/sqlalchemy/sa_version.py +++ /dev/null @@ -1,28 +0,0 @@ -# -*- coding: utf-8; -*- -# -# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor -# license agreements. See the NOTICE file distributed with this work for -# additional information regarding copyright ownership. Crate licenses -# this file to you under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
You may -# obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# However, if you have executed another commercial license agreement -# with Crate these terms will supersede the license and you may use the -# software solely pursuant to the terms of the relevant commercial agreement. - -import sqlalchemy as sa -from verlib2 import Version - -SA_VERSION = Version(sa.__version__) - -SA_1_4 = Version('1.4.0b1') -SA_2_0 = Version('2.0.0') diff --git a/src/crate/client/sqlalchemy/support.py b/src/crate/client/sqlalchemy/support.py deleted file mode 100644 index 326e41ce..00000000 --- a/src/crate/client/sqlalchemy/support.py +++ /dev/null @@ -1,62 +0,0 @@ -# -*- coding: utf-8; -*- -# -# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor -# license agreements. See the NOTICE file distributed with this work for -# additional information regarding copyright ownership. Crate licenses -# this file to you under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. You may -# obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-# -# However, if you have executed another commercial license agreement -# with Crate these terms will supersede the license and you may use the -# software solely pursuant to the terms of the relevant commercial agreement. -import logging - - -logger = logging.getLogger(__name__) - - -def insert_bulk(pd_table, conn, keys, data_iter): - """ - Use CrateDB's "bulk operations" endpoint as a fast path for pandas' and Dask's `to_sql()` [1] method. - - The idea is to break out of SQLAlchemy, compile the insert statement, and use the raw - DBAPI connection client, in order to invoke a request using `bulk_parameters` [2]:: - - cursor.execute(sql=sql, bulk_parameters=data) - - The vanilla implementation, used by SQLAlchemy, is:: - - data = [dict(zip(keys, row)) for row in data_iter] - conn.execute(pd_table.table.insert(), data) - - Batch chunking will happen outside of this function, for example [3] demonstrates - the relevant code in `pandas.io.sql`. - - [1] https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_sql.html - [2] https://crate.io/docs/crate/reference/en/latest/interfaces/http.html#bulk-operations - [3] https://github.com/pandas-dev/pandas/blob/v2.0.1/pandas/io/sql.py#L1011-L1027 - """ - - # Compile SQL statement and materialize batch. - sql = str(pd_table.table.insert().compile(bind=conn)) - data = list(data_iter) - - # For debugging and tracing the batches running through this method. - if logger.level == logging.DEBUG: - logger.debug(f"Bulk SQL: {sql}") - logger.debug(f"Bulk records: {len(data)}") - # logger.debug(f"Bulk data: {data}") - - # Invoke bulk insert operation. 
- cursor = conn._dbapi_connection.cursor() - cursor.execute(sql=sql, bulk_parameters=data) - cursor.close() diff --git a/src/crate/client/sqlalchemy/tests/__init__.py b/src/crate/client/sqlalchemy/tests/__init__.py deleted file mode 100644 index d6d37493..00000000 --- a/src/crate/client/sqlalchemy/tests/__init__.py +++ /dev/null @@ -1,59 +0,0 @@ -# -*- coding: utf-8 -*- - -from ..compat.api13 import monkeypatch_amend_select_sa14, monkeypatch_add_connectionfairy_driver_connection -from ..sa_version import SA_1_4, SA_VERSION -from ...test_util import ParametrizedTestCase - -# `sql.select()` of SQLAlchemy 1.3 uses old calling semantics, -# but the test cases already need the modern ones. -if SA_VERSION < SA_1_4: - monkeypatch_amend_select_sa14() - monkeypatch_add_connectionfairy_driver_connection() - -from unittest import TestLoader, TestSuite -from .connection_test import SqlAlchemyConnectionTest -from .dict_test import SqlAlchemyDictTypeTest -from .datetime_test import SqlAlchemyDateAndDateTimeTest -from .compiler_test import SqlAlchemyCompilerTest, SqlAlchemyDDLCompilerTest -from .update_test import SqlAlchemyUpdateTest -from .match_test import SqlAlchemyMatchTest -from .bulk_test import SqlAlchemyBulkTest -from .insert_from_select_test import SqlAlchemyInsertFromSelectTest -from .create_table_test import SqlAlchemyCreateTableTest -from .array_test import SqlAlchemyArrayTypeTest -from .dialect_test import SqlAlchemyDialectTest -from .function_test import SqlAlchemyFunctionTest -from .warnings_test import SqlAlchemyWarningsTest -from .query_caching import SqlAlchemyQueryCompilationCaching - - -makeSuite = TestLoader().loadTestsFromTestCase - - -def test_suite_unit(): - tests = TestSuite() - tests.addTest(makeSuite(SqlAlchemyConnectionTest)) - tests.addTest(makeSuite(SqlAlchemyDictTypeTest)) - tests.addTest(makeSuite(SqlAlchemyDateAndDateTimeTest)) - tests.addTest(makeSuite(SqlAlchemyCompilerTest)) - tests.addTest(makeSuite(SqlAlchemyDDLCompilerTest)) - 
tests.addTest(ParametrizedTestCase.parametrize(SqlAlchemyCompilerTest, param={"server_version_info": None})) - tests.addTest(ParametrizedTestCase.parametrize(SqlAlchemyCompilerTest, param={"server_version_info": (4, 0, 12)})) - tests.addTest(ParametrizedTestCase.parametrize(SqlAlchemyCompilerTest, param={"server_version_info": (4, 1, 10)})) - tests.addTest(makeSuite(SqlAlchemyUpdateTest)) - tests.addTest(makeSuite(SqlAlchemyMatchTest)) - tests.addTest(makeSuite(SqlAlchemyCreateTableTest)) - tests.addTest(makeSuite(SqlAlchemyBulkTest)) - tests.addTest(makeSuite(SqlAlchemyInsertFromSelectTest)) - tests.addTest(makeSuite(SqlAlchemyInsertFromSelectTest)) - tests.addTest(makeSuite(SqlAlchemyDialectTest)) - tests.addTest(makeSuite(SqlAlchemyFunctionTest)) - tests.addTest(makeSuite(SqlAlchemyArrayTypeTest)) - tests.addTest(makeSuite(SqlAlchemyWarningsTest)) - return tests - - -def test_suite_integration(): - tests = TestSuite() - tests.addTest(makeSuite(SqlAlchemyQueryCompilationCaching)) - return tests diff --git a/src/crate/client/sqlalchemy/tests/array_test.py b/src/crate/client/sqlalchemy/tests/array_test.py deleted file mode 100644 index 6d663327..00000000 --- a/src/crate/client/sqlalchemy/tests/array_test.py +++ /dev/null @@ -1,111 +0,0 @@ -# -*- coding: utf-8; -*- -# -# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor -# license agreements. See the NOTICE file distributed with this work for -# additional information regarding copyright ownership. Crate licenses -# this file to you under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. You may -# obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the -# License for the specific language governing permissions and limitations -# under the License. -# -# However, if you have executed another commercial license agreement -# with Crate these terms will supersede the license and you may use the -# software solely pursuant to the terms of the relevant commercial agreement. - - -from unittest import TestCase -from unittest.mock import patch, MagicMock - -import sqlalchemy as sa -from sqlalchemy.sql import operators -from sqlalchemy.orm import Session -try: - from sqlalchemy.orm import declarative_base -except ImportError: - from sqlalchemy.ext.declarative import declarative_base - -from crate.client.cursor import Cursor - -fake_cursor = MagicMock(name='fake_cursor') -FakeCursor = MagicMock(name='FakeCursor', spec=Cursor) -FakeCursor.return_value = fake_cursor - - -@patch('crate.client.connection.Cursor', FakeCursor) -class SqlAlchemyArrayTypeTest(TestCase): - - def setUp(self): - self.engine = sa.create_engine('crate://') - Base = declarative_base() - self.metadata = sa.MetaData() - - class User(Base): - __tablename__ = 'users' - - name = sa.Column(sa.String, primary_key=True) - friends = sa.Column(sa.ARRAY(sa.String)) - scores = sa.Column(sa.ARRAY(sa.Integer)) - - self.User = User - self.session = Session(bind=self.engine) - - def assertSQL(self, expected_str, actual_expr): - self.assertEqual(expected_str, str(actual_expr).replace('\n', '')) - - def test_create_with_array(self): - t1 = sa.Table('t', self.metadata, - sa.Column('int_array', sa.ARRAY(sa.Integer)), - sa.Column('str_array', sa.ARRAY(sa.String)) - ) - t1.create(self.engine) - fake_cursor.execute.assert_called_with( - ('\nCREATE TABLE t (\n\t' - 'int_array ARRAY(INT), \n\t' - 'str_array ARRAY(STRING)\n)\n\n'), - ()) - - def test_array_insert(self): - trillian = self.User(name='Trillian', friends=['Arthur', 'Ford']) - self.session.add(trillian) - self.session.commit() - fake_cursor.execute.assert_called_with( - ("INSERT INTO users (name, friends, 
scores) VALUES (?, ?, ?)"), - ('Trillian', ['Arthur', 'Ford'], None)) - - def test_any(self): - s = self.session.query(self.User.name) \ - .filter(self.User.friends.any("arthur")) - self.assertSQL( - "SELECT users.name AS users_name FROM users " - "WHERE ? = ANY (users.friends)", - s - ) - - def test_any_with_operator(self): - s = self.session.query(self.User.name) \ - .filter(self.User.scores.any(6, operator=operators.lt)) - self.assertSQL( - "SELECT users.name AS users_name FROM users " - "WHERE ? < ANY (users.scores)", - s - ) - - def test_multidimensional_arrays(self): - t1 = sa.Table('t', self.metadata, - sa.Column('unsupported_array', - sa.ARRAY(sa.Integer, dimensions=2)), - ) - err = None - try: - t1.create(self.engine) - except NotImplementedError as e: - err = e - self.assertEqual(str(err), - "CrateDB doesn't support multidimensional arrays") diff --git a/src/crate/client/sqlalchemy/tests/bulk_test.py b/src/crate/client/sqlalchemy/tests/bulk_test.py deleted file mode 100644 index a628afa0..00000000 --- a/src/crate/client/sqlalchemy/tests/bulk_test.py +++ /dev/null @@ -1,256 +0,0 @@ -# -*- coding: utf-8; -*- -# -# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor -# license agreements. See the NOTICE file distributed with this work for -# additional information regarding copyright ownership. Crate licenses -# this file to you under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. You may -# obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
-# -# However, if you have executed another commercial license agreement -# with Crate these terms will supersede the license and you may use the -# software solely pursuant to the terms of the relevant commercial agreement. -import math -import sys -from unittest import TestCase, skipIf -from unittest.mock import patch, MagicMock - -import sqlalchemy as sa -from sqlalchemy.orm import Session - -from crate.client.sqlalchemy.sa_version import SA_VERSION, SA_2_0 - -try: - from sqlalchemy.orm import declarative_base -except ImportError: - from sqlalchemy.ext.declarative import declarative_base - -from crate.client.cursor import Cursor - - -fake_cursor = MagicMock(name='fake_cursor') -FakeCursor = MagicMock(name='FakeCursor', spec=Cursor, return_value=fake_cursor) - - -class SqlAlchemyBulkTest(TestCase): - - def setUp(self): - self.engine = sa.create_engine('crate://') - Base = declarative_base() - - class Character(Base): - __tablename__ = 'characters' - - name = sa.Column(sa.String, primary_key=True) - age = sa.Column(sa.Integer) - - self.character = Character - self.session = Session(bind=self.engine) - - @skipIf(SA_VERSION >= SA_2_0, "SQLAlchemy 2.x uses modern bulk INSERT mode") - @patch('crate.client.connection.Cursor', FakeCursor) - def test_bulk_save_legacy(self): - """ - Verify legacy SQLAlchemy bulk INSERT mode. - - > bulk_save_objects: Perform a bulk save of the given list of objects. - > This method is a legacy feature as of the 2.0 series of SQLAlchemy. For modern - > bulk INSERT and UPDATE, see the sections ORM Bulk INSERT Statements and ORM Bulk - > UPDATE by Primary Key. - > - > -- https://docs.sqlalchemy.org/orm/session_api.html#sqlalchemy.orm.Session.bulk_save_objects - - > The Session includes legacy methods for performing "bulk" INSERT and UPDATE - > statements. 
These methods share implementations with the SQLAlchemy 2.0 - > versions of these features, described at ORM Bulk INSERT Statements and - > ORM Bulk UPDATE by Primary Key, however lack many features, namely RETURNING - > support as well as support for session-synchronization. - > - > -- https://docs.sqlalchemy.org/orm/queryguide/dml.html#legacy-session-bulk-insert-methods - - > The 1.4 version of the "ORM bulk insert" methods are really not very efficient and - > don't grant that much of a performance bump vs. regular ORM `session.add()`, provided - > in both cases the objects you provide already have their primary key values assigned. - > SQLAlchemy 2.0 made a much more comprehensive change to how this all works as well so - > that all INSERT methods are essentially extremely fast now, relative to the 1.x series. - > - > -- https://github.com/sqlalchemy/sqlalchemy/discussions/6935#discussioncomment-4789701 - """ - chars = [ - self.character(name='Arthur', age=35), - self.character(name='Banshee', age=26), - self.character(name='Callisto', age=37), - ] - - fake_cursor.description = () - fake_cursor.rowcount = len(chars) - fake_cursor.executemany.return_value = [ - {'rowcount': 1}, - {'rowcount': 1}, - {'rowcount': 1}, - ] - self.session.bulk_save_objects(chars) - (stmt, bulk_args), _ = fake_cursor.executemany.call_args - - expected_stmt = "INSERT INTO characters (name, age) VALUES (?, ?)" - self.assertEqual(expected_stmt, stmt) - - expected_bulk_args = ( - ('Arthur', 35), - ('Banshee', 26), - ('Callisto', 37) - ) - self.assertSequenceEqual(expected_bulk_args, bulk_args) - - @skipIf(SA_VERSION < SA_2_0, "SQLAlchemy 1.x uses legacy bulk INSERT mode") - @patch('crate.client.connection.Cursor', FakeCursor) - def test_bulk_save_modern(self): - """ - Verify modern SQLAlchemy bulk INSERT mode. 
- - > A list of parameter dictionaries sent to the `Session.execute.params` parameter, - > separate from the Insert object itself, will invoke *bulk INSERT mode* for the - > statement, which essentially means the operation will optimize as much as - > possible for many rows. - > - > -- https://docs.sqlalchemy.org/orm/queryguide/dml.html#orm-queryguide-bulk-insert - - > We have been looking into getting performance optimizations - > from `bulk_save()` to be inherently part of `add_all()`. - > - > -- https://github.com/sqlalchemy/sqlalchemy/discussions/6935#discussioncomment-1233465 - - > The remaining performance limitation, that the `cursor.executemany()` DBAPI method - > does not allow for rows to be fetched, is resolved for most backends by *foregoing* - > the use of `executemany()` and instead restructuring individual INSERT statements - > to each accommodate a large number of rows in a single statement that is invoked - > using `cursor.execute()`. This approach originates from the `psycopg2` fast execution - > helpers feature of the `psycopg2` DBAPI, which SQLAlchemy incrementally added more - > and more support towards in recent release series. - > - > -- https://docs.sqlalchemy.org/core/connections.html#engine-insertmanyvalues - """ - - # Don't truncate unittest's diff output on `assertListEqual`. 
- self.maxDiff = None - - chars = [ - self.character(name='Arthur', age=35), - self.character(name='Banshee', age=26), - self.character(name='Callisto', age=37), - ] - - fake_cursor.description = () - fake_cursor.rowcount = len(chars) - fake_cursor.execute.return_value = [ - {'rowcount': 1}, - {'rowcount': 1}, - {'rowcount': 1}, - ] - self.session.add_all(chars) - self.session.commit() - (stmt, bulk_args), _ = fake_cursor.execute.call_args - - expected_stmt = "INSERT INTO characters (name, age) VALUES (?, ?), (?, ?), (?, ?)" - self.assertEqual(expected_stmt, stmt) - - expected_bulk_args = ( - 'Arthur', 35, - 'Banshee', 26, - 'Callisto', 37, - ) - self.assertSequenceEqual(expected_bulk_args, bulk_args) - - @skipIf(sys.version_info < (3, 8), "SQLAlchemy/pandas is not supported on Python <3.8") - @skipIf(SA_VERSION < SA_2_0, "SQLAlchemy 1.4 is no longer supported by pandas 2.2") - @patch('crate.client.connection.Cursor', mock_cursor=FakeCursor) - def test_bulk_save_pandas(self, mock_cursor): - """ - Verify bulk INSERT with pandas. - """ - from crate.client.sqlalchemy.support import insert_bulk - from pueblo.testing.pandas import makeTimeDataFrame - - # 42 records / 8 chunksize = 5.25, which means 6 batches will be emitted. - INSERT_RECORDS = 42 - CHUNK_SIZE = 8 - OPCOUNT = math.ceil(INSERT_RECORDS / CHUNK_SIZE) - - # Create a DataFrame to feed into the database. - df = makeTimeDataFrame(nper=INSERT_RECORDS, freq="S") - - dburi = "crate://localhost:4200" - engine = sa.create_engine(dburi, echo=True) - retval = df.to_sql( - name="test-testdrive", - con=engine, - if_exists="replace", - index=False, - chunksize=CHUNK_SIZE, - method=insert_bulk, - ) - self.assertIsNone(retval) - - # Initializing the query has an overhead of two calls to the cursor object, probably one - # initial connection from the DB-API driver, to inquire the database version, and another - # one, for SQLAlchemy. 
SQLAlchemy will use it to inquire the table schema using `information_schema`, - # and to eventually issue the `CREATE TABLE ...` statement. - effective_op_count = mock_cursor.call_count - 2 - - # Verify number of batches. - self.assertEqual(effective_op_count, OPCOUNT) - - @skipIf(sys.version_info < (3, 8), "SQLAlchemy/Dask is not supported on Python <3.8") - @skipIf(SA_VERSION < SA_2_0, "SQLAlchemy 1.4 is no longer supported by pandas 2.2") - @patch('crate.client.connection.Cursor', mock_cursor=FakeCursor) - def test_bulk_save_dask(self, mock_cursor): - """ - Verify bulk INSERT with Dask. - """ - import dask.dataframe as dd - from crate.client.sqlalchemy.support import insert_bulk - from pueblo.testing.pandas import makeTimeDataFrame - - # 42 records / 4 partitions means each partition has a size of 10.5 elements. - # Because the chunk size 8 is slightly smaller than 10, the partition will not - # fit into it, so two batches will be emitted to the database for each data - # partition. 4 partitions * 2 batches = 8 insert operations will be emitted. - # Those settings are a perfect example of non-optimal settings, and have been - # made so on purpose, in order to demonstrate that using optimal settings - # is crucial. - INSERT_RECORDS = 42 - NPARTITIONS = 4 - CHUNK_SIZE = 8 - OPCOUNT = math.ceil(INSERT_RECORDS / NPARTITIONS / CHUNK_SIZE) * NPARTITIONS - - # Create a DataFrame to feed into the database. - df = makeTimeDataFrame(nper=INSERT_RECORDS, freq="S") - ddf = dd.from_pandas(df, npartitions=NPARTITIONS) - - dburi = "crate://localhost:4200" - retval = ddf.to_sql( - name="test-testdrive", - uri=dburi, - if_exists="replace", - index=False, - chunksize=CHUNK_SIZE, - method=insert_bulk, - parallel=True, - ) - self.assertIsNone(retval) - - # Each of the insert operation incurs another call to the cursor object. This is probably - # the initial connection from the DB-API driver, to inquire the database version. 
- # This compensation formula has been determined empirically / by educated guessing. - effective_op_count = (mock_cursor.call_count - 2 * NPARTITIONS) - 2 - - # Verify number of batches. - self.assertEqual(effective_op_count, OPCOUNT) diff --git a/src/crate/client/sqlalchemy/tests/compiler_test.py b/src/crate/client/sqlalchemy/tests/compiler_test.py deleted file mode 100644 index 9c08154b..00000000 --- a/src/crate/client/sqlalchemy/tests/compiler_test.py +++ /dev/null @@ -1,434 +0,0 @@ -# -*- coding: utf-8; -*- -# -# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor -# license agreements. See the NOTICE file distributed with this work for -# additional information regarding copyright ownership. Crate licenses -# this file to you under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. You may -# obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# However, if you have executed another commercial license agreement -# with Crate these terms will supersede the license and you may use the -# software solely pursuant to the terms of the relevant commercial agreement. 
-import warnings -from textwrap import dedent -from unittest import mock, skipIf, TestCase -from unittest.mock import MagicMock, patch - -from crate.client.cursor import Cursor -from crate.client.sqlalchemy.compiler import crate_before_execute - -import sqlalchemy as sa -from sqlalchemy.sql import text, Update - -from crate.testing.util import ExtraAssertions - -try: - from sqlalchemy.orm import declarative_base -except ImportError: - from sqlalchemy.ext.declarative import declarative_base - -from crate.client.sqlalchemy.sa_version import SA_VERSION, SA_1_4, SA_2_0 -from crate.client.sqlalchemy.types import ObjectType -from crate.client.test_util import ParametrizedTestCase - -from crate.testing.settings import crate_host - - -class SqlAlchemyCompilerTest(ParametrizedTestCase, ExtraAssertions): - - def setUp(self): - self.crate_engine = sa.create_engine('crate://') - if isinstance(self.param, dict) and "server_version_info" in self.param: - server_version_info = self.param["server_version_info"] - self.crate_engine.dialect.server_version_info = server_version_info - self.sqlite_engine = sa.create_engine('sqlite://') - self.metadata = sa.MetaData() - self.mytable = sa.Table('mytable', self.metadata, - sa.Column('name', sa.String), - sa.Column('data', ObjectType)) - - self.update = Update(self.mytable).where(text('name=:name')) - self.values = [{'name': 'crate'}] - self.values = (self.values, ) - - def test_sqlite_update_not_rewritten(self): - clauseelement, multiparams, params = crate_before_execute( - self.sqlite_engine, self.update, self.values, {} - ) - - self.assertFalse(hasattr(clauseelement, '_crate_specific')) - - def test_crate_update_rewritten(self): - clauseelement, multiparams, params = crate_before_execute( - self.crate_engine, self.update, self.values, {} - ) - - self.assertTrue(hasattr(clauseelement, '_crate_specific')) - - def test_bulk_update_on_builtin_type(self): - """ - The "before_execute" hook in the compiler doesn't get - access to the 
parameters in case of a bulk update. It - should not try to optimize any parameters. - """ - data = ({},) - clauseelement, multiparams, params = crate_before_execute( - self.crate_engine, self.update, data, None - ) - - self.assertFalse(hasattr(clauseelement, '_crate_specific')) - - def test_select_with_ilike_no_escape(self): - """ - Verify the compiler uses CrateDB's native `ILIKE` method. - """ - selectable = self.mytable.select().where(self.mytable.c.name.ilike("%foo%")) - statement = str(selectable.compile(bind=self.crate_engine)) - if self.crate_engine.dialect.has_ilike_operator(): - self.assertEqual(statement, dedent(""" - SELECT mytable.name, mytable.data - FROM mytable - WHERE mytable.name ILIKE ? - """).strip()) # noqa: W291 - else: - self.assertEqual(statement, dedent(""" - SELECT mytable.name, mytable.data - FROM mytable - WHERE lower(mytable.name) LIKE lower(?) - """).strip()) # noqa: W291 - - def test_select_with_not_ilike_no_escape(self): - """ - Verify the compiler uses CrateDB's native `ILIKE` method. - """ - selectable = self.mytable.select().where(self.mytable.c.name.notilike("%foo%")) - statement = str(selectable.compile(bind=self.crate_engine)) - if SA_VERSION < SA_1_4 or not self.crate_engine.dialect.has_ilike_operator(): - self.assertEqual(statement, dedent(""" - SELECT mytable.name, mytable.data - FROM mytable - WHERE lower(mytable.name) NOT LIKE lower(?) - """).strip()) # noqa: W291 - else: - self.assertEqual(statement, dedent(""" - SELECT mytable.name, mytable.data - FROM mytable - WHERE mytable.name NOT ILIKE ? - """).strip()) # noqa: W291 - - def test_select_with_ilike_and_escape(self): - """ - Verify the compiler fails when using CrateDB's native `ILIKE` method together with `ESCAPE`. 
- """ - - selectable = self.mytable.select().where(self.mytable.c.name.ilike("%foo%", escape='\\')) - with self.assertRaises(NotImplementedError) as cmex: - selectable.compile(bind=self.crate_engine) - self.assertEqual(str(cmex.exception), "Unsupported feature: ESCAPE is not supported") - - @skipIf(SA_VERSION < SA_1_4, "SQLAlchemy 1.3 and earlier do not support native `NOT ILIKE` compilation") - def test_select_with_not_ilike_and_escape(self): - """ - Verify the compiler fails when using CrateDB's native `ILIKE` method together with `ESCAPE`. - """ - - selectable = self.mytable.select().where(self.mytable.c.name.notilike("%foo%", escape='\\')) - with self.assertRaises(NotImplementedError) as cmex: - selectable.compile(bind=self.crate_engine) - self.assertEqual(str(cmex.exception), "Unsupported feature: ESCAPE is not supported") - - def test_select_with_offset(self): - """ - Verify the `CrateCompiler.limit_clause` method, with offset. - """ - selectable = self.mytable.select().offset(5) - statement = str(selectable.compile(bind=self.crate_engine)) - if SA_VERSION >= SA_1_4: - self.assertEqual(statement, "SELECT mytable.name, mytable.data \nFROM mytable\n LIMIT ALL OFFSET ?") - else: - self.assertEqual(statement, "SELECT mytable.name, mytable.data \nFROM mytable \n LIMIT ALL OFFSET ?") - - def test_select_with_limit(self): - """ - Verify the `CrateCompiler.limit_clause` method, with limit. - """ - selectable = self.mytable.select().limit(42) - statement = str(selectable.compile(bind=self.crate_engine)) - self.assertEqual(statement, "SELECT mytable.name, mytable.data \nFROM mytable \n LIMIT ?") - - def test_select_with_offset_and_limit(self): - """ - Verify the `CrateCompiler.limit_clause` method, with offset and limit. - """ - selectable = self.mytable.select().offset(5).limit(42) - statement = str(selectable.compile(bind=self.crate_engine)) - self.assertEqual(statement, "SELECT mytable.name, mytable.data \nFROM mytable \n LIMIT ? 
OFFSET ?") - - def test_insert_multivalues(self): - """ - Verify that "in-place multirow inserts" aka. "multivalues inserts" aka. - the `supports_multivalues_insert` dialect feature works. - - When this feature is not enabled, using it will raise an error: - - CompileError: The 'crate' dialect with current database version - settings does not support in-place multirow inserts - - > The Insert construct also supports being passed a list of dictionaries - > or full-table-tuples, which on the server will render the less common - > SQL syntax of "multiple values" - this syntax is supported on backends - > such as SQLite, PostgreSQL, MySQL, but not necessarily others. - - > It is essential to note that passing multiple values is NOT the same - > as using traditional `executemany()` form. The above syntax is a special - > syntax not typically used. To emit an INSERT statement against - > multiple rows, the normal method is to pass a multiple values list to - > the `Connection.execute()` method, which is supported by all database - > backends and is generally more efficient for a very large number of - > parameters. - - - https://docs.sqlalchemy.org/core/dml.html#sqlalchemy.sql.expression.Insert.values.params.*args - """ - records = [{"name": f"foo_{i}"} for i in range(3)] - insertable = self.mytable.insert().values(records) - statement = str(insertable.compile(bind=self.crate_engine)) - self.assertEqual(statement, "INSERT INTO mytable (name) VALUES (?), (?), (?)") - - @skipIf(SA_VERSION < SA_2_0, "SQLAlchemy 1.x does not support the 'insertmanyvalues' dialect feature") - def test_insert_manyvalues(self): - """ - Verify the `use_insertmanyvalues` and `use_insertmanyvalues_wo_returning` dialect features. 
- - > For DML statements such as "INSERT", "UPDATE" and "DELETE", we can - > send multiple parameter sets to the `Connection.execute()` method by - > passing a list of dictionaries instead of a single dictionary, which - > indicates that the single SQL statement should be invoked multiple - > times, once for each parameter set. This style of execution is known - > as "executemany". - - > A key characteristic of "insertmanyvalues" is that the size of the INSERT - > statement is limited on a fixed max number of "values" clauses as well as - > a dialect-specific fixed total number of bound parameters that may be - > represented in one INSERT statement at a time. - > When the number of parameter dictionaries given exceeds a fixed limit [...], - > multiple INSERT statements will be invoked within the scope of a single - > `Connection.execute()` call, each of which accommodate for a portion of the - > parameter dictionaries, referred towards as a "batch". - - - https://docs.sqlalchemy.org/tutorial/dbapi_transactions.html#tutorial-multiple-parameters - - https://docs.sqlalchemy.org/glossary.html#term-executemany - - https://docs.sqlalchemy.org/core/connections.html#engine-insertmanyvalues - - https://docs.sqlalchemy.org/core/connections.html#controlling-the-batch-size - """ - - # Don't truncate unittest's diff output on `assertListEqual`. - self.maxDiff = None - - # Five records with a batch size of two should produce three `INSERT` statements. - record_count = 5 - batch_size = 2 - - # Prepare input data and verify insert statement. - records = [{"name": f"foo_{i}"} for i in range(record_count)] - insertable = self.mytable.insert() - statement = str(insertable.compile(bind=self.crate_engine)) - self.assertEqual(statement, "INSERT INTO mytable (name, data) VALUES (?, ?)") - - with mock.patch("crate.client.http.Client.sql", autospec=True, return_value={"cols": []}) as client_mock: - - with self.crate_engine.begin() as conn: - # Adjust page size on a per-connection level. 
- conn.execution_options(insertmanyvalues_page_size=batch_size) - conn.execute(insertable, parameters=records) - - # Verify that input data has been batched correctly. - self.assertListEqual(client_mock.mock_calls, [ - mock.call(mock.ANY, 'INSERT INTO mytable (name) VALUES (?), (?)', ('foo_0', 'foo_1'), None), - mock.call(mock.ANY, 'INSERT INTO mytable (name) VALUES (?), (?)', ('foo_2', 'foo_3'), None), - mock.call(mock.ANY, 'INSERT INTO mytable (name) VALUES (?)', ('foo_4', ), None), - ]) - - def test_for_update(self): - """ - Verify the `CrateCompiler.for_update_clause` method to - omit the clause, since CrateDB does not support it. - """ - - with warnings.catch_warnings(record=True) as w: - - # By default, warnings from a loop will only be emitted once. - # This scenario tests exactly this behaviour, to verify logs - # don't get flooded. - warnings.simplefilter("once") - - selectable = self.mytable.select().with_for_update() - _ = str(selectable.compile(bind=self.crate_engine)) - - selectable = self.mytable.select().with_for_update() - statement = str(selectable.compile(bind=self.crate_engine)) - - # Verify SQL statement. - self.assertEqual(statement, "SELECT mytable.name, mytable.data \nFROM mytable") - - # Verify if corresponding warning is emitted, once. - self.assertEqual(len(w), 1) - self.assertIsSubclass(w[-1].category, UserWarning) - self.assertIn("CrateDB does not support the 'INSERT ... FOR UPDATE' clause, " - "it will be omitted when generating SQL statements.", str(w[-1].message)) - - -FakeCursor = MagicMock(name='FakeCursor', spec=Cursor) - - -class CompilerTestCase(TestCase): - """ - A base class for providing mocking infrastructure to validate the DDL compiler. 
- """ - - def setUp(self): - self.engine = sa.create_engine(f"crate://{crate_host}") - self.metadata = sa.MetaData(schema="testdrive") - self.session = sa.orm.Session(bind=self.engine) - self.setup_mock() - - def setup_mock(self): - """ - Set up a fake cursor, in order to intercept query execution. - """ - - self.fake_cursor = MagicMock(name="fake_cursor") - FakeCursor.return_value = self.fake_cursor - - self.executed_statement = None - self.fake_cursor.execute = self.execute_wrapper - - def execute_wrapper(self, query, *args, **kwargs): - """ - Receive the SQL query expression, and store it. - """ - self.executed_statement = query - return self.fake_cursor - - -@patch('crate.client.connection.Cursor', FakeCursor) -class SqlAlchemyDDLCompilerTest(CompilerTestCase, ExtraAssertions): - """ - Verify a few scenarios regarding the DDL compiler. - """ - - def test_ddl_with_foreign_keys(self): - """ - Verify the CrateDB dialect properly ignores foreign key constraints. - """ - - Base = declarative_base(metadata=self.metadata) - - class RootStore(Base): - """The main store.""" - - __tablename__ = "root" - - id = sa.Column(sa.Integer, primary_key=True) - name = sa.Column(sa.String) - - items = sa.orm.relationship( - "ItemStore", - back_populates="root", - passive_deletes=True, - ) - - class ItemStore(Base): - """The auxiliary store.""" - - __tablename__ = "item" - - id = sa.Column(sa.Integer, primary_key=True) - name = sa.Column(sa.String) - root_id = sa.Column( - sa.Integer, - sa.ForeignKey( - f"{RootStore.__tablename__}.id", - ondelete="CASCADE", - ), - ) - root = sa.orm.relationship(RootStore, back_populates="items") - - with warnings.catch_warnings(record=True) as w: - - # Cause all warnings to always be triggered. - warnings.simplefilter("always") - - # Verify SQL DDL statement. 
- self.metadata.create_all(self.engine, tables=[RootStore.__table__], checkfirst=False) - self.assertEqual(self.executed_statement, dedent(""" - CREATE TABLE testdrive.root ( - \tid INT NOT NULL, - \tname STRING, - \tPRIMARY KEY (id) - ) - - """)) # noqa: W291, W293 - - # Verify SQL DDL statement. - self.metadata.create_all(self.engine, tables=[ItemStore.__table__], checkfirst=False) - self.assertEqual(self.executed_statement, dedent(""" - CREATE TABLE testdrive.item ( - \tid INT NOT NULL, - \tname STRING, - \troot_id INT, - \tPRIMARY KEY (id) - ) - - """)) # noqa: W291, W293 - - # Verify if corresponding warning is emitted. - self.assertEqual(len(w), 1) - self.assertIsSubclass(w[-1].category, UserWarning) - self.assertIn("CrateDB does not support foreign key constraints, " - "they will be omitted when generating DDL statements.", str(w[-1].message)) - - def test_ddl_with_unique_key(self): - """ - Verify the CrateDB dialect properly ignores unique key constraints. - """ - - Base = declarative_base(metadata=self.metadata) - - class FooBar(Base): - """The entity.""" - - __tablename__ = "foobar" - - id = sa.Column(sa.Integer, primary_key=True) - name = sa.Column(sa.String, unique=True) - - with warnings.catch_warnings(record=True) as w: - - # Cause all warnings to always be triggered. - warnings.simplefilter("always") - - # Verify SQL DDL statement. - self.metadata.create_all(self.engine, tables=[FooBar.__table__], checkfirst=False) - self.assertEqual(self.executed_statement, dedent(""" - CREATE TABLE testdrive.foobar ( - \tid INT NOT NULL, - \tname STRING, - \tPRIMARY KEY (id) - ) - - """)) # noqa: W291, W293 - - # Verify if corresponding warning is emitted. 
- self.assertEqual(len(w), 1) - self.assertIsSubclass(w[-1].category, UserWarning) - self.assertIn("CrateDB does not support unique constraints, " - "they will be omitted when generating DDL statements.", str(w[-1].message)) diff --git a/src/crate/client/sqlalchemy/tests/connection_test.py b/src/crate/client/sqlalchemy/tests/connection_test.py deleted file mode 100644 index f1a560e9..00000000 --- a/src/crate/client/sqlalchemy/tests/connection_test.py +++ /dev/null @@ -1,129 +0,0 @@ -# -*- coding: utf-8; -*- -# -# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor -# license agreements. See the NOTICE file distributed with this work for -# additional information regarding copyright ownership. Crate licenses -# this file to you under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. You may -# obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# However, if you have executed another commercial license agreement -# with Crate these terms will supersede the license and you may use the -# software solely pursuant to the terms of the relevant commercial agreement. 
- -from unittest import TestCase -import sqlalchemy as sa -from sqlalchemy.exc import NoSuchModuleError - - -class SqlAlchemyConnectionTest(TestCase): - - def test_connection_server_uri_unknown_sa_plugin(self): - with self.assertRaises(NoSuchModuleError): - sa.create_engine("foobar://otherhost:19201") - - def test_default_connection(self): - engine = sa.create_engine('crate://') - conn = engine.raw_connection() - self.assertEqual(">", - repr(conn.driver_connection)) - conn.close() - engine.dispose() - - def test_connection_server_uri_http(self): - engine = sa.create_engine( - "crate://otherhost:19201") - conn = engine.raw_connection() - self.assertEqual(">", - repr(conn.driver_connection)) - conn.close() - engine.dispose() - - def test_connection_server_uri_https(self): - engine = sa.create_engine( - "crate://otherhost:19201/?ssl=true") - conn = engine.raw_connection() - self.assertEqual(">", - repr(conn.driver_connection)) - conn.close() - engine.dispose() - - def test_connection_server_uri_invalid_port(self): - with self.assertRaises(ValueError) as context: - sa.create_engine("crate://foo:bar") - self.assertIn("invalid literal for int() with base 10: 'bar'", str(context.exception)) - - def test_connection_server_uri_https_with_trusted_user(self): - engine = sa.create_engine( - "crate://foo@otherhost:19201/?ssl=true") - conn = engine.raw_connection() - self.assertEqual(">", - repr(conn.driver_connection)) - self.assertEqual(conn.driver_connection.client.username, "foo") - self.assertEqual(conn.driver_connection.client.password, None) - conn.close() - engine.dispose() - - def test_connection_server_uri_https_with_credentials(self): - engine = sa.create_engine( - "crate://foo:bar@otherhost:19201/?ssl=true") - conn = engine.raw_connection() - self.assertEqual(">", - repr(conn.driver_connection)) - self.assertEqual(conn.driver_connection.client.username, "foo") - self.assertEqual(conn.driver_connection.client.password, "bar") - conn.close() - engine.dispose() - - def 
test_connection_server_uri_parameter_timeout(self): - engine = sa.create_engine( - "crate://otherhost:19201/?timeout=42.42") - conn = engine.raw_connection() - self.assertEqual(conn.driver_connection.client._pool_kw["timeout"], 42.42) - conn.close() - engine.dispose() - - def test_connection_server_uri_parameter_pool_size(self): - engine = sa.create_engine( - "crate://otherhost:19201/?pool_size=20") - conn = engine.raw_connection() - self.assertEqual(conn.driver_connection.client._pool_kw["maxsize"], 20) - conn.close() - engine.dispose() - - def test_connection_multiple_server_http(self): - engine = sa.create_engine( - "crate://", connect_args={ - 'servers': ['localhost:4201', 'localhost:4202'] - } - ) - conn = engine.raw_connection() - self.assertEqual( - ">", - repr(conn.driver_connection)) - conn.close() - engine.dispose() - - def test_connection_multiple_server_https(self): - engine = sa.create_engine( - "crate://", connect_args={ - 'servers': ['localhost:4201', 'localhost:4202'], - 'ssl': True, - } - ) - conn = engine.raw_connection() - self.assertEqual( - ">", - repr(conn.driver_connection)) - conn.close() - engine.dispose() diff --git a/src/crate/client/sqlalchemy/tests/create_table_test.py b/src/crate/client/sqlalchemy/tests/create_table_test.py deleted file mode 100644 index 4c6072aa..00000000 --- a/src/crate/client/sqlalchemy/tests/create_table_test.py +++ /dev/null @@ -1,313 +0,0 @@ -# -*- coding: utf-8; -*- -# -# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor -# license agreements. See the NOTICE file distributed with this work for -# additional information regarding copyright ownership. Crate licenses -# this file to you under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
You may -# obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# However, if you have executed another commercial license agreement -# with Crate these terms will supersede the license and you may use the -# software solely pursuant to the terms of the relevant commercial agreement. - -import sqlalchemy as sa -try: - from sqlalchemy.orm import declarative_base -except ImportError: - from sqlalchemy.ext.declarative import declarative_base - -from crate.client.sqlalchemy.types import ObjectType, ObjectArray, Geopoint -from crate.client.cursor import Cursor - -from unittest import TestCase -from unittest.mock import patch, MagicMock - - -fake_cursor = MagicMock(name='fake_cursor') -FakeCursor = MagicMock(name='FakeCursor', spec=Cursor) -FakeCursor.return_value = fake_cursor - - -@patch('crate.client.connection.Cursor', FakeCursor) -class SqlAlchemyCreateTableTest(TestCase): - - def setUp(self): - self.engine = sa.create_engine('crate://') - self.Base = declarative_base() - - def test_table_basic_types(self): - class User(self.Base): - __tablename__ = 'users' - string_col = sa.Column(sa.String, primary_key=True) - unicode_col = sa.Column(sa.Unicode) - text_col = sa.Column(sa.Text) - int_col = sa.Column(sa.Integer) - long_col1 = sa.Column(sa.BigInteger) - long_col2 = sa.Column(sa.NUMERIC) - bool_col = sa.Column(sa.Boolean) - short_col = sa.Column(sa.SmallInteger) - datetime_col = sa.Column(sa.DateTime) - date_col = sa.Column(sa.Date) - float_col = sa.Column(sa.Float) - double_col = sa.Column(sa.DECIMAL) - - self.Base.metadata.create_all(bind=self.engine) - fake_cursor.execute.assert_called_with( - ('\nCREATE TABLE users 
(\n\tstring_col STRING NOT NULL, ' - '\n\tunicode_col STRING, \n\ttext_col STRING, \n\tint_col INT, ' - '\n\tlong_col1 LONG, \n\tlong_col2 LONG, ' - '\n\tbool_col BOOLEAN, ' - '\n\tshort_col SHORT, ' - '\n\tdatetime_col TIMESTAMP, \n\tdate_col TIMESTAMP, ' - '\n\tfloat_col FLOAT, \n\tdouble_col DOUBLE, ' - '\n\tPRIMARY KEY (string_col)\n)\n\n'), - ()) - - def test_column_obj(self): - class DummyTable(self.Base): - __tablename__ = 'dummy' - pk = sa.Column(sa.String, primary_key=True) - obj_col = sa.Column(ObjectType) - self.Base.metadata.create_all(bind=self.engine) - fake_cursor.execute.assert_called_with( - ('\nCREATE TABLE dummy (\n\tpk STRING NOT NULL, \n\tobj_col OBJECT, ' - '\n\tPRIMARY KEY (pk)\n)\n\n'), - ()) - - def test_table_clustered_by(self): - class DummyTable(self.Base): - __tablename__ = 't' - __table_args__ = { - 'crate_clustered_by': 'p' - } - pk = sa.Column(sa.String, primary_key=True) - p = sa.Column(sa.String) - self.Base.metadata.create_all(bind=self.engine) - fake_cursor.execute.assert_called_with( - ('\nCREATE TABLE t (\n\t' - 'pk STRING NOT NULL, \n\t' - 'p STRING, \n\t' - 'PRIMARY KEY (pk)\n' - ') CLUSTERED BY (p)\n\n'), - ()) - - def test_column_computed(self): - class DummyTable(self.Base): - __tablename__ = 't' - ts = sa.Column(sa.BigInteger, primary_key=True) - p = sa.Column(sa.BigInteger, sa.Computed("date_trunc('day', ts)")) - self.Base.metadata.create_all(bind=self.engine) - fake_cursor.execute.assert_called_with( - ('\nCREATE TABLE t (\n\t' - 'ts LONG NOT NULL, \n\t' - 'p LONG GENERATED ALWAYS AS (date_trunc(\'day\', ts)), \n\t' - 'PRIMARY KEY (ts)\n' - ')\n\n'), - ()) - - def test_column_computed_virtual(self): - class DummyTable(self.Base): - __tablename__ = 't' - ts = sa.Column(sa.BigInteger, primary_key=True) - p = sa.Column(sa.BigInteger, sa.Computed("date_trunc('day', ts)", persisted=False)) - with self.assertRaises(sa.exc.CompileError): - self.Base.metadata.create_all(bind=self.engine) - - def test_table_partitioned_by(self): 
- class DummyTable(self.Base): - __tablename__ = 't' - __table_args__ = { - 'crate_partitioned_by': 'p', - 'invalid_option': 1 - } - pk = sa.Column(sa.String, primary_key=True) - p = sa.Column(sa.String) - self.Base.metadata.create_all(bind=self.engine) - fake_cursor.execute.assert_called_with( - ('\nCREATE TABLE t (\n\t' - 'pk STRING NOT NULL, \n\t' - 'p STRING, \n\t' - 'PRIMARY KEY (pk)\n' - ') PARTITIONED BY (p)\n\n'), - ()) - - def test_table_number_of_shards_and_replicas(self): - class DummyTable(self.Base): - __tablename__ = 't' - __table_args__ = { - 'crate_number_of_replicas': '2', - 'crate_number_of_shards': 3 - } - pk = sa.Column(sa.String, primary_key=True) - - self.Base.metadata.create_all(bind=self.engine) - fake_cursor.execute.assert_called_with( - ('\nCREATE TABLE t (\n\t' - 'pk STRING NOT NULL, \n\t' - 'PRIMARY KEY (pk)\n' - ') CLUSTERED INTO 3 SHARDS WITH (NUMBER_OF_REPLICAS = 2)\n\n'), - ()) - - def test_table_clustered_by_and_number_of_shards(self): - class DummyTable(self.Base): - __tablename__ = 't' - __table_args__ = { - 'crate_clustered_by': 'p', - 'crate_number_of_shards': 3 - } - pk = sa.Column(sa.String, primary_key=True) - p = sa.Column(sa.String, primary_key=True) - self.Base.metadata.create_all(bind=self.engine) - fake_cursor.execute.assert_called_with( - ('\nCREATE TABLE t (\n\t' - 'pk STRING NOT NULL, \n\t' - 'p STRING NOT NULL, \n\t' - 'PRIMARY KEY (pk, p)\n' - ') CLUSTERED BY (p) INTO 3 SHARDS\n\n'), - ()) - - def test_column_object_array(self): - class DummyTable(self.Base): - __tablename__ = 't' - pk = sa.Column(sa.String, primary_key=True) - tags = sa.Column(ObjectArray) - - self.Base.metadata.create_all(bind=self.engine) - fake_cursor.execute.assert_called_with( - ('\nCREATE TABLE t (\n\t' - 'pk STRING NOT NULL, \n\t' - 'tags ARRAY(OBJECT), \n\t' - 'PRIMARY KEY (pk)\n)\n\n'), ()) - - def test_column_nullable(self): - class DummyTable(self.Base): - __tablename__ = 't' - pk = sa.Column(sa.String, primary_key=True) - a = 
sa.Column(sa.Integer, nullable=True) - b = sa.Column(sa.Integer, nullable=False) - - self.Base.metadata.create_all(bind=self.engine) - fake_cursor.execute.assert_called_with( - ('\nCREATE TABLE t (\n\t' - 'pk STRING NOT NULL, \n\t' - 'a INT, \n\t' - 'b INT NOT NULL, \n\t' - 'PRIMARY KEY (pk)\n)\n\n'), ()) - - def test_column_pk_nullable(self): - class DummyTable(self.Base): - __tablename__ = 't' - pk = sa.Column(sa.String, primary_key=True, nullable=True) - with self.assertRaises(sa.exc.CompileError): - self.Base.metadata.create_all(bind=self.engine) - - def test_column_crate_index(self): - class DummyTable(self.Base): - __tablename__ = 't' - pk = sa.Column(sa.String, primary_key=True) - a = sa.Column(sa.Integer, crate_index=False) - b = sa.Column(sa.Integer, crate_index=True) - - self.Base.metadata.create_all(bind=self.engine) - fake_cursor.execute.assert_called_with( - ('\nCREATE TABLE t (\n\t' - 'pk STRING NOT NULL, \n\t' - 'a INT INDEX OFF, \n\t' - 'b INT, \n\t' - 'PRIMARY KEY (pk)\n)\n\n'), ()) - - def test_column_geopoint_without_index(self): - class DummyTable(self.Base): - __tablename__ = 't' - pk = sa.Column(sa.String, primary_key=True) - a = sa.Column(Geopoint, crate_index=False) - with self.assertRaises(sa.exc.CompileError): - self.Base.metadata.create_all(bind=self.engine) - - def test_text_column_without_columnstore(self): - class DummyTable(self.Base): - __tablename__ = 't' - pk = sa.Column(sa.String, primary_key=True) - a = sa.Column(sa.String, crate_columnstore=False) - b = sa.Column(sa.String, crate_columnstore=True) - c = sa.Column(sa.String) - - self.Base.metadata.create_all(bind=self.engine) - - fake_cursor.execute.assert_called_with( - ('\nCREATE TABLE t (\n\t' - 'pk STRING NOT NULL, \n\t' - 'a STRING STORAGE WITH (columnstore = false), \n\t' - 'b STRING, \n\t' - 'c STRING, \n\t' - 'PRIMARY KEY (pk)\n)\n\n'), ()) - - def test_non_text_column_without_columnstore(self): - class DummyTable(self.Base): - __tablename__ = 't' - pk = 
sa.Column(sa.String, primary_key=True) - a = sa.Column(sa.Integer, crate_columnstore=False) - - with self.assertRaises(sa.exc.CompileError): - self.Base.metadata.create_all(bind=self.engine) - - def test_column_server_default_text_func(self): - class DummyTable(self.Base): - __tablename__ = 't' - pk = sa.Column(sa.String, primary_key=True) - a = sa.Column(sa.DateTime, server_default=sa.text("now()")) - - self.Base.metadata.create_all(bind=self.engine) - fake_cursor.execute.assert_called_with( - ('\nCREATE TABLE t (\n\t' - 'pk STRING NOT NULL, \n\t' - 'a TIMESTAMP DEFAULT now(), \n\t' - 'PRIMARY KEY (pk)\n)\n\n'), ()) - - def test_column_server_default_string(self): - class DummyTable(self.Base): - __tablename__ = 't' - pk = sa.Column(sa.String, primary_key=True) - a = sa.Column(sa.String, server_default="Zaphod") - - self.Base.metadata.create_all(bind=self.engine) - fake_cursor.execute.assert_called_with( - ('\nCREATE TABLE t (\n\t' - 'pk STRING NOT NULL, \n\t' - 'a STRING DEFAULT \'Zaphod\', \n\t' - 'PRIMARY KEY (pk)\n)\n\n'), ()) - - def test_column_server_default_func(self): - class DummyTable(self.Base): - __tablename__ = 't' - pk = sa.Column(sa.String, primary_key=True) - a = sa.Column(sa.DateTime, server_default=sa.func.now()) - - self.Base.metadata.create_all(bind=self.engine) - fake_cursor.execute.assert_called_with( - ('\nCREATE TABLE t (\n\t' - 'pk STRING NOT NULL, \n\t' - 'a TIMESTAMP DEFAULT now(), \n\t' - 'PRIMARY KEY (pk)\n)\n\n'), ()) - - def test_column_server_default_text_constant(self): - class DummyTable(self.Base): - __tablename__ = 't' - pk = sa.Column(sa.String, primary_key=True) - answer = sa.Column(sa.Integer, server_default=sa.text("42")) - - self.Base.metadata.create_all(bind=self.engine) - fake_cursor.execute.assert_called_with( - ('\nCREATE TABLE t (\n\t' - 'pk STRING NOT NULL, \n\t' - 'answer INT DEFAULT 42, \n\t' - 'PRIMARY KEY (pk)\n)\n\n'), ()) diff --git a/src/crate/client/sqlalchemy/tests/datetime_test.py 
b/src/crate/client/sqlalchemy/tests/datetime_test.py deleted file mode 100644 index 07e98ede..00000000 --- a/src/crate/client/sqlalchemy/tests/datetime_test.py +++ /dev/null @@ -1,90 +0,0 @@ -# -*- coding: utf-8; -*- -# -# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor -# license agreements. See the NOTICE file distributed with this work for -# additional information regarding copyright ownership. Crate licenses -# this file to you under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. You may -# obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# However, if you have executed another commercial license agreement -# with Crate these terms will supersede the license and you may use the -# software solely pursuant to the terms of the relevant commercial agreement. 
- -from __future__ import absolute_import -from datetime import datetime, tzinfo, timedelta -from unittest import TestCase -from unittest.mock import patch, MagicMock - -import sqlalchemy as sa -from sqlalchemy.exc import DBAPIError -from sqlalchemy.orm import Session -try: - from sqlalchemy.orm import declarative_base -except ImportError: - from sqlalchemy.ext.declarative import declarative_base - -from crate.client.cursor import Cursor - - -fake_cursor = MagicMock(name='fake_cursor') -FakeCursor = MagicMock(name='FakeCursor', spec=Cursor) -FakeCursor.return_value = fake_cursor - - -class CST(tzinfo): - """ - Timezone object for CST - """ - - def utcoffset(self, date_time): - return timedelta(seconds=-3600) - - def dst(self, date_time): - return timedelta(seconds=-7200) - - -@patch('crate.client.connection.Cursor', FakeCursor) -class SqlAlchemyDateAndDateTimeTest(TestCase): - - def setUp(self): - self.engine = sa.create_engine('crate://') - Base = declarative_base() - - class Character(Base): - __tablename__ = 'characters' - name = sa.Column(sa.String, primary_key=True) - date = sa.Column(sa.Date) - timestamp = sa.Column(sa.DateTime) - - fake_cursor.description = ( - ('characters_name', None, None, None, None, None, None), - ('characters_date', None, None, None, None, None, None) - ) - self.session = Session(bind=self.engine) - self.Character = Character - - def test_date_can_handle_datetime(self): - """ date type should also be able to handle iso datetime strings. - - this verifies that the fallback in the Date result_processor works. 
- """ - fake_cursor.fetchall.return_value = [ - ('Trillian', '2013-07-16T00:00:00.000Z') - ] - self.session.query(self.Character).first() - - def test_date_cannot_handle_tz_aware_datetime(self): - character = self.Character() - character.name = "Athur" - character.timestamp = datetime(2009, 5, 13, 19, 19, 30, tzinfo=CST()) - self.session.add(character) - self.assertRaises(DBAPIError, self.session.commit) diff --git a/src/crate/client/sqlalchemy/tests/dialect_test.py b/src/crate/client/sqlalchemy/tests/dialect_test.py deleted file mode 100644 index bdcfc838..00000000 --- a/src/crate/client/sqlalchemy/tests/dialect_test.py +++ /dev/null @@ -1,156 +0,0 @@ -# -*- coding: utf-8; -*- -# -# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor -# license agreements. See the NOTICE file distributed with this work for -# additional information regarding copyright ownership. Crate licenses -# this file to you under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. You may -# obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# However, if you have executed another commercial license agreement -# with Crate these terms will supersede the license and you may use the -# software solely pursuant to the terms of the relevant commercial agreement. 
- -from datetime import datetime -from unittest import TestCase, skipIf -from unittest.mock import MagicMock, patch - -import sqlalchemy as sa - -from crate.client.cursor import Cursor -from crate.client.sqlalchemy import SA_VERSION -from crate.client.sqlalchemy.sa_version import SA_1_4, SA_2_0 -from crate.client.sqlalchemy.types import ObjectType -from sqlalchemy import inspect -from sqlalchemy.orm import Session -try: - from sqlalchemy.orm import declarative_base -except ImportError: - from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.testing import eq_, in_, is_true - -FakeCursor = MagicMock(name='FakeCursor', spec=Cursor) - - -@patch('crate.client.connection.Cursor', FakeCursor) -class SqlAlchemyDialectTest(TestCase): - - def execute_wrapper(self, query, *args, **kwargs): - self.executed_statement = query - return self.fake_cursor - - def setUp(self): - - self.fake_cursor = MagicMock(name='fake_cursor') - FakeCursor.return_value = self.fake_cursor - - self.engine = sa.create_engine('crate://') - - self.executed_statement = None - - self.connection = self.engine.connect() - - self.fake_cursor.execute = self.execute_wrapper - - self.base = declarative_base() - - class Character(self.base): - __tablename__ = 'characters' - - name = sa.Column(sa.String, primary_key=True) - age = sa.Column(sa.Integer, primary_key=True) - obj = sa.Column(ObjectType) - ts = sa.Column(sa.DateTime, onupdate=datetime.utcnow) - - self.session = Session(bind=self.engine) - - def init_mock(self, return_value=None): - self.fake_cursor.rowcount = 1 - self.fake_cursor.description = ( - ('foo', None, None, None, None, None, None), - ) - self.fake_cursor.fetchall = MagicMock(return_value=return_value) - - def test_primary_keys_2_3_0(self): - insp = inspect(self.session.bind) - self.engine.dialect.server_version_info = (2, 3, 0) - - self.fake_cursor.rowcount = 3 - self.fake_cursor.description = ( - ('foo', None, None, None, None, None, None), - ) - self.fake_cursor.fetchall 
= MagicMock(return_value=[["id"], ["id2"], ["id3"]]) - - eq_(insp.get_pk_constraint("characters")['constrained_columns'], {"id", "id2", "id3"}) - self.fake_cursor.fetchall.assert_called_once_with() - in_("information_schema.key_column_usage", self.executed_statement) - in_("table_catalog = ?", self.executed_statement) - - def test_primary_keys_3_0_0(self): - insp = inspect(self.session.bind) - self.engine.dialect.server_version_info = (3, 0, 0) - - self.fake_cursor.rowcount = 3 - self.fake_cursor.description = ( - ('foo', None, None, None, None, None, None), - ) - self.fake_cursor.fetchall = MagicMock(return_value=[["id"], ["id2"], ["id3"]]) - - eq_(insp.get_pk_constraint("characters")['constrained_columns'], {"id", "id2", "id3"}) - self.fake_cursor.fetchall.assert_called_once_with() - in_("information_schema.key_column_usage", self.executed_statement) - in_("table_schema = ?", self.executed_statement) - - def test_get_table_names(self): - self.fake_cursor.rowcount = 1 - self.fake_cursor.description = ( - ('foo', None, None, None, None, None, None), - ) - self.fake_cursor.fetchall = MagicMock(return_value=[["t1"], ["t2"]]) - - insp = inspect(self.session.bind) - self.engine.dialect.server_version_info = (2, 0, 0) - eq_(insp.get_table_names(schema="doc"), - ['t1', 't2']) - in_("WHERE table_schema = ? 
AND table_type = 'BASE TABLE' ORDER BY", self.executed_statement) - - def test_get_view_names(self): - self.fake_cursor.rowcount = 1 - self.fake_cursor.description = ( - ('foo', None, None, None, None, None, None), - ) - self.fake_cursor.fetchall = MagicMock(return_value=[["v1"], ["v2"]]) - - insp = inspect(self.session.bind) - self.engine.dialect.server_version_info = (2, 0, 0) - eq_(insp.get_view_names(schema="doc"), - ['v1', 'v2']) - eq_(self.executed_statement, "SELECT table_name FROM information_schema.views " - "ORDER BY table_name ASC, table_schema ASC") - - @skipIf(SA_VERSION < SA_1_4, "Inspector.has_table only available on SQLAlchemy>=1.4") - def test_has_table(self): - self.init_mock(return_value=[["foo"], ["bar"]]) - insp = inspect(self.session.bind) - is_true(insp.has_table("bar")) - eq_(self.executed_statement, - "SELECT table_name FROM information_schema.tables " - "WHERE table_schema = ? AND table_type = 'BASE TABLE' " - "ORDER BY table_name ASC, table_schema ASC") - - @skipIf(SA_VERSION < SA_2_0, "Inspector.has_schema only available on SQLAlchemy>=2.0") - def test_has_schema(self): - self.init_mock( - return_value=[["blob"], ["doc"], ["information_schema"], ["pg_catalog"], ["sys"]]) - insp = inspect(self.session.bind) - is_true(insp.has_schema("doc")) - eq_(self.executed_statement, - "select schema_name from information_schema.schemata order by schema_name asc") diff --git a/src/crate/client/sqlalchemy/tests/dict_test.py b/src/crate/client/sqlalchemy/tests/dict_test.py deleted file mode 100644 index 9695882b..00000000 --- a/src/crate/client/sqlalchemy/tests/dict_test.py +++ /dev/null @@ -1,460 +0,0 @@ -# -*- coding: utf-8; -*- -# -# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor -# license agreements. See the NOTICE file distributed with this work for -# additional information regarding copyright ownership. 
Crate licenses -# this file to you under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. You may -# obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# However, if you have executed another commercial license agreement -# with Crate these terms will supersede the license and you may use the -# software solely pursuant to the terms of the relevant commercial agreement. - -from __future__ import absolute_import -from unittest import TestCase -from unittest.mock import patch, MagicMock - -import sqlalchemy as sa -from sqlalchemy.sql import select -from sqlalchemy.orm import Session -try: - from sqlalchemy.orm import declarative_base -except ImportError: - from sqlalchemy.ext.declarative import declarative_base - -from crate.client.sqlalchemy.types import ObjectArray, ObjectType -from crate.client.cursor import Cursor - - -fake_cursor = MagicMock(name='fake_cursor') -FakeCursor = MagicMock(name='FakeCursor', spec=Cursor) -FakeCursor.return_value = fake_cursor - - -class SqlAlchemyDictTypeTest(TestCase): - - def setUp(self): - self.engine = sa.create_engine('crate://') - metadata = sa.MetaData() - self.mytable = sa.Table('mytable', metadata, - sa.Column('name', sa.String), - sa.Column('data', ObjectType)) - - def assertSQL(self, expected_str, selectable): - actual_expr = selectable.compile(bind=self.engine) - self.assertEqual(expected_str, str(actual_expr).replace('\n', '')) - - def test_select_with_dict_column(self): - mytable = self.mytable - self.assertSQL( - "SELECT mytable.data['x'] AS anon_1 FROM mytable", - select(mytable.c.data['x']) - ) - - 
def test_select_with_dict_column_where_clause(self): - mytable = self.mytable - s = select(mytable.c.data).\ - where(mytable.c.data['x'] == 1) - self.assertSQL( - "SELECT mytable.data FROM mytable WHERE mytable.data['x'] = ?", - s - ) - - def test_select_with_dict_column_nested_where(self): - mytable = self.mytable - s = select(mytable.c.name) - s = s.where(mytable.c.data['x']['y'] == 1) - self.assertSQL( - "SELECT mytable.name FROM mytable " + - "WHERE mytable.data['x']['y'] = ?", - s - ) - - def test_select_with_dict_column_where_clause_gt(self): - mytable = self.mytable - s = select(mytable.c.data).\ - where(mytable.c.data['x'] > 1) - self.assertSQL( - "SELECT mytable.data FROM mytable WHERE mytable.data['x'] > ?", - s - ) - - def test_select_with_dict_column_where_clause_other_col(self): - mytable = self.mytable - s = select(mytable.c.name) - s = s.where(mytable.c.data['x'] == mytable.c.name) - self.assertSQL( - "SELECT mytable.name FROM mytable " + - "WHERE mytable.data['x'] = mytable.name", - s - ) - - def test_update_with_dict_column(self): - mytable = self.mytable - stmt = mytable.update().\ - where(mytable.c.name == 'Arthur Dent').\ - values({ - "data['x']": "Trillian" - }) - self.assertSQL( - "UPDATE mytable SET data['x'] = ? 
WHERE mytable.name = ?", - stmt - ) - - def set_up_character_and_cursor(self, return_value=None): - return_value = return_value or [('Trillian', {})] - fake_cursor.fetchall.return_value = return_value - fake_cursor.description = ( - ('characters_name', None, None, None, None, None, None), - ('characters_data', None, None, None, None, None, None) - ) - fake_cursor.rowcount = 1 - Base = declarative_base() - - class Character(Base): - __tablename__ = 'characters' - name = sa.Column(sa.String, primary_key=True) - age = sa.Column(sa.Integer) - data = sa.Column(ObjectType) - data_list = sa.Column(ObjectArray) - - session = Session(bind=self.engine) - return session, Character - - def test_assign_null_to_object_array(self): - session, Character = self.set_up_character_and_cursor() - char_1 = Character(name='Trillian', data_list=None) - self.assertIsNone(char_1.data_list) - char_2 = Character(name='Trillian', data_list=1) - self.assertEqual(char_2.data_list, [1]) - char_3 = Character(name='Trillian', data_list=[None]) - self.assertEqual(char_3.data_list, [None]) - - @patch('crate.client.connection.Cursor', FakeCursor) - def test_assign_to_object_type_after_commit(self): - session, Character = self.set_up_character_and_cursor( - return_value=[('Trillian', None)] - ) - char = Character(name='Trillian') - session.add(char) - session.commit() - char.data = {'x': 1} - self.assertIn(char, session.dirty) - session.commit() - fake_cursor.execute.assert_called_with( - "UPDATE characters SET data = ? 
WHERE characters.name = ?", - ({'x': 1}, 'Trillian',) - ) - - @patch('crate.client.connection.Cursor', FakeCursor) - def test_change_tracking(self): - session, Character = self.set_up_character_and_cursor() - char = Character(name='Trillian') - session.add(char) - session.commit() - - try: - char.data['x'] = 1 - except Exception: - print(fake_cursor.fetchall.called) - print(fake_cursor.mock_calls) - raise - - self.assertIn(char, session.dirty) - try: - session.commit() - except Exception: - print(fake_cursor.mock_calls) - raise - self.assertNotIn(char, session.dirty) - - @patch('crate.client.connection.Cursor', FakeCursor) - def test_partial_dict_update(self): - session, Character = self.set_up_character_and_cursor() - char = Character(name='Trillian') - session.add(char) - session.commit() - char.data['x'] = 1 - char.data['y'] = 2 - session.commit() - - # on python 3 dicts aren't sorted so the order if x or y is updated - # first isn't deterministic - try: - fake_cursor.execute.assert_called_with( - ("UPDATE characters SET data['y'] = ?, data['x'] = ? " - "WHERE characters.name = ?"), - (2, 1, 'Trillian') - ) - except AssertionError: - fake_cursor.execute.assert_called_with( - ("UPDATE characters SET data['x'] = ?, data['y'] = ? " - "WHERE characters.name = ?"), - (1, 2, 'Trillian') - ) - - @patch('crate.client.connection.Cursor', FakeCursor) - def test_partial_dict_update_only_one_key_changed(self): - """ - If only one attribute of Crate is changed - the update should only update that attribute - not all attributes of Crate. - """ - session, Character = self.set_up_character_and_cursor( - return_value=[('Trillian', dict(x=1, y=2))] - ) - - char = Character(name='Trillian') - char.data = dict(x=1, y=2) - session.add(char) - session.commit() - char.data['y'] = 3 - session.commit() - fake_cursor.execute.assert_called_with( - ("UPDATE characters SET data['y'] = ? 
" - "WHERE characters.name = ?"), - (3, 'Trillian') - ) - - @patch('crate.client.connection.Cursor', FakeCursor) - def test_partial_dict_update_with_regular_column(self): - session, Character = self.set_up_character_and_cursor() - - char = Character(name='Trillian') - session.add(char) - session.commit() - char.data['x'] = 1 - char.age = 20 - session.commit() - fake_cursor.execute.assert_called_with( - ("UPDATE characters SET age = ?, data['x'] = ? " - "WHERE characters.name = ?"), - (20, 1, 'Trillian') - ) - - @patch('crate.client.connection.Cursor', FakeCursor) - def test_partial_dict_update_with_delitem(self): - session, Character = self.set_up_character_and_cursor( - return_value=[('Trillian', {'x': 1})] - ) - - char = Character(name='Trillian') - char.data = {'x': 1} - session.add(char) - session.commit() - del char.data['x'] - self.assertIn(char, session.dirty) - session.commit() - fake_cursor.execute.assert_called_with( - ("UPDATE characters SET data['x'] = ? " - "WHERE characters.name = ?"), - (None, 'Trillian') - ) - - @patch('crate.client.connection.Cursor', FakeCursor) - def test_partial_dict_update_with_delitem_setitem(self): - """ test that the change tracking doesn't get messed up - - delitem -> setitem - """ - session, Character = self.set_up_character_and_cursor( - return_value=[('Trillian', {'x': 1})] - ) - - session = Session(bind=self.engine) - char = Character(name='Trillian') - char.data = {'x': 1} - session.add(char) - session.commit() - del char.data['x'] - char.data['x'] = 4 - self.assertIn(char, session.dirty) - session.commit() - fake_cursor.execute.assert_called_with( - ("UPDATE characters SET data['x'] = ? 
" - "WHERE characters.name = ?"), - (4, 'Trillian') - ) - - @patch('crate.client.connection.Cursor', FakeCursor) - def test_partial_dict_update_with_setitem_delitem(self): - """ test that the change tracking doesn't get messed up - - setitem -> delitem - """ - session, Character = self.set_up_character_and_cursor( - return_value=[('Trillian', {'x': 1})] - ) - - char = Character(name='Trillian') - char.data = {'x': 1} - session.add(char) - session.commit() - char.data['x'] = 4 - del char.data['x'] - self.assertIn(char, session.dirty) - session.commit() - fake_cursor.execute.assert_called_with( - ("UPDATE characters SET data['x'] = ? " - "WHERE characters.name = ?"), - (None, 'Trillian') - ) - - @patch('crate.client.connection.Cursor', FakeCursor) - def test_partial_dict_update_with_setitem_delitem_setitem(self): - """ test that the change tracking doesn't get messed up - - setitem -> delitem -> setitem - """ - session, Character = self.set_up_character_and_cursor( - return_value=[('Trillian', {'x': 1})] - ) - - char = Character(name='Trillian') - char.data = {'x': 1} - session.add(char) - session.commit() - char.data['x'] = 4 - del char.data['x'] - char.data['x'] = 3 - self.assertIn(char, session.dirty) - session.commit() - fake_cursor.execute.assert_called_with( - ("UPDATE characters SET data['x'] = ? 
" - "WHERE characters.name = ?"), - (3, 'Trillian') - ) - - def set_up_character_and_cursor_data_list(self, return_value=None): - return_value = return_value or [('Trillian', {})] - fake_cursor.fetchall.return_value = return_value - fake_cursor.description = ( - ('characters_name', None, None, None, None, None, None), - ('characters_data_list', None, None, None, None, None, None) - - ) - fake_cursor.rowcount = 1 - Base = declarative_base() - - class Character(Base): - __tablename__ = 'characters' - name = sa.Column(sa.String, primary_key=True) - data_list = sa.Column(ObjectArray) - - session = Session(bind=self.engine) - return session, Character - - def _setup_object_array_char(self): - session, Character = self.set_up_character_and_cursor_data_list( - return_value=[('Trillian', [{'1': 1}, {'2': 2}])] - ) - char = Character(name='Trillian', data_list=[{'1': 1}, {'2': 2}]) - session.add(char) - session.commit() - return session, char - - @patch('crate.client.connection.Cursor', FakeCursor) - def test_object_array_setitem_change_tracking(self): - session, char = self._setup_object_array_char() - char.data_list[1] = {'3': 3} - self.assertIn(char, session.dirty) - session.commit() - fake_cursor.execute.assert_called_with( - ("UPDATE characters SET data_list = ? 
" - "WHERE characters.name = ?"), - ([{'1': 1}, {'3': 3}], 'Trillian') - ) - - def _setup_nested_object_char(self): - session, Character = self.set_up_character_and_cursor( - return_value=[('Trillian', {'nested': {'x': 1, 'y': {'z': 2}}})] - ) - char = Character(name='Trillian') - char.data = {'nested': {'x': 1, 'y': {'z': 2}}} - session.add(char) - session.commit() - return session, char - - @patch('crate.client.connection.Cursor', FakeCursor) - def test_nested_object_change_tracking(self): - session, char = self._setup_nested_object_char() - char.data["nested"]["x"] = 3 - self.assertIn(char, session.dirty) - session.commit() - fake_cursor.execute.assert_called_with( - ("UPDATE characters SET data['nested'] = ? " - "WHERE characters.name = ?"), - ({'y': {'z': 2}, 'x': 3}, 'Trillian') - ) - - @patch('crate.client.connection.Cursor', FakeCursor) - def test_deep_nested_object_change_tracking(self): - session, char = self._setup_nested_object_char() - # change deep nested object - char.data["nested"]["y"]["z"] = 5 - self.assertIn(char, session.dirty) - session.commit() - fake_cursor.execute.assert_called_with( - ("UPDATE characters SET data['nested'] = ? " - "WHERE characters.name = ?"), - ({'y': {'z': 5}, 'x': 1}, 'Trillian') - ) - - @patch('crate.client.connection.Cursor', FakeCursor) - def test_delete_nested_object_tracking(self): - session, char = self._setup_nested_object_char() - # delete nested object - del char.data["nested"]["y"]["z"] - self.assertIn(char, session.dirty) - session.commit() - fake_cursor.execute.assert_called_with( - ("UPDATE characters SET data['nested'] = ? 
" - "WHERE characters.name = ?"), - ({'y': {}, 'x': 1}, 'Trillian') - ) - - @patch('crate.client.connection.Cursor', FakeCursor) - def test_object_array_append_change_tracking(self): - session, char = self._setup_object_array_char() - char.data_list.append({'3': 3}) - self.assertIn(char, session.dirty) - - @patch('crate.client.connection.Cursor', FakeCursor) - def test_object_array_insert_change_tracking(self): - session, char = self._setup_object_array_char() - char.data_list.insert(0, {'3': 3}) - self.assertIn(char, session.dirty) - - @patch('crate.client.connection.Cursor', FakeCursor) - def test_object_array_slice_change_tracking(self): - session, char = self._setup_object_array_char() - char.data_list[:] = [{'3': 3}] - self.assertIn(char, session.dirty) - - @patch('crate.client.connection.Cursor', FakeCursor) - def test_object_array_extend_change_tracking(self): - session, char = self._setup_object_array_char() - char.data_list.extend([{'3': 3}]) - self.assertIn(char, session.dirty) - - @patch('crate.client.connection.Cursor', FakeCursor) - def test_object_array_pop_change_tracking(self): - session, char = self._setup_object_array_char() - char.data_list.pop() - self.assertIn(char, session.dirty) - - @patch('crate.client.connection.Cursor', FakeCursor) - def test_object_array_remove_change_tracking(self): - session, char = self._setup_object_array_char() - item = char.data_list[0] - char.data_list.remove(item) - self.assertIn(char, session.dirty) diff --git a/src/crate/client/sqlalchemy/tests/function_test.py b/src/crate/client/sqlalchemy/tests/function_test.py deleted file mode 100644 index 072ab43a..00000000 --- a/src/crate/client/sqlalchemy/tests/function_test.py +++ /dev/null @@ -1,47 +0,0 @@ -# -*- coding: utf-8; -*- -# -# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor -# license agreements. See the NOTICE file distributed with this work for -# additional information regarding copyright ownership. 
Crate licenses -# this file to you under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. You may -# obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# However, if you have executed another commercial license agreement -# with Crate these terms will supersede the license and you may use the -# software solely pursuant to the terms of the relevant commercial agreement. - -from unittest import TestCase - -import sqlalchemy as sa -from sqlalchemy.sql.sqltypes import TIMESTAMP -try: - from sqlalchemy.orm import declarative_base -except ImportError: - from sqlalchemy.ext.declarative import declarative_base - - -class SqlAlchemyFunctionTest(TestCase): - def setUp(self): - Base = declarative_base() - - class Character(Base): - __tablename__ = "characters" - name = sa.Column(sa.String, primary_key=True) - timestamp = sa.Column(sa.DateTime) - - self.Character = Character - - def test_date_trunc_type_is_timestamp(self): - f = sa.func.date_trunc("minute", self.Character.timestamp) - self.assertEqual(len(f.base_columns), 1) - for col in f.base_columns: - self.assertIsInstance(col.type, TIMESTAMP) diff --git a/src/crate/client/sqlalchemy/tests/insert_from_select_test.py b/src/crate/client/sqlalchemy/tests/insert_from_select_test.py deleted file mode 100644 index 692dfa55..00000000 --- a/src/crate/client/sqlalchemy/tests/insert_from_select_test.py +++ /dev/null @@ -1,85 +0,0 @@ -# -*- coding: utf-8; -*- -# -# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor -# license agreements. 
See the NOTICE file distributed with this work for -# additional information regarding copyright ownership. Crate licenses -# this file to you under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. You may -# obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# However, if you have executed another commercial license agreement -# with Crate these terms will supersede the license and you may use the -# software solely pursuant to the terms of the relevant commercial agreement. - -from datetime import datetime -from unittest import TestCase -from unittest.mock import patch, MagicMock - -import sqlalchemy as sa -from sqlalchemy import select, insert -from sqlalchemy.orm import Session -try: - from sqlalchemy.orm import declarative_base -except ImportError: - from sqlalchemy.ext.declarative import declarative_base - -from crate.client.cursor import Cursor - - -fake_cursor = MagicMock(name='fake_cursor') -fake_cursor.rowcount = 1 -FakeCursor = MagicMock(name='FakeCursor', spec=Cursor) -FakeCursor.return_value = fake_cursor - - -class SqlAlchemyInsertFromSelectTest(TestCase): - - def assertSQL(self, expected_str, actual_expr): - self.assertEqual(expected_str, str(actual_expr).replace('\n', '')) - - def setUp(self): - self.engine = sa.create_engine('crate://') - Base = declarative_base() - - class Character(Base): - __tablename__ = 'characters' - - name = sa.Column(sa.String, primary_key=True) - age = sa.Column(sa.Integer) - ts = sa.Column(sa.DateTime, onupdate=datetime.utcnow) - status = sa.Column(sa.String) - - class CharacterArchive(Base): - 
__tablename__ = 'characters_archive' - - name = sa.Column(sa.String, primary_key=True) - age = sa.Column(sa.Integer) - ts = sa.Column(sa.DateTime, onupdate=datetime.utcnow) - status = sa.Column(sa.String) - - self.character = Character - self.character_archived = CharacterArchive - self.session = Session(bind=self.engine) - - @patch('crate.client.connection.Cursor', FakeCursor) - def test_insert_from_select_triggered(self): - char = self.character(name='Arthur', status='Archived') - self.session.add(char) - self.session.commit() - - sel = select(self.character.name, self.character.age).where(self.character.status == "Archived") - ins = insert(self.character_archived).from_select(['name', 'age'], sel) - self.session.execute(ins) - self.session.commit() - self.assertSQL( - "INSERT INTO characters_archive (name, age) SELECT characters.name, characters.age FROM characters WHERE characters.status = ?", - ins.compile(bind=self.engine) - ) diff --git a/src/crate/client/sqlalchemy/tests/match_test.py b/src/crate/client/sqlalchemy/tests/match_test.py deleted file mode 100644 index 735709c3..00000000 --- a/src/crate/client/sqlalchemy/tests/match_test.py +++ /dev/null @@ -1,137 +0,0 @@ -# -*- coding: utf-8; -*- -# -# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor -# license agreements. See the NOTICE file distributed with this work for -# additional information regarding copyright ownership. Crate licenses -# this file to you under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. You may -# obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the -# License for the specific language governing permissions and limitations -# under the License. -# -# However, if you have executed another commercial license agreement -# with Crate these terms will supersede the license and you may use the -# software solely pursuant to the terms of the relevant commercial agreement. - - -from unittest import TestCase -from unittest.mock import MagicMock - -import sqlalchemy as sa -from sqlalchemy.orm import Session -try: - from sqlalchemy.orm import declarative_base -except ImportError: - from sqlalchemy.ext.declarative import declarative_base - -from crate.client.sqlalchemy.types import ObjectType -from crate.client.sqlalchemy.predicates import match -from crate.client.cursor import Cursor - - -fake_cursor = MagicMock(name='fake_cursor') -FakeCursor = MagicMock(name='FakeCursor', spec=Cursor) -FakeCursor.return_value = fake_cursor - - -class SqlAlchemyMatchTest(TestCase): - - def setUp(self): - self.engine = sa.create_engine('crate://') - metadata = sa.MetaData() - self.quotes = sa.Table('quotes', metadata, - sa.Column('author', sa.String), - sa.Column('quote', sa.String)) - self.session, self.Character = self.set_up_character_and_session() - self.maxDiff = None - - def assertSQL(self, expected_str, actual_expr): - self.assertEqual(expected_str, str(actual_expr).replace('\n', '')) - - def set_up_character_and_session(self): - Base = declarative_base() - - class Character(Base): - __tablename__ = 'characters' - name = sa.Column(sa.String, primary_key=True) - info = sa.Column(ObjectType) - - session = Session(bind=self.engine) - return session, Character - - def test_simple_match(self): - query = self.session.query(self.Character.name) \ - .filter(match(self.Character.name, 'Trillian')) - self.assertSQL( - "SELECT characters.name AS characters_name FROM characters " + - "WHERE match(characters.name, ?)", - query - ) - - def test_match_boost(self): - query = self.session.query(self.Character.name) \ - 
.filter(match({self.Character.name: 0.5}, 'Trillian')) - self.assertSQL( - "SELECT characters.name AS characters_name FROM characters " + - "WHERE match((characters.name 0.5), ?)", - query - ) - - def test_muli_match(self): - query = self.session.query(self.Character.name) \ - .filter(match({self.Character.name: 0.5, - self.Character.info['race']: 0.9}, - 'Trillian')) - self.assertSQL( - "SELECT characters.name AS characters_name FROM characters " + - "WHERE match(" + - "(characters.info['race'] 0.9, characters.name 0.5), ?" + - ")", - query - ) - - def test_match_type_options(self): - query = self.session.query(self.Character.name) \ - .filter(match({self.Character.name: 0.5, - self.Character.info['race']: 0.9}, - 'Trillian', - match_type='phrase', - options={'fuzziness': 3, 'analyzer': 'english'})) - self.assertSQL( - "SELECT characters.name AS characters_name FROM characters " + - "WHERE match(" + - "(characters.info['race'] 0.9, characters.name 0.5), ?" + - ") using phrase with (analyzer=english, fuzziness=3)", - query - ) - - def test_score(self): - query = self.session.query(self.Character.name, - sa.literal_column('_score')) \ - .filter(match(self.Character.name, 'Trillian')) - self.assertSQL( - "SELECT characters.name AS characters_name, _score " + - "FROM characters WHERE match(characters.name, ?)", - query - ) - - def test_options_without_type(self): - query = self.session.query(self.Character.name).filter( - match({self.Character.name: 0.5, self.Character.info['race']: 0.9}, - 'Trillian', - options={'boost': 10.0}) - ) - err = None - try: - str(query) - except ValueError as e: - err = e - msg = "missing match_type. 
" + \ - "It's not allowed to specify options without match_type" - self.assertEqual(str(err), msg) diff --git a/src/crate/client/sqlalchemy/tests/query_caching.py b/src/crate/client/sqlalchemy/tests/query_caching.py deleted file mode 100644 index 43e28a44..00000000 --- a/src/crate/client/sqlalchemy/tests/query_caching.py +++ /dev/null @@ -1,143 +0,0 @@ -# -*- coding: utf-8; -*- -# -# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor -# license agreements. See the NOTICE file distributed with this work for -# additional information regarding copyright ownership. Crate licenses -# this file to you under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. You may -# obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# However, if you have executed another commercial license agreement -# with Crate these terms will supersede the license and you may use the -# software solely pursuant to the terms of the relevant commercial agreement. 
- -from __future__ import absolute_import -from unittest import TestCase, skipIf - -import sqlalchemy as sa -from sqlalchemy.orm import Session -from sqlalchemy.sql.operators import eq - -from crate.client.sqlalchemy import SA_VERSION, SA_1_4 -from crate.testing.settings import crate_host - -try: - from sqlalchemy.orm import declarative_base -except ImportError: - from sqlalchemy.ext.declarative import declarative_base - -from crate.client.sqlalchemy.types import ObjectType, ObjectArray - - -class SqlAlchemyQueryCompilationCaching(TestCase): - - def setUp(self): - self.engine = sa.create_engine(f"crate://{crate_host}") - self.metadata = sa.MetaData(schema="testdrive") - self.session = Session(bind=self.engine) - self.Character = self.setup_entity() - - def setup_entity(self): - """ - Define ORM entity. - """ - Base = declarative_base(metadata=self.metadata) - - class Character(Base): - __tablename__ = 'characters' - name = sa.Column(sa.String, primary_key=True) - age = sa.Column(sa.Integer) - data = sa.Column(ObjectType) - data_list = sa.Column(ObjectArray) - - return Character - - def setup_data(self): - """ - Insert two records into the `characters` table. - """ - self.metadata.drop_all(self.engine) - self.metadata.create_all(self.engine) - - Character = self.Character - char1 = Character(name='Trillian', data={'x': 1}, data_list=[{'foo': 1, 'bar': 10}]) - char2 = Character(name='Slartibartfast', data={'y': 2}, data_list=[{'bar': 2}]) - self.session.add(char1) - self.session.add(char2) - self.session.commit() - self.session.execute(sa.text("REFRESH TABLE testdrive.characters;")) - - @skipIf(SA_VERSION < SA_1_4, "On SA13, the 'ResultProxy' object has no attribute 'scalar_one'") - def test_object_multiple_select_legacy(self): - """ - The SQLAlchemy implementation of CrateDB's `OBJECT` type offers indexed - access to the instance's content in form of a dictionary. Thus, it must - not use `cache_ok = True` on its implementation, i.e. 
this part of the - compiled SQL clause must not be cached. - - This test verifies that two subsequent `SELECT` statements are translated - well, and don't trip on incorrect SQL compiled statement caching. - - This variant uses direct value matching on the `OBJECT`s attribute. - """ - self.setup_data() - Character = self.Character - - selectable = sa.select(Character).where(Character.data['x'] == 1) - result = self.session.execute(selectable).scalar_one().data - self.assertEqual({"x": 1}, result) - - selectable = sa.select(Character).where(Character.data['y'] == 2) - result = self.session.execute(selectable).scalar_one().data - self.assertEqual({"y": 2}, result) - - @skipIf(SA_VERSION < SA_1_4, "On SA13, the 'ResultProxy' object has no attribute 'scalar_one'") - def test_object_multiple_select_modern(self): - """ - The SQLAlchemy implementation of CrateDB's `OBJECT` type offers indexed - access to the instance's content in form of a dictionary. Thus, it must - not use `cache_ok = True` on its implementation, i.e. this part of the - compiled SQL clause must not be cached. - - This test verifies that two subsequent `SELECT` statements are translated - well, and don't trip on incorrect SQL compiled statement caching. - - This variant uses comparator method matching on the `OBJECT`s attribute. 
- """ - self.setup_data() - Character = self.Character - - selectable = sa.select(Character).where(Character.data['x'].as_integer() == 1) - result = self.session.execute(selectable).scalar_one().data - self.assertEqual({"x": 1}, result) - - selectable = sa.select(Character).where(Character.data['y'].as_integer() == 2) - result = self.session.execute(selectable).scalar_one().data - self.assertEqual({"y": 2}, result) - - @skipIf(SA_VERSION < SA_1_4, "On SA13, the 'ResultProxy' object has no attribute 'scalar_one'") - def test_objectarray_multiple_select(self): - """ - The SQLAlchemy implementation of CrateDB's `ARRAY` type in form of the - `ObjectArray`, does *not* offer indexed access to the instance's content. - Thus, using `cache_ok = True` on that type should be sane, and not mess - up SQLAlchemy's SQL compiled statement caching. - """ - self.setup_data() - Character = self.Character - - selectable = sa.select(Character).where(Character.data_list['foo'].any(1, operator=eq)) - result = self.session.execute(selectable).scalar_one().data - self.assertEqual({"x": 1}, result) - - selectable = sa.select(Character).where(Character.data_list['bar'].any(2, operator=eq)) - result = self.session.execute(selectable).scalar_one().data - self.assertEqual({"y": 2}, result) diff --git a/src/crate/client/sqlalchemy/tests/update_test.py b/src/crate/client/sqlalchemy/tests/update_test.py deleted file mode 100644 index a2d5462b..00000000 --- a/src/crate/client/sqlalchemy/tests/update_test.py +++ /dev/null @@ -1,115 +0,0 @@ -# -*- coding: utf-8; -*- -# -# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor -# license agreements. See the NOTICE file distributed with this work for -# additional information regarding copyright ownership. Crate licenses -# this file to you under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
You may -# obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# However, if you have executed another commercial license agreement -# with Crate these terms will supersede the license and you may use the -# software solely pursuant to the terms of the relevant commercial agreement. - -from datetime import datetime -from unittest import TestCase -from unittest.mock import patch, MagicMock - -from crate.client.sqlalchemy.types import ObjectType - -import sqlalchemy as sa -from sqlalchemy.orm import Session -try: - from sqlalchemy.orm import declarative_base -except ImportError: - from sqlalchemy.ext.declarative import declarative_base - -from crate.client.cursor import Cursor - - -fake_cursor = MagicMock(name='fake_cursor') -fake_cursor.rowcount = 1 -FakeCursor = MagicMock(name='FakeCursor', spec=Cursor) -FakeCursor.return_value = fake_cursor - - -class SqlAlchemyUpdateTest(TestCase): - - def setUp(self): - self.engine = sa.create_engine('crate://') - self.base = declarative_base() - - class Character(self.base): - __tablename__ = 'characters' - - name = sa.Column(sa.String, primary_key=True) - age = sa.Column(sa.Integer) - obj = sa.Column(ObjectType) - ts = sa.Column(sa.DateTime, onupdate=datetime.utcnow) - - self.character = Character - self.session = Session(bind=self.engine) - - @patch('crate.client.connection.Cursor', FakeCursor) - def test_onupdate_is_triggered(self): - char = self.character(name='Arthur') - self.session.add(char) - self.session.commit() - now = datetime.utcnow() - - fake_cursor.fetchall.return_value = [('Arthur', None)] - fake_cursor.description = ( - ('characters_name', None, None, None, 
None, None, None), - ('characters_ts', None, None, None, None, None, None), - ) - - char.age = 40 - self.session.commit() - - expected_stmt = ("UPDATE characters SET age = ?, " - "ts = ? WHERE characters.name = ?") - args, kwargs = fake_cursor.execute.call_args - stmt = args[0] - args = args[1] - self.assertEqual(expected_stmt, stmt) - self.assertEqual(40, args[0]) - dt = datetime.strptime(args[1], '%Y-%m-%dT%H:%M:%S.%fZ') - self.assertIsInstance(dt, datetime) - self.assertGreater(dt, now) - self.assertEqual('Arthur', args[2]) - - @patch('crate.client.connection.Cursor', FakeCursor) - def test_bulk_update(self): - """ - Checks whether bulk updates work correctly - on native types and Crate types. - """ - before_update_time = datetime.utcnow() - - self.session.query(self.character).update({ - # change everyone's name to Julia - self.character.name: 'Julia', - self.character.obj: {'favorite_book': 'Romeo & Juliet'} - }) - - self.session.commit() - - expected_stmt = ("UPDATE characters SET " - "name = ?, obj = ?, ts = ?") - args, kwargs = fake_cursor.execute.call_args - stmt = args[0] - args = args[1] - self.assertEqual(expected_stmt, stmt) - self.assertEqual('Julia', args[0]) - self.assertEqual({'favorite_book': 'Romeo & Juliet'}, args[1]) - dt = datetime.strptime(args[2], '%Y-%m-%dT%H:%M:%S.%fZ') - self.assertIsInstance(dt, datetime) - self.assertGreater(dt, before_update_time) diff --git a/src/crate/client/sqlalchemy/tests/warnings_test.py b/src/crate/client/sqlalchemy/tests/warnings_test.py deleted file mode 100644 index 80023005..00000000 --- a/src/crate/client/sqlalchemy/tests/warnings_test.py +++ /dev/null @@ -1,64 +0,0 @@ -# -*- coding: utf-8; -*- -import sys -import warnings -from unittest import TestCase, skipIf - -from crate.client.sqlalchemy import SA_1_4, SA_VERSION -from crate.testing.util import ExtraAssertions - - -class SqlAlchemyWarningsTest(TestCase, ExtraAssertions): - """ - Verify a few `DeprecationWarning` spots. 
- - https://docs.python.org/3/library/warnings.html#testing-warnings - """ - - @skipIf(SA_VERSION >= SA_1_4, "There is no deprecation warning for " - "SQLAlchemy 1.3 on higher versions") - def test_sa13_deprecation_warning(self): - """ - Verify that a `DeprecationWarning` is issued when running SQLAlchemy 1.3. - """ - with warnings.catch_warnings(record=True) as w: - - # Cause all warnings to always be triggered. - warnings.simplefilter("always") - - # Trigger a warning by importing the SQLAlchemy dialect module. - # Because it already has been loaded, unload it beforehand. - del sys.modules["crate.client.sqlalchemy"] - import crate.client.sqlalchemy # noqa: F401 - - # Verify details of the SA13 EOL/deprecation warning. - self.assertEqual(len(w), 1) - self.assertIsSubclass(w[-1].category, DeprecationWarning) - self.assertIn("SQLAlchemy 1.3 is effectively EOL.", str(w[-1].message)) - - def test_craty_object_deprecation_warning(self): - """ - Verify that a `DeprecationWarning` is issued when accessing the deprecated - module variables `Craty`, and `Object`. The new type is called `ObjectType`. - """ - - with warnings.catch_warnings(record=True) as w: - - # Import the deprecated symbol. - from crate.client.sqlalchemy.types import Craty # noqa: F401 - - # Verify details of the deprecation warning. - self.assertEqual(len(w), 1) - self.assertIsSubclass(w[-1].category, DeprecationWarning) - self.assertIn("Craty is deprecated and will be removed in future releases. " - "Please use ObjectType instead.", str(w[-1].message)) - - with warnings.catch_warnings(record=True) as w: - - # Import the deprecated symbol. - from crate.client.sqlalchemy.types import Object # noqa: F401 - - # Verify details of the deprecation warning. - self.assertEqual(len(w), 1) - self.assertIsSubclass(w[-1].category, DeprecationWarning) - self.assertIn("Object is deprecated and will be removed in future releases. 
" - "Please use ObjectType instead.", str(w[-1].message)) diff --git a/src/crate/client/sqlalchemy/types.py b/src/crate/client/sqlalchemy/types.py deleted file mode 100644 index f9899d92..00000000 --- a/src/crate/client/sqlalchemy/types.py +++ /dev/null @@ -1,277 +0,0 @@ -# -*- coding: utf-8; -*- -# -# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor -# license agreements. See the NOTICE file distributed with this work for -# additional information regarding copyright ownership. Crate licenses -# this file to you under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. You may -# obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# However, if you have executed another commercial license agreement -# with Crate these terms will supersede the license and you may use the -# software solely pursuant to the terms of the relevant commercial agreement. 
-import warnings - -import sqlalchemy.types as sqltypes -from sqlalchemy.sql import operators, expression -from sqlalchemy.sql import default_comparator -from sqlalchemy.ext.mutable import Mutable - -import geojson - - -class MutableList(Mutable, list): - - @classmethod - def coerce(cls, key, value): - """ Convert plain list to MutableList """ - if not isinstance(value, MutableList): - if isinstance(value, list): - return MutableList(value) - elif value is None: - return value - else: - return MutableList([value]) - else: - return value - - def __init__(self, initval=None): - list.__init__(self, initval or []) - - def __setitem__(self, key, value): - list.__setitem__(self, key, value) - self.changed() - - def __eq__(self, other): - return list.__eq__(self, other) - - def append(self, item): - list.append(self, item) - self.changed() - - def insert(self, idx, item): - list.insert(self, idx, item) - self.changed() - - def extend(self, iterable): - list.extend(self, iterable) - self.changed() - - def pop(self, index=-1): - list.pop(self, index) - self.changed() - - def remove(self, item): - list.remove(self, item) - self.changed() - - -class MutableDict(Mutable, dict): - - @classmethod - def coerce(cls, key, value): - "Convert plain dictionaries to MutableDict." 
- - if not isinstance(value, MutableDict): - if isinstance(value, dict): - return MutableDict(value) - - # this call will raise ValueError - return Mutable.coerce(key, value) - else: - return value - - def __init__(self, initval=None, to_update=None, root_change_key=None): - initval = initval or {} - self._changed_keys = set() - self._deleted_keys = set() - self._overwrite_key = root_change_key - self.to_update = self if to_update is None else to_update - for k in initval: - initval[k] = self._convert_dict(initval[k], - overwrite_key=k if self._overwrite_key is None else self._overwrite_key - ) - dict.__init__(self, initval) - - def __setitem__(self, key, value): - value = self._convert_dict(value, key if self._overwrite_key is None else self._overwrite_key) - dict.__setitem__(self, key, value) - self.to_update.on_key_changed( - key if self._overwrite_key is None else self._overwrite_key - ) - - def __delitem__(self, key): - dict.__delitem__(self, key) - # add the key to the deleted keys if this is the root object - # otherwise update on root object - if self._overwrite_key is None: - self._deleted_keys.add(key) - self.changed() - else: - self.to_update.on_key_changed(self._overwrite_key) - - def on_key_changed(self, key): - self._deleted_keys.discard(key) - self._changed_keys.add(key) - self.changed() - - def _convert_dict(self, value, overwrite_key): - if isinstance(value, dict) and not isinstance(value, MutableDict): - return MutableDict(value, self.to_update, overwrite_key) - return value - - def __eq__(self, other): - return dict.__eq__(self, other) - - -class ObjectTypeImpl(sqltypes.UserDefinedType, sqltypes.JSON): - - __visit_name__ = "OBJECT" - - cache_ok = False - none_as_null = False - - -# Designated name to refer to. `Object` is too ambiguous. -ObjectType = MutableDict.as_mutable(ObjectTypeImpl) - -# Backward-compatibility aliases. 
-_deprecated_Craty = ObjectType -_deprecated_Object = ObjectType - -# https://www.lesinskis.com/deprecating-module-scope-variables.html -deprecated_names = ["Craty", "Object"] - - -def __getattr__(name): - if name in deprecated_names: - warnings.warn(f"{name} is deprecated and will be removed in future releases. " - f"Please use ObjectType instead.", DeprecationWarning) - return globals()[f"_deprecated_{name}"] - raise AttributeError(f"module {__name__} has no attribute {name}") - - -class Any(expression.ColumnElement): - """Represent the clause ``left operator ANY (right)``. ``right`` must be - an array expression. - - copied from postgresql dialect - - .. seealso:: - - :class:`sqlalchemy.dialects.postgresql.ARRAY` - - :meth:`sqlalchemy.dialects.postgresql.ARRAY.Comparator.any` - ARRAY-bound method - - """ - __visit_name__ = 'any' - inherit_cache = True - - def __init__(self, left, right, operator=operators.eq): - self.type = sqltypes.Boolean() - self.left = expression.literal(left) - self.right = right - self.operator = operator - - -class _ObjectArray(sqltypes.UserDefinedType): - cache_ok = True - - class Comparator(sqltypes.TypeEngine.Comparator): - def __getitem__(self, key): - return default_comparator._binary_operate(self.expr, - operators.getitem, - key) - - def any(self, other, operator=operators.eq): - """Return ``other operator ANY (array)`` clause. - - Argument places are switched, because ANY requires array - expression to be on the right hand-side. - - E.g.:: - - from sqlalchemy.sql import operators - - conn.execute( - select([table.c.data]).where( - table.c.data.any(7, operator=operators.lt) - ) - ) - - :param other: expression to be compared - :param operator: an operator object from the - :mod:`sqlalchemy.sql.operators` - package, defaults to :func:`.operators.eq`. - - .. 
seealso:: - - :class:`.postgresql.Any` - - :meth:`.postgresql.ARRAY.Comparator.all` - - """ - return Any(other, self.expr, operator=operator) - - type = MutableList - comparator_factory = Comparator - - def get_col_spec(self, **kws): - return "ARRAY(OBJECT)" - - -ObjectArray = MutableList.as_mutable(_ObjectArray) - - -class Geopoint(sqltypes.UserDefinedType): - cache_ok = True - - class Comparator(sqltypes.TypeEngine.Comparator): - - def __getitem__(self, key): - return default_comparator._binary_operate(self.expr, - operators.getitem, - key) - - def get_col_spec(self): - return 'GEO_POINT' - - def bind_processor(self, dialect): - def process(value): - if isinstance(value, geojson.Point): - return value.coordinates - return value - return process - - def result_processor(self, dialect, coltype): - return tuple - - comparator_factory = Comparator - - -class Geoshape(sqltypes.UserDefinedType): - cache_ok = True - - class Comparator(sqltypes.TypeEngine.Comparator): - - def __getitem__(self, key): - return default_comparator._binary_operate(self.expr, - operators.getitem, - key) - - def get_col_spec(self): - return 'GEO_SHAPE' - - def result_processor(self, dialect, coltype): - return geojson.GeoJSON.to_instance - - comparator_factory = Comparator diff --git a/src/crate/client/tests.py b/src/crate/client/tests.py index 0f5878d7..2f6be428 100644 --- a/src/crate/client/tests.py +++ b/src/crate/client/tests.py @@ -24,7 +24,6 @@ import json import os import socket -import sys import unittest import doctest from pprint import pprint @@ -41,7 +40,6 @@ crate_host, crate_path, crate_port, \ crate_transport_port, docs_path, localhost from crate.client import connect -from .sqlalchemy import SA_VERSION, SA_2_0 from .test_cursor import CursorTest from .test_connection import ConnectionTest @@ -56,8 +54,6 @@ TestCrateJsonEncoder, TestDefaultSchemaHeader, ) -from .sqlalchemy.tests import test_suite_unit as sqlalchemy_test_suite_unit -from .sqlalchemy.tests import 
test_suite_integration as sqlalchemy_test_suite_integration makeSuite = unittest.TestLoader().loadTestsFromTestCase @@ -145,37 +141,6 @@ def setUpCrateLayerBaseline(test): cursor.close() -def setUpCrateLayerSqlAlchemy(test): - """ - Setup tables and views needed for SQLAlchemy tests. - """ - setUpCrateLayerBaseline(test) - - ddl_statements = [ - """ - CREATE TABLE characters ( - id STRING PRIMARY KEY, - name STRING, - quote STRING, - details OBJECT, - more_details ARRAY(OBJECT), - INDEX name_ft USING fulltext(name) WITH (analyzer = 'english'), - INDEX quote_ft USING fulltext(quote) WITH (analyzer = 'english') - )""", - """ - CREATE VIEW characters_view - AS SELECT * FROM characters - """, - """ - CREATE TABLE cities ( - name STRING PRIMARY KEY, - coordinate GEO_POINT, - area GEO_SHAPE - )""" - ] - _execute_statements(ddl_statements, on_error="raise") - - def tearDownDropEntitiesBaseline(test): """ Drop all tables, views, and users created by `setUpWithCrateLayer*`. @@ -189,19 +154,6 @@ def tearDownDropEntitiesBaseline(test): _execute_statements(ddl_statements) -def tearDownDropEntitiesSqlAlchemy(test): - """ - Drop all tables, views, and users created by `setUpWithCrateLayer*`. 
- """ - tearDownDropEntitiesBaseline(test) - ddl_statements = [ - "DROP TABLE characters", - "DROP VIEW characters_view", - "DROP TABLE cities", - ] - _execute_statements(ddl_statements) - - class HttpsTestServerLayer: PORT = 65534 HOST = "localhost" @@ -349,7 +301,6 @@ def test_suite(): suite.addTest(makeSuite(TestUsernameSentAsHeader)) suite.addTest(makeSuite(TestCrateJsonEncoder)) suite.addTest(makeSuite(TestDefaultSchemaHeader)) - suite.addTest(sqlalchemy_test_suite_unit()) suite.addTest(doctest.DocTestSuite('crate.client.connection')) suite.addTest(doctest.DocTestSuite('crate.client.http')) @@ -386,31 +337,4 @@ def test_suite(): s.layer = ensure_cratedb_layer() suite.addTest(s) - sqlalchemy_integration_tests = [ - 'docs/by-example/sqlalchemy/getting-started.rst', - 'docs/by-example/sqlalchemy/crud.rst', - 'docs/by-example/sqlalchemy/working-with-types.rst', - 'docs/by-example/sqlalchemy/advanced-querying.rst', - 'docs/by-example/sqlalchemy/inspection-reflection.rst', - ] - - # Don't run DataFrame integration tests on SQLAlchemy 1.3 and Python 3.7. - skip_dataframe = SA_VERSION < SA_2_0 or sys.version_info < (3, 8) - if not skip_dataframe: - sqlalchemy_integration_tests += [ - 'docs/by-example/sqlalchemy/dataframe.rst', - ] - - s = doctest.DocFileSuite( - *sqlalchemy_integration_tests, - module_relative=False, - setUp=setUpCrateLayerSqlAlchemy, - tearDown=tearDownDropEntitiesSqlAlchemy, - optionflags=flags, - encoding='utf-8' - ) - s.layer = ensure_cratedb_layer() - s.addTest(sqlalchemy_test_suite_integration()) - suite.addTest(s) - return suite diff --git a/tox.ini b/tox.ini index fa7995bc..978bd90c 100644 --- a/tox.ini +++ b/tox.ini @@ -8,11 +8,6 @@ deps = zope.testrunner zope.testing zc.customdoctests - sa_1_0: sqlalchemy>=1.0,<1.1 - sa_1_1: sqlalchemy>=1.1,<1.2 - sa_1_2: sqlalchemy>=1.2,<1.3 - sa_1_3: sqlalchemy>=1.3,<1.4 - sa_1_4: sqlalchemy>=1.4,<1.5 mock urllib3 commands =