From d52b311c690b6e226c935f7574f5823edf1ac491 Mon Sep 17 00:00:00 2001 From: Mike Woofter Date: Mon, 10 Jul 2023 13:12:27 -0500 Subject: [PATCH 1/5] wip --- source/configuration/read.txt | 10 +++++++--- source/read-from-mongodb.txt | 12 ++++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/source/configuration/read.txt b/source/configuration/read.txt index 640268f4..05afd1bb 100644 --- a/source/configuration/read.txt +++ b/source/configuration/read.txt @@ -330,9 +330,13 @@ Change Streams - | Specifies whether to publish the changed document or the full change stream document. | - | When set to ``true``, the connector filters out messages that - omit the ``fullDocument`` field and only publishes the value of the - field. + | When set to ``true``: + | + | - The connector filters out messages that + omit the ``fullDocument`` field and only publishes the value of the + field. + | - If you don't specify a schema, the connector infers the schema of the + change stream document rather than the underlying collection. .. note:: diff --git a/source/read-from-mongodb.txt b/source/read-from-mongodb.txt index 2eaf674c..d8d291d7 100644 --- a/source/read-from-mongodb.txt +++ b/source/read-from-mongodb.txt @@ -42,6 +42,18 @@ Overview .. include:: /scala/filters.txt +.. important:: Change Stream Schema Inference + + When the {+driver-short+} infers the schema of a data frame + read from a change stream, by default, + it will use the schema of the underlying collection rather than the schema + of the change stream. To instruct the connector to use the schema of the + change stream, set the ``change.stream.publish.full.document.only`` option + to ``true``. + + For more information on configuring a read operation, see the + :ref:`spark-read-conf` guide. + SQL Queries ----------- From 9a99ba843609e88436b8e4e643e2eb144f7ec9f4 Mon Sep 17 00:00:00 2001 From: Mike Woofter Date: Mon, 10 Jul 2023 13:19:00 -0500 Subject: [PATCH 2/5] autobuilder --- source/configuration/read.txt | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/source/configuration/read.txt b/source/configuration/read.txt index 05afd1bb..6fddbc31 100644 --- a/source/configuration/read.txt +++ b/source/configuration/read.txt @@ -331,12 +331,12 @@ Change Streams change stream document. | | When set to ``true``: - | - | - The connector filters out messages that - omit the ``fullDocument`` field and only publishes the value of the - field. - | - If you don't specify a schema, the connector infers the schema of the - change stream document rather than the underlying collection. + + - The connector filters out messages that + omit the ``fullDocument`` field and only publishes the value of the + field. + - If you don't specify a schema, the connector infers the schema + from the change stream document rather than from the underlying collection. .. note:: From 5ca471405b191b4bd84f52908f407620653b06b7 Mon Sep 17 00:00:00 2001 From: Mike Woofter Date: Mon, 10 Jul 2023 16:49:39 -0500 Subject: [PATCH 3/5] rr feedback --- snooty.toml | 4 ++-- source/configuration/read.txt | 4 ++-- source/read-from-mongodb.txt | 12 ++++++------ source/structured-streaming.txt | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/snooty.toml b/snooty.toml index 92e241e3..4aa69657 100644 --- a/snooty.toml +++ b/snooty.toml @@ -6,8 +6,8 @@ intersphinx = ["https://www.mongodb.com/docs/manual/objects.inv"] toc_landing_pages = ["configuration"] [constants] -driver-short = "Spark Connector" -driver-long = "MongoDB {+driver-short+}" +connector-short = "Spark Connector" +connector-long = "MongoDB {+connector-short+}" current-version = "10.1.1" artifact-id-2-13 = "mongo-spark-connector_2.13" artifact-id-2-12 = "mongo-spark-connector_2.12" diff --git a/source/configuration/read.txt b/source/configuration/read.txt index 6fddbc31..68ec98bd 100644 --- a/source/configuration/read.txt +++ b/source/configuration/read.txt @@ -133,7 +133,7 @@ You can configure the following properties to read from MongoDB: Partitioner Configurations ~~~~~~~~~~~~~~~~~~~~~~~~~~ -Partitioners change the read behavior for batch reads with the {+driver-short+}. +Partitioners change the read behavior for batch reads with the {+connector-short+}. They do not affect Structured Streaming because the data stream processing engine produces a single stream with Structured Streaming. @@ -330,7 +330,7 @@ Change Streams - | Specifies whether to publish the changed document or the full change stream document. | - | When set to ``true``: + | When this setting is ``true``, the connector exhibits the following behavior: - The connector filters out messages that omit the ``fullDocument`` field and only publishes the value of the diff --git a/source/read-from-mongodb.txt b/source/read-from-mongodb.txt index d8d291d7..40b720da 100644 --- a/source/read-from-mongodb.txt +++ b/source/read-from-mongodb.txt @@ -44,15 +44,15 @@ Overview .. important:: Change Stream Schema Inference - When the {+driver-short+} infers the schema of a data frame + When the {+connector-short+} infers the schema of a data frame read from a change stream, by default, - it will use the schema of the underlying collection rather than the schema - of the change stream. To instruct the connector to use the schema of the - change stream, set the ``change.stream.publish.full.document.only`` option - to ``true``. + it will use the schema of the underlying collection rather than that + of the change stream. If you set the ``change.stream.publish.full.document.only`` + option to ``true``, the connector uses the schema of the + change stream instead. For more information on configuring a read operation, see the - :ref:`spark-read-conf` guide. + :ref:`spark-change-stream-conf` guide. SQL Queries ----------- diff --git a/source/structured-streaming.txt b/source/structured-streaming.txt index f6ce1ebe..4b93b4df 100644 --- a/source/structured-streaming.txt +++ b/source/structured-streaming.txt @@ -191,7 +191,7 @@ Configuring a Write Stream to MongoDB Configuring a Read Stream from MongoDB -------------------------------------- -When reading a stream from a MongoDB database, the {+driver-long+} supports both +When reading a stream from a MongoDB database, the {+connector-long+} supports both *micro-batch processing* and *continuous processing*. Micro-batch processing is the default processing engine, while continuous processing is an experimental feature introduced in From d0edd2fe90a0a422860b7174effc7d374bf0f8b4 Mon Sep 17 00:00:00 2001 From: Mike Woofter Date: Mon, 10 Jul 2023 16:55:09 -0500 Subject: [PATCH 4/5] touch ups --- source/read-from-mongodb.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/read-from-mongodb.txt b/source/read-from-mongodb.txt index 40b720da..4fdca2db 100644 --- a/source/read-from-mongodb.txt +++ b/source/read-from-mongodb.txt @@ -42,7 +42,7 @@ Overview .. include:: /scala/filters.txt -.. important:: Change Stream Schema Inference +.. important:: Inferring the Schema of a Change Stream When the {+connector-short+} infers the schema of a data frame read from a change stream, by default, @@ -52,7 +52,7 @@ Overview change stream instead. For more information on configuring a read operation, see the - :ref:`spark-change-stream-conf` guide. + :ref:`spark-change-stream-conf` section of the Read Configuration Options guide. SQL Queries ----------- From 64006254cf1b77624962a7bd69e5b6daa45297b2 Mon Sep 17 00:00:00 2001 From: Caitlin Davey Date: Thu, 13 Jul 2023 09:59:22 -0400 Subject: [PATCH 5/5] Spark Connector Minor versions 10.2 (#165) * Spark Connector Minor versions 10.2 * Delete settings.json Deleting VS code settings --- config/redirects | 2 +- snooty.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/config/redirects b/config/redirects index f0d0735f..fb2fa78b 100644 --- a/config/redirects +++ b/config/redirects @@ -1,6 +1,6 @@ define: prefix docs/spark-connector define: base https://www.mongodb.com/${prefix} -define: versions v1.1 v2.0 v2.1 v2.2 v2.3 v2.4 v3.0 v10.0 v10.1 master +define: versions v1.1 v2.0 v2.1 v2.2 v2.3 v2.4 v3.0 v10.0 v10.1 v10.2 master raw: ${prefix}/ -> ${base}/current/ raw: ${prefix}/configuration -> ${base}/current/configuration/ diff --git a/snooty.toml b/snooty.toml index 4aa69657..677d69a9 100644 --- a/snooty.toml +++ b/snooty.toml @@ -8,7 +8,7 @@ toc_landing_pages = ["configuration"] [constants] connector-short = "Spark Connector" connector-long = "MongoDB {+connector-short+}" -current-version = "10.1.1" +current-version = "10.2.0" artifact-id-2-13 = "mongo-spark-connector_2.13" artifact-id-2-12 = "mongo-spark-connector_2.12" spark-core-version = "3.3.1"