diff --git a/snooty.toml b/snooty.toml
index a2ec0b4a..677d69a9 100644
--- a/snooty.toml
+++ b/snooty.toml
@@ -6,8 +6,8 @@ intersphinx = ["https://www.mongodb.com/docs/manual/objects.inv"]
 toc_landing_pages = ["configuration"]
 
 [constants]
-driver-short = "Spark Connector"
-driver-long = "MongoDB {+driver-short+}"
+connector-short = "Spark Connector"
+connector-long = "MongoDB {+connector-short+}"
 current-version = "10.2.0"
 artifact-id-2-13 = "mongo-spark-connector_2.13"
 artifact-id-2-12 = "mongo-spark-connector_2.12"
diff --git a/source/configuration/read.txt b/source/configuration/read.txt
index 640268f4..68ec98bd 100644
--- a/source/configuration/read.txt
+++ b/source/configuration/read.txt
@@ -133,7 +133,7 @@ You can configure the following properties to read from MongoDB:
 Partitioner Configurations
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-Partitioners change the read behavior for batch reads with the {+driver-short+}.
+Partitioners change the read behavior for batch reads with the {+connector-short+}.
 They do not affect Structured Streaming because the data stream processing
 engine produces a single stream with Structured Streaming.
 
@@ -330,9 +330,13 @@ Change Streams
      - | Specifies whether to publish the changed document or the full
          change stream document.
        |
-       | When set to ``true``, the connector filters out messages that
+       | When this setting is ``true``, the connector exhibits the following behavior:
+
+       - The connector filters out messages that
          omit the ``fullDocument`` field and only publishes the value of
          the field.
+       - If you don't specify a schema, the connector infers the schema
+         from the change stream document rather than from the underlying collection.
 
        .. note::
 
diff --git a/source/read-from-mongodb.txt b/source/read-from-mongodb.txt
index 2eaf674c..4fdca2db 100644
--- a/source/read-from-mongodb.txt
+++ b/source/read-from-mongodb.txt
@@ -42,6 +42,18 @@ Overview
 
 .. include:: /scala/filters.txt
 
+.. important:: Inferring the Schema of a Change Stream
+
+   When the {+connector-short+} infers the schema of a DataFrame
+   read from a change stream, it uses the schema of the underlying
+   collection by default, rather than the schema of the change
+   stream. If you set the ``change.stream.publish.full.document.only``
+   option to ``true``, the connector uses the schema of the
+   change stream instead.
+
+   For more information on configuring a read operation, see the
+   :ref:`spark-change-stream-conf` section of the Read Configuration Options guide.
+
 SQL Queries
 -----------
 
diff --git a/source/structured-streaming.txt b/source/structured-streaming.txt
index f6ce1ebe..4b93b4df 100644
--- a/source/structured-streaming.txt
+++ b/source/structured-streaming.txt
@@ -191,7 +191,7 @@ Configuring a Write Stream to MongoDB
 Configuring a Read Stream from MongoDB
 --------------------------------------
 
-When reading a stream from a MongoDB database, the {+driver-long+} supports both
+When reading a stream from a MongoDB database, the {+connector-long+} supports both
 *micro-batch processing* and *continuous processing*. Micro-batch processing is the
 default processing engine, while continuous processing is an experimental feature
 introduced in
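
A minimal Scala sketch of the change-stream behavior documented above: reading a stream with ``change.stream.publish.full.document.only`` set to ``true``, so the connector publishes only ``fullDocument`` values and, absent an explicit schema, infers the schema from the change stream. The connection URI, database, and collection names are placeholders, not values from this patch.

```scala
import org.apache.spark.sql.SparkSession

object ChangeStreamReadSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("change-stream-read")
      .getOrCreate()

    // Read a change stream as a streaming DataFrame. With
    // change.stream.publish.full.document.only set to "true", the connector
    // filters out events that omit fullDocument, publishes only the
    // fullDocument value, and infers the schema from the change stream
    // documents rather than from the underlying collection.
    val df = spark.readStream
      .format("mongodb")
      .option("spark.mongodb.connection.uri", "mongodb://localhost:27017") // placeholder URI
      .option("spark.mongodb.database", "exampleDb")                       // placeholder database
      .option("spark.mongodb.collection", "exampleColl")                   // placeholder collection
      .option("change.stream.publish.full.document.only", "true")
      .load()

    // The printed schema reflects the change stream when the option is true.
    df.printSchema()
  }
}
```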
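
For the two read-stream engines named in ``source/structured-streaming.txt``, here is a sketch, under the same placeholder connection settings, of opting into continuous processing. Micro-batch is the default; a continuous trigger on the write stream (a standard Spark API, not something introduced by this patch) selects the experimental continuous engine.

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.streaming.Trigger

object ContinuousReadSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("continuous-read")
      .getOrCreate()

    val df = spark.readStream
      .format("mongodb")
      .option("spark.mongodb.connection.uri", "mongodb://localhost:27017") // placeholder URI
      .option("spark.mongodb.database", "exampleDb")                       // placeholder database
      .option("spark.mongodb.collection", "exampleColl")                   // placeholder collection
      .load()

    // Without a trigger, Spark uses the default micro-batch engine.
    // Trigger.Continuous opts into continuous processing with the given
    // checkpoint interval.
    val query = df.writeStream
      .format("console")
      .option("checkpointLocation", "/tmp/example-checkpoint") // placeholder path
      .trigger(Trigger.Continuous("1 second"))
      .start()

    query.awaitTermination()
  }
}
```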