From 82b673904ae240bc5f9d1f8b9e06c7dca814d6b2 Mon Sep 17 00:00:00 2001 From: Jesse Hallett Date: Thu, 13 Jun 2024 10:25:15 -0700 Subject: [PATCH 1/3] fix bson serialization test for case where double and int unify to double (#80) In #77 I neglected to adjust our round-trip bson-to-json serialization proptest to handle cases where we treat ints as interchangeable with doubles. This change adds a custom equality test for use solely in tests that can compare ints and doubles. --- .../cli/src/introspection/type_unification.rs | 10 ++++++++ .../query/serialization/tests.txt | 1 + .../src/query/serialization/tests.rs | 25 +++++++++++++++++-- 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/crates/cli/src/introspection/type_unification.rs b/crates/cli/src/introspection/type_unification.rs index 31e539e1..bf997c3f 100644 --- a/crates/cli/src/introspection/type_unification.rs +++ b/crates/cli/src/introspection/type_unification.rs @@ -174,6 +174,16 @@ pub fn unify_object_types( merged_type_map.into_values().collect() } +/// True iff we consider a to be a supertype of b. +/// +/// Note that if you add more supertypes here then it is important to also update the custom +/// equality check in our tests in mongodb_agent_common::query::serialization::tests. Equality +/// needs to be transitive over supertypes, so for example if we have, +/// +/// (Double, Int), (Decimal, Double) +/// +/// then in addition to comparing ints to doubles, and doubles to decimals, we also need to compare +/// decimals to ints. 
fn is_supertype(a: &BsonScalarType, b: &BsonScalarType) -> bool { matches!((a, b), (Double, Int)) } diff --git a/crates/mongodb-agent-common/proptest-regressions/query/serialization/tests.txt b/crates/mongodb-agent-common/proptest-regressions/query/serialization/tests.txt index 8a816d59..8304681d 100644 --- a/crates/mongodb-agent-common/proptest-regressions/query/serialization/tests.txt +++ b/crates/mongodb-agent-common/proptest-regressions/query/serialization/tests.txt @@ -8,3 +8,4 @@ cc 2efdea7f185f2f38ae643782b3523014ab7b8236e36a79cc6b7a7cac74b06f79 # shrinks to cc 26e2543468ab6d4ffa34f9f8a2c920801ef38a35337557a8f4e74c92cf57e344 # shrinks to bson = Document({" ": Document({"ยก": DateTime(1970-01-01 0:00:00.001 +00:00:00)})}) cc 7d760e540b56fedac7dd58e5bdb5bb9613b9b0bc6a88acfab3fc9c2de8bf026d # shrinks to bson = Document({"A": Array([Null, Undefined])}) cc 21360610045c5a616b371fb8d5492eb0c22065d62e54d9c8a8761872e2e192f3 # shrinks to bson = Array([Document({}), Document({" ": Null})]) +cc 8842e7f78af24e19847be5d8ee3d47c547ef6c1bb54801d360a131f41a87f4fa diff --git a/crates/mongodb-agent-common/src/query/serialization/tests.rs b/crates/mongodb-agent-common/src/query/serialization/tests.rs index 75395f41..9d65368b 100644 --- a/crates/mongodb-agent-common/src/query/serialization/tests.rs +++ b/crates/mongodb-agent-common/src/query/serialization/tests.rs @@ -21,8 +21,10 @@ proptest! 
{ let json = bson_to_json(&inferred_type, bson.clone()).map_err(|e| error_context("error converting bson to json", e.to_string()))?; let actual = json_to_bson(&inferred_type, json.clone()).map_err(|e| error_context("error converting json to bson", e.to_string()))?; - prop_assert_eq!(actual, bson, - "\ninferred type: {:?}\nobject types: {:?}\njson_representation: {}", + prop_assert!(custom_eq(&actual, &bson), + "`(left == right)`\nleft: `{:?}`\nright: `{:?}`\ninferred type: {:?}\nobject types: {:?}\njson_representation: {}", + actual, + bson, inferred_type, object_types, serde_json::to_string_pretty(&json).unwrap() @@ -40,3 +42,22 @@ proptest! { prop_assert_eq!(actual, bson, "json representation: {}", json) } } + +/// We are treating doubles as a superset of ints, so we need an equality check that allows +/// comparing those types. +fn custom_eq(a: &Bson, b: &Bson) -> bool { + match (a, b) { + (Bson::Double(a), Bson::Int32(b)) | (Bson::Int32(b), Bson::Double(a)) => *a == *b as f64, + (Bson::Array(xs), Bson::Array(ys)) => { + xs.len() == ys.len() && xs.iter().zip(ys.iter()).all(|(x, y)| custom_eq(x, y)) + } + (Bson::Document(a), Bson::Document(b)) => { + a.len() == b.len() + && a.iter().all(|(key_a, value_a)| match b.get(key_a) { + Some(value_b) => custom_eq(value_a, value_b), + None => false, + }) + } + _ => a == b, + } +} From 5ab20aba90d2aa26729336cabfeb2d88d32fcf1e Mon Sep 17 00:00:00 2001 From: David Overton Date: Fri, 14 Jun 2024 04:43:47 +1000 Subject: [PATCH 2/3] Do not make _id fields nullable when sampling documents (#78) * Do not make _id fields nullable when sampling documents * Add changelog entry --------- Co-authored-by: Brandon Martin --- CHANGELOG.md | 1 + crates/cli/src/introspection/sampling.rs | 97 +++++++++++++++++++---- crates/cli/src/lib.rs | 2 +- crates/configuration/src/configuration.rs | 2 +- 4 files changed, 83 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 98841a11..f0ada28d 100644 --- a/CHANGELOG.md +++ 
b/CHANGELOG.md @@ -8,6 +8,7 @@ This changelog documents the changes between release versions. - Implement column-to-column comparisons within the same collection ([#74](https://github.com/hasura/ndc-mongodb/pull/74)) - Fix error tracking collection with no documents by skipping such collections during CLI introspection ([#76](https://github.com/hasura/ndc-mongodb/pull/76)) - If a field contains both `int` and `double` values then the field type is inferred as `double` instead of `ExtendedJSON` ([#77](https://github.com/hasura/ndc-mongodb/pull/77)) +- Fix: schema generated with `_id` column nullable when introspecting schema via sampling ([#78](https://github.com/hasura/ndc-mongodb/pull/78)) ## [0.0.6] - 2024-05-01 - Enables logging events from the MongoDB driver by setting the `RUST_LOG` variable ([#67](https://github.com/hasura/ndc-mongodb/pull/67)) diff --git a/crates/cli/src/introspection/sampling.rs b/crates/cli/src/introspection/sampling.rs index 47e3dec6..51a5f720 100644 --- a/crates/cli/src/introspection/sampling.rs +++ b/crates/cli/src/introspection/sampling.rs @@ -21,7 +21,7 @@ type ObjectType = WithName; /// are not unifiable. pub async fn sample_schema_from_db( sample_size: u32, - all_schema_nullalble: bool, + all_schema_nullable: bool, config_file_changed: bool, state: &ConnectorState, existing_schemas: &HashSet, @@ -36,7 +36,7 @@ pub async fn sample_schema_from_db( let collection_schema = sample_schema_from_collection( &collection_name, sample_size, - all_schema_nullalble, + all_schema_nullable, state, ) .await?; @@ -53,7 +53,7 @@ pub async fn sample_schema_from_db( async fn sample_schema_from_collection( collection_name: &str, sample_size: u32, - all_schema_nullalble: bool, + all_schema_nullable: bool, state: &ConnectorState, ) -> anyhow::Result> { let db = state.database(); @@ -63,8 +63,14 @@ async fn sample_schema_from_collection( .aggregate(vec![doc! 
{"$sample": { "size": sample_size }}], options) .await?; let mut collected_object_types = vec![]; + let is_collection_type = true; while let Some(document) = cursor.try_next().await? { - let object_types = make_object_type(collection_name, &document, all_schema_nullalble); + let object_types = make_object_type( + collection_name, + &document, + is_collection_type, + all_schema_nullable, + ); collected_object_types = if collected_object_types.is_empty() { object_types } else { @@ -91,14 +97,21 @@ async fn sample_schema_from_collection( fn make_object_type( object_type_name: &str, document: &Document, - all_schema_nullalble: bool, + is_collection_type: bool, + all_schema_nullable: bool, ) -> Vec { let (mut object_type_defs, object_fields) = { let type_prefix = format!("{object_type_name}_"); let (object_type_defs, object_fields): (Vec>, Vec) = document .iter() .map(|(field_name, field_value)| { - make_object_field(&type_prefix, field_name, field_value, all_schema_nullalble) + make_object_field( + &type_prefix, + field_name, + field_value, + is_collection_type, + all_schema_nullable, + ) }) .unzip(); (object_type_defs.concat(), object_fields) @@ -120,11 +133,12 @@ fn make_object_field( type_prefix: &str, field_name: &str, field_value: &Bson, - all_schema_nullalble: bool, + is_collection_type: bool, + all_schema_nullable: bool, ) -> (Vec, ObjectField) { let object_type_name = format!("{type_prefix}{field_name}"); let (collected_otds, field_type) = - make_field_type(&object_type_name, field_value, all_schema_nullalble); + make_field_type(&object_type_name, field_value, all_schema_nullable); let object_field_value = WithName::named( field_name.to_owned(), schema::ObjectField { @@ -132,7 +146,8 @@ fn make_object_field( r#type: field_type, }, ); - let object_field = if all_schema_nullalble { + let object_field = if all_schema_nullable && !(is_collection_type && field_name == "_id") { + // The _id field on a collection type should never be nullable. 
make_nullable_field(object_field_value) } else { object_field_value @@ -145,16 +160,16 @@ fn make_object_field( pub fn type_from_bson( object_type_name: &str, value: &Bson, - all_schema_nullalble: bool, + all_schema_nullable: bool, ) -> (BTreeMap, Type) { - let (object_types, t) = make_field_type(object_type_name, value, all_schema_nullalble); + let (object_types, t) = make_field_type(object_type_name, value, all_schema_nullable); (WithName::into_map(object_types), t) } fn make_field_type( object_type_name: &str, field_value: &Bson, - all_schema_nullalble: bool, + all_schema_nullable: bool, ) -> (Vec, Type) { fn scalar(t: BsonScalarType) -> (Vec, Type) { (vec![], Type::Scalar(t)) @@ -168,7 +183,7 @@ fn make_field_type( let mut result_type = Type::Scalar(Undefined); for elem in arr { let (elem_collected_otds, elem_type) = - make_field_type(object_type_name, elem, all_schema_nullalble); + make_field_type(object_type_name, elem, all_schema_nullable); collected_otds = if collected_otds.is_empty() { elem_collected_otds } else { @@ -179,7 +194,13 @@ fn make_field_type( (collected_otds, Type::ArrayOf(Box::new(result_type))) } Bson::Document(document) => { - let collected_otds = make_object_type(object_type_name, document, all_schema_nullalble); + let is_collection_type = false; + let collected_otds = make_object_type( + object_type_name, + document, + is_collection_type, + all_schema_nullable, + ); (collected_otds, Type::Object(object_type_name.to_owned())) } Bson::Boolean(_) => scalar(Bool), @@ -220,7 +241,7 @@ mod tests { let object_name = "foo"; let doc = doc! {"my_int": 1, "my_string": "two"}; let result = - WithName::into_map::>(make_object_type(object_name, &doc, false)); + WithName::into_map::>(make_object_type(object_name, &doc, false, false)); let expected = BTreeMap::from([( object_name.to_owned(), @@ -250,12 +271,54 @@ mod tests { Ok(()) } + #[test] + fn simple_doc_nullable_fields() -> Result<(), anyhow::Error> { + let object_name = "foo"; + let doc = doc! 
{"my_int": 1, "my_string": "two", "_id": 0}; + let result = + WithName::into_map::>(make_object_type(object_name, &doc, true, true)); + + let expected = BTreeMap::from([( + object_name.to_owned(), + ObjectType { + fields: BTreeMap::from([ + ( + "_id".to_owned(), + ObjectField { + r#type: Type::Scalar(BsonScalarType::Int), + description: None, + }, + ), + ( + "my_int".to_owned(), + ObjectField { + r#type: Type::Nullable(Box::new(Type::Scalar(BsonScalarType::Int))), + description: None, + }, + ), + ( + "my_string".to_owned(), + ObjectField { + r#type: Type::Nullable(Box::new(Type::Scalar(BsonScalarType::String))), + description: None, + }, + ), + ]), + description: None, + }, + )]); + + assert_eq!(expected, result); + + Ok(()) + } + #[test] fn array_of_objects() -> Result<(), anyhow::Error> { let object_name = "foo"; let doc = doc! {"my_array": [{"foo": 42, "bar": ""}, {"bar": "wut", "baz": 3.77}]}; let result = - WithName::into_map::>(make_object_type(object_name, &doc, false)); + WithName::into_map::>(make_object_type(object_name, &doc, false, false)); let expected = BTreeMap::from([ ( @@ -316,7 +379,7 @@ mod tests { let object_name = "foo"; let doc = doc! 
{"my_array": [{"foo": 42, "bar": ""}, {"bar": 17, "baz": 3.77}]}; let result = - WithName::into_map::>(make_object_type(object_name, &doc, false)); + WithName::into_map::>(make_object_type(object_name, &doc, false, false)); let expected = BTreeMap::from([ ( diff --git a/crates/cli/src/lib.rs b/crates/cli/src/lib.rs index 34622108..46b510a5 100644 --- a/crates/cli/src/lib.rs +++ b/crates/cli/src/lib.rs @@ -56,7 +56,7 @@ async fn update(context: &Context, args: &UpdateArgs) -> anyhow::Result<()> { None => configuration_options.introspection_options.no_validator_schema }; let all_schema_nullable = match args.all_schema_nullable { - Some(validator) => validator, + Some(b) => b, None => configuration_options.introspection_options.all_schema_nullable }; let config_file_changed = configuration::get_config_file_changed(&context.path).await?; diff --git a/crates/configuration/src/configuration.rs b/crates/configuration/src/configuration.rs index 8c645515..e7719ec7 100644 --- a/crates/configuration/src/configuration.rs +++ b/crates/configuration/src/configuration.rs @@ -206,7 +206,7 @@ pub struct ConfigurationIntrospectionOptions { // Whether to try validator schema first if one exists. pub no_validator_schema: bool, - // Default to setting all schema fields as nullable. + // Default to setting all schema fields, except the _id field on collection types, as nullable. 
pub all_schema_nullable: bool, } From cd030f327f63c4ec8839b2a211d55f5ee78b8439 Mon Sep 17 00:00:00 2001 From: David Overton Date: Fri, 14 Jun 2024 04:53:29 +1000 Subject: [PATCH 3/3] Relax type requirements for primary uniqueness constraint (#79) * Don't require _id field to have type ObjectId when generating primary uniqueness constraint * Add changelog entry * Require comparable scalar type for _id * remove unused import to get clippy check passing --------- Co-authored-by: Jesse Hallett Co-authored-by: Brandon Martin --- CHANGELOG.md | 11 ++++++++++- crates/configuration/src/configuration.rs | 5 ++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f0ada28d..02f26d0d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,9 @@ # MongoDB Connector Changelog + This changelog documents the changes between release versions. ## [Unreleased] + - Support filtering and sorting by fields of related collections ([#72](https://github.com/hasura/ndc-mongodb/pull/72)) - Support for root collection column references ([#75](https://github.com/hasura/ndc-mongodb/pull/75)) - Fix for databases with field names that begin with a dollar sign, or that contain dots ([#74](https://github.com/hasura/ndc-mongodb/pull/74)) @@ -9,8 +11,10 @@ This changelog documents the changes between release versions.
- Fix error tracking collection with no documents by skipping such collections during CLI introspection ([#76](https://github.com/hasura/ndc-mongodb/pull/76)) - If a field contains both `int` and `double` values then the field type is inferred as `double` instead of `ExtendedJSON` ([#77](https://github.com/hasura/ndc-mongodb/pull/77)) - Fix: schema generated with `_id` column nullable when introspecting schema via sampling ([#78](https://github.com/hasura/ndc-mongodb/pull/78)) +- Don't require _id field to have type ObjectId when generating primary uniqueness constraint ([#79](https://github.com/hasura/ndc-mongodb/pull/79)) ## [0.0.6] - 2024-05-01 + - Enables logging events from the MongoDB driver by setting the `RUST_LOG` variable ([#67](https://github.com/hasura/ndc-mongodb/pull/67)) - To log all events set `RUST_LOG=mongodb::command=debug,mongodb::connection=debug,mongodb::server_selection=debug,mongodb::topology=debug` - Relations with a single column mapping now use concise correlated subquery syntax in `$lookup` stage ([#65](https://github.com/hasura/ndc-mongodb/pull/65)) @@ -21,15 +25,17 @@ This changelog documents the changes between release versions. - Note: `native_procedures` folder in configuration is not deprecated. It will continue to work for a few releases, but renaming your folder is all that is needed. 
## [0.0.5] - 2024-04-26 + - Fix incorrect order of results for query requests with more than 10 variable sets (#37) - In the CLI update command, don't overwrite schema files that haven't changed ([#49](https://github.com/hasura/ndc-mongodb/pull/49/files)) - In the CLI update command, if the database URI is not provided the error message now mentions the correct environment variable to use (`MONGODB_DATABASE_URI`) ([#50](https://github.com/hasura/ndc-mongodb/pull/50)) - Update to latest NDC SDK ([#51](https://github.com/hasura/ndc-mongodb/pull/51)) -- Update `rustls` dependency to fix https://github.com/hasura/ndc-mongodb/security/dependabot/1 ([#51](https://github.com/hasura/ndc-mongodb/pull/51)) +- Update `rustls` dependency to fix ([#51](https://github.com/hasura/ndc-mongodb/pull/51)) - Serialize query and mutation response fields with known types using simple JSON instead of Extended JSON (#53) (#59) - Add trace spans ([#58](https://github.com/hasura/ndc-mongodb/pull/58)) ## [0.0.4] - 2024-04-12 + - Queries that attempt to compare a column to a column in the query root table, or a related table, will now fail instead of giving the incorrect result ([#22](https://github.com/hasura/ndc-mongodb/pull/22)) - Fix bug in v2 to v3 conversion of query responses containing nested objects ([PR #27](https://github.com/hasura/ndc-mongodb/pull/27)) - Fixed bug where use of aggregate functions in queries would fail ([#26](https://github.com/hasura/ndc-mongodb/pull/26)) @@ -37,6 +43,7 @@ This changelog documents the changes between release versions. 
- The collection primary key `_id` property now has a unique constraint generated in the NDC schema for it ([#32](https://github.com/hasura/ndc-mongodb/pull/32)) ## [0.0.3] - 2024-03-28 + - Use separate schema files for each collection ([PR #14](https://github.com/hasura/ndc-mongodb/pull/14)) - Changes to `update` CLI command ([PR #17](https://github.com/hasura/ndc-mongodb/pull/17)): - new default behaviour: @@ -48,7 +55,9 @@ This changelog documents the changes between release versions. - Add `any` type and use it to represent mismatched types in sample documents ([PR #18](https://github.com/hasura/ndc-mongodb/pull/18)) ## [0.0.2] - 2024-03-26 + - Rename CLI plugin to ndc-mongodb ([PR #13](https://github.com/hasura/ndc-mongodb/pull/13)) ## [0.0.1] - 2024-03-22 + Initial release diff --git a/crates/configuration/src/configuration.rs b/crates/configuration/src/configuration.rs index e7719ec7..f028a504 100644 --- a/crates/configuration/src/configuration.rs +++ b/crates/configuration/src/configuration.rs @@ -2,7 +2,6 @@ use std::{collections::BTreeMap, path::Path}; use anyhow::{anyhow, ensure}; use itertools::Itertools; -use mongodb_support::BsonScalarType; use ndc_models as ndc; use serde::{Deserialize, Serialize}; @@ -282,12 +281,12 @@ fn get_primary_key_uniqueness_constraint( name: &str, collection_type: &str, ) -> Option<(String, ndc::UniquenessConstraint)> { - // Check to make sure our collection's object type contains the _id objectid field + // Check to make sure our collection's object type contains the _id field // If it doesn't (should never happen, all collections need an _id column), don't generate the constraint let object_type = object_types.get(collection_type)?; let id_field = object_type.fields.get("_id")?; match &id_field.r#type { - schema::Type::Scalar(BsonScalarType::ObjectId) => Some(()), + schema::Type::Scalar(scalar_type) if scalar_type.is_comparable() => Some(()), _ => None, }?; let uniqueness_constraint = ndc::UniquenessConstraint {