From 82b673904ae240bc5f9d1f8b9e06c7dca814d6b2 Mon Sep 17 00:00:00 2001 From: Jesse Hallett Date: Thu, 13 Jun 2024 10:25:15 -0700 Subject: [PATCH 1/3] fix bson serialization test for case where double and int unify to double (#80) In #77 I neglected to adjust our round-trip bson-to-json serialization proptest to handle cases where we treat ints as interchangeable with doubles. This change adds a custom equality test for use solely in tests that can compare ints and doubles. --- .../cli/src/introspection/type_unification.rs | 10 ++++++++ .../query/serialization/tests.txt | 1 + .../src/query/serialization/tests.rs | 25 +++++++++++++++++-- 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/crates/cli/src/introspection/type_unification.rs b/crates/cli/src/introspection/type_unification.rs index 31e539e1..bf997c3f 100644 --- a/crates/cli/src/introspection/type_unification.rs +++ b/crates/cli/src/introspection/type_unification.rs @@ -174,6 +174,16 @@ pub fn unify_object_types( merged_type_map.into_values().collect() } +/// True iff we consider a to be a supertype of b. +/// +/// Note that if you add more supertypes here then it is important to also update the custom +/// equality check in our tests in mongodb_agent_common::query::serialization::tests. Equality +/// needs to be transitive over supertypes, so for example if we have, +/// +/// (Double, Int), (Decimal, Double) +/// +/// then in addition to comparing ints to doubles, and doubles to decimals, we also need to compare +/// decimals to ints. 
fn is_supertype(a: &BsonScalarType, b: &BsonScalarType) -> bool { matches!((a, b), (Double, Int)) } diff --git a/crates/mongodb-agent-common/proptest-regressions/query/serialization/tests.txt b/crates/mongodb-agent-common/proptest-regressions/query/serialization/tests.txt index 8a816d59..8304681d 100644 --- a/crates/mongodb-agent-common/proptest-regressions/query/serialization/tests.txt +++ b/crates/mongodb-agent-common/proptest-regressions/query/serialization/tests.txt @@ -8,3 +8,4 @@ cc 2efdea7f185f2f38ae643782b3523014ab7b8236e36a79cc6b7a7cac74b06f79 # shrinks to cc 26e2543468ab6d4ffa34f9f8a2c920801ef38a35337557a8f4e74c92cf57e344 # shrinks to bson = Document({" ": Document({"ยก": DateTime(1970-01-01 0:00:00.001 +00:00:00)})}) cc 7d760e540b56fedac7dd58e5bdb5bb9613b9b0bc6a88acfab3fc9c2de8bf026d # shrinks to bson = Document({"A": Array([Null, Undefined])}) cc 21360610045c5a616b371fb8d5492eb0c22065d62e54d9c8a8761872e2e192f3 # shrinks to bson = Array([Document({}), Document({" ": Null})]) +cc 8842e7f78af24e19847be5d8ee3d47c547ef6c1bb54801d360a131f41a87f4fa diff --git a/crates/mongodb-agent-common/src/query/serialization/tests.rs b/crates/mongodb-agent-common/src/query/serialization/tests.rs index 75395f41..9d65368b 100644 --- a/crates/mongodb-agent-common/src/query/serialization/tests.rs +++ b/crates/mongodb-agent-common/src/query/serialization/tests.rs @@ -21,8 +21,10 @@ proptest! 
{ let json = bson_to_json(&inferred_type, bson.clone()).map_err(|e| error_context("error converting bson to json", e.to_string()))?; let actual = json_to_bson(&inferred_type, json.clone()).map_err(|e| error_context("error converting json to bson", e.to_string()))?; - prop_assert_eq!(actual, bson, - "\ninferred type: {:?}\nobject types: {:?}\njson_representation: {}", + prop_assert!(custom_eq(&actual, &bson), + "`(left == right)`\nleft: `{:?}`\nright: `{:?}`\ninferred type: {:?}\nobject types: {:?}\njson_representation: {}", + actual, + bson, inferred_type, object_types, serde_json::to_string_pretty(&json).unwrap() @@ -40,3 +42,22 @@ proptest! { prop_assert_eq!(actual, bson, "json representation: {}", json) } } + +/// We are treating doubles as a superset of ints, so we need an equality check that allows +/// comparing those types. +fn custom_eq(a: &Bson, b: &Bson) -> bool { + match (a, b) { + (Bson::Double(a), Bson::Int32(b)) | (Bson::Int32(b), Bson::Double(a)) => *a == *b as f64, + (Bson::Array(xs), Bson::Array(ys)) => { + xs.len() == ys.len() && xs.iter().zip(ys.iter()).all(|(x, y)| custom_eq(x, y)) + } + (Bson::Document(a), Bson::Document(b)) => { + a.len() == b.len() + && a.iter().all(|(key_a, value_a)| match b.get(key_a) { + Some(value_b) => custom_eq(value_a, value_b), + None => false, + }) + } + _ => a == b, + } +} From 5ab20aba90d2aa26729336cabfeb2d88d32fcf1e Mon Sep 17 00:00:00 2001 From: David Overton Date: Fri, 14 Jun 2024 04:43:47 +1000 Subject: [PATCH 2/3] Do not make _id fields nullable when sampling documents (#78) * Do not make _id fields nullable when sampling documents * Add changelog entry --------- Co-authored-by: Brandon Martin --- CHANGELOG.md | 1 + crates/cli/src/introspection/sampling.rs | 97 +++++++++++++++++++---- crates/cli/src/lib.rs | 2 +- crates/configuration/src/configuration.rs | 2 +- 4 files changed, 83 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 98841a11..f0ada28d 100644 --- a/CHANGELOG.md +++ 
b/CHANGELOG.md @@ -8,6 +8,7 @@ This changelog documents the changes between release versions. - Implement column-to-column comparisons within the same collection ([#74](https://github.com/hasura/ndc-mongodb/pull/74)) - Fix error tracking collection with no documents by skipping such collections during CLI introspection ([#76](https://github.com/hasura/ndc-mongodb/pull/76)) - If a field contains both `int` and `double` values then the field type is inferred as `double` instead of `ExtendedJSON` ([#77](https://github.com/hasura/ndc-mongodb/pull/77)) +- Fix: schema generated with `_id` column nullable when introspecting schema via sampling ([#78](https://github.com/hasura/ndc-mongodb/pull/78)) ## [0.0.6] - 2024-05-01 - Enables logging events from the MongoDB driver by setting the `RUST_LOG` variable ([#67](https://github.com/hasura/ndc-mongodb/pull/67)) diff --git a/crates/cli/src/introspection/sampling.rs b/crates/cli/src/introspection/sampling.rs index 47e3dec6..51a5f720 100644 --- a/crates/cli/src/introspection/sampling.rs +++ b/crates/cli/src/introspection/sampling.rs @@ -21,7 +21,7 @@ type ObjectType = WithName; /// are not unifiable. pub async fn sample_schema_from_db( sample_size: u32, - all_schema_nullalble: bool, + all_schema_nullable: bool, config_file_changed: bool, state: &ConnectorState, existing_schemas: &HashSet, @@ -36,7 +36,7 @@ pub async fn sample_schema_from_db( let collection_schema = sample_schema_from_collection( &collection_name, sample_size, - all_schema_nullalble, + all_schema_nullable, state, ) .await?; @@ -53,7 +53,7 @@ pub async fn sample_schema_from_db( async fn sample_schema_from_collection( collection_name: &str, sample_size: u32, - all_schema_nullalble: bool, + all_schema_nullable: bool, state: &ConnectorState, ) -> anyhow::Result> { let db = state.database(); @@ -63,8 +63,14 @@ async fn sample_schema_from_collection( .aggregate(vec![doc! 
{"$sample": { "size": sample_size }}], options) .await?; let mut collected_object_types = vec![]; + let is_collection_type = true; while let Some(document) = cursor.try_next().await? { - let object_types = make_object_type(collection_name, &document, all_schema_nullalble); + let object_types = make_object_type( + collection_name, + &document, + is_collection_type, + all_schema_nullable, + ); collected_object_types = if collected_object_types.is_empty() { object_types } else { @@ -91,14 +97,21 @@ async fn sample_schema_from_collection( fn make_object_type( object_type_name: &str, document: &Document, - all_schema_nullalble: bool, + is_collection_type: bool, + all_schema_nullable: bool, ) -> Vec { let (mut object_type_defs, object_fields) = { let type_prefix = format!("{object_type_name}_"); let (object_type_defs, object_fields): (Vec>, Vec) = document .iter() .map(|(field_name, field_value)| { - make_object_field(&type_prefix, field_name, field_value, all_schema_nullalble) + make_object_field( + &type_prefix, + field_name, + field_value, + is_collection_type, + all_schema_nullable, + ) }) .unzip(); (object_type_defs.concat(), object_fields) @@ -120,11 +133,12 @@ fn make_object_field( type_prefix: &str, field_name: &str, field_value: &Bson, - all_schema_nullalble: bool, + is_collection_type: bool, + all_schema_nullable: bool, ) -> (Vec, ObjectField) { let object_type_name = format!("{type_prefix}{field_name}"); let (collected_otds, field_type) = - make_field_type(&object_type_name, field_value, all_schema_nullalble); + make_field_type(&object_type_name, field_value, all_schema_nullable); let object_field_value = WithName::named( field_name.to_owned(), schema::ObjectField { @@ -132,7 +146,8 @@ fn make_object_field( r#type: field_type, }, ); - let object_field = if all_schema_nullalble { + let object_field = if all_schema_nullable && !(is_collection_type && field_name == "_id") { + // The _id field on a collection type should never be nullable. 
make_nullable_field(object_field_value) } else { object_field_value @@ -145,16 +160,16 @@ fn make_object_field( pub fn type_from_bson( object_type_name: &str, value: &Bson, - all_schema_nullalble: bool, + all_schema_nullable: bool, ) -> (BTreeMap, Type) { - let (object_types, t) = make_field_type(object_type_name, value, all_schema_nullalble); + let (object_types, t) = make_field_type(object_type_name, value, all_schema_nullable); (WithName::into_map(object_types), t) } fn make_field_type( object_type_name: &str, field_value: &Bson, - all_schema_nullalble: bool, + all_schema_nullable: bool, ) -> (Vec, Type) { fn scalar(t: BsonScalarType) -> (Vec, Type) { (vec![], Type::Scalar(t)) @@ -168,7 +183,7 @@ fn make_field_type( let mut result_type = Type::Scalar(Undefined); for elem in arr { let (elem_collected_otds, elem_type) = - make_field_type(object_type_name, elem, all_schema_nullalble); + make_field_type(object_type_name, elem, all_schema_nullable); collected_otds = if collected_otds.is_empty() { elem_collected_otds } else { @@ -179,7 +194,13 @@ fn make_field_type( (collected_otds, Type::ArrayOf(Box::new(result_type))) } Bson::Document(document) => { - let collected_otds = make_object_type(object_type_name, document, all_schema_nullalble); + let is_collection_type = false; + let collected_otds = make_object_type( + object_type_name, + document, + is_collection_type, + all_schema_nullable, + ); (collected_otds, Type::Object(object_type_name.to_owned())) } Bson::Boolean(_) => scalar(Bool), @@ -220,7 +241,7 @@ mod tests { let object_name = "foo"; let doc = doc! {"my_int": 1, "my_string": "two"}; let result = - WithName::into_map::>(make_object_type(object_name, &doc, false)); + WithName::into_map::>(make_object_type(object_name, &doc, false, false)); let expected = BTreeMap::from([( object_name.to_owned(), @@ -250,12 +271,54 @@ mod tests { Ok(()) } + #[test] + fn simple_doc_nullable_fields() -> Result<(), anyhow::Error> { + let object_name = "foo"; + let doc = doc! 
{"my_int": 1, "my_string": "two", "_id": 0}; + let result = + WithName::into_map::>(make_object_type(object_name, &doc, true, true)); + + let expected = BTreeMap::from([( + object_name.to_owned(), + ObjectType { + fields: BTreeMap::from([ + ( + "_id".to_owned(), + ObjectField { + r#type: Type::Scalar(BsonScalarType::Int), + description: None, + }, + ), + ( + "my_int".to_owned(), + ObjectField { + r#type: Type::Nullable(Box::new(Type::Scalar(BsonScalarType::Int))), + description: None, + }, + ), + ( + "my_string".to_owned(), + ObjectField { + r#type: Type::Nullable(Box::new(Type::Scalar(BsonScalarType::String))), + description: None, + }, + ), + ]), + description: None, + }, + )]); + + assert_eq!(expected, result); + + Ok(()) + } + #[test] fn array_of_objects() -> Result<(), anyhow::Error> { let object_name = "foo"; let doc = doc! {"my_array": [{"foo": 42, "bar": ""}, {"bar": "wut", "baz": 3.77}]}; let result = - WithName::into_map::>(make_object_type(object_name, &doc, false)); + WithName::into_map::>(make_object_type(object_name, &doc, false, false)); let expected = BTreeMap::from([ ( @@ -316,7 +379,7 @@ mod tests { let object_name = "foo"; let doc = doc! 
{"my_array": [{"foo": 42, "bar": ""}, {"bar": 17, "baz": 3.77}]}; let result = - WithName::into_map::>(make_object_type(object_name, &doc, false)); + WithName::into_map::>(make_object_type(object_name, &doc, false, false)); let expected = BTreeMap::from([ ( diff --git a/crates/cli/src/lib.rs b/crates/cli/src/lib.rs index 34622108..46b510a5 100644 --- a/crates/cli/src/lib.rs +++ b/crates/cli/src/lib.rs @@ -56,7 +56,7 @@ async fn update(context: &Context, args: &UpdateArgs) -> anyhow::Result<()> { None => configuration_options.introspection_options.no_validator_schema }; let all_schema_nullable = match args.all_schema_nullable { - Some(validator) => validator, + Some(b) => b, None => configuration_options.introspection_options.all_schema_nullable }; let config_file_changed = configuration::get_config_file_changed(&context.path).await?; diff --git a/crates/configuration/src/configuration.rs b/crates/configuration/src/configuration.rs index 8c645515..e7719ec7 100644 --- a/crates/configuration/src/configuration.rs +++ b/crates/configuration/src/configuration.rs @@ -206,7 +206,7 @@ pub struct ConfigurationIntrospectionOptions { // Whether to try validator schema first if one exists. pub no_validator_schema: bool, - // Default to setting all schema fields as nullable. + // Default to setting all schema fields, except the _id field on collection types, as nullable. 
pub all_schema_nullable: bool, } From cd030f327f63c4ec8839b2a211d55f5ee78b8439 Mon Sep 17 00:00:00 2001 From: David Overton Date: Fri, 14 Jun 2024 04:53:29 +1000 Subject: [PATCH 3/3] Relax type requirements for primary uniqueness constraint (#79) * Don't require _id field to have type ObjectId when generating primary uniqueness constraint * Add changelog entry * Require comparable scalar type for _id * remove unused import to get clippy check passing --------- Co-authored-by: Jesse Hallett Co-authored-by: Brandon Martin --- CHANGELOG.md | 11 ++++++++++- crates/configuration/src/configuration.rs | 5 ++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f0ada28d..02f26d0d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,9 @@ # MongoDB Connector Changelog + This changelog documents the changes between release versions. ## [Unreleased] + - Support filtering and sorting by fields of related collections ([#72](https://github.com/hasura/ndc-mongodb/pull/72)) - Support for root collection column references ([#75](https://github.com/hasura/ndc-mongodb/pull/75)) - Fix for databases with field names that begin with a dollar sign, or that contain dots ([#74](https://github.com/hasura/ndc-mongodb/pull/74)) @@ -9,8 +11,10 @@ This changelog documents the changes between release versions.
- Fix error tracking collection with no documents by skipping such collections during CLI introspection ([#76](https://github.com/hasura/ndc-mongodb/pull/76)) - If a field contains both `int` and `double` values then the field type is inferred as `double` instead of `ExtendedJSON` ([#77](https://github.com/hasura/ndc-mongodb/pull/77)) - Fix: schema generated with `_id` column nullable when introspecting schema via sampling ([#78](https://github.com/hasura/ndc-mongodb/pull/78)) +- Don't require _id field to have type ObjectId when generating primary uniqueness constraint ([#79](https://github.com/hasura/ndc-mongodb/pull/79)) ## [0.0.6] - 2024-05-01 + - Enables logging events from the MongoDB driver by setting the `RUST_LOG` variable ([#67](https://github.com/hasura/ndc-mongodb/pull/67)) - To log all events set `RUST_LOG=mongodb::command=debug,mongodb::connection=debug,mongodb::server_selection=debug,mongodb::topology=debug` - Relations with a single column mapping now use concise correlated subquery syntax in `$lookup` stage ([#65](https://github.com/hasura/ndc-mongodb/pull/65)) @@ -21,15 +25,17 @@ This changelog documents the changes between release versions. - Note: `native_procedures` folder in configuration is not deprecated. It will continue to work for a few releases, but renaming your folder is all that is needed. 
## [0.0.5] - 2024-04-26 + - Fix incorrect order of results for query requests with more than 10 variable sets (#37) - In the CLI update command, don't overwrite schema files that haven't changed ([#49](https://github.com/hasura/ndc-mongodb/pull/49/files)) - In the CLI update command, if the database URI is not provided the error message now mentions the correct environment variable to use (`MONGODB_DATABASE_URI`) ([#50](https://github.com/hasura/ndc-mongodb/pull/50)) - Update to latest NDC SDK ([#51](https://github.com/hasura/ndc-mongodb/pull/51)) -- Update `rustls` dependency to fix https://github.com/hasura/ndc-mongodb/security/dependabot/1 ([#51](https://github.com/hasura/ndc-mongodb/pull/51)) +- Update `rustls` dependency to fix ([#51](https://github.com/hasura/ndc-mongodb/pull/51)) - Serialize query and mutation response fields with known types using simple JSON instead of Extended JSON (#53) (#59) - Add trace spans ([#58](https://github.com/hasura/ndc-mongodb/pull/58)) ## [0.0.4] - 2024-04-12 + - Queries that attempt to compare a column to a column in the query root table, or a related table, will now fail instead of giving the incorrect result ([#22](https://github.com/hasura/ndc-mongodb/pull/22)) - Fix bug in v2 to v3 conversion of query responses containing nested objects ([PR #27](https://github.com/hasura/ndc-mongodb/pull/27)) - Fixed bug where use of aggregate functions in queries would fail ([#26](https://github.com/hasura/ndc-mongodb/pull/26)) @@ -37,6 +43,7 @@ This changelog documents the changes between release versions. 
- The collection primary key `_id` property now has a unique constraint generated in the NDC schema for it ([#32](https://github.com/hasura/ndc-mongodb/pull/32)) ## [0.0.3] - 2024-03-28 + - Use separate schema files for each collection ([PR #14](https://github.com/hasura/ndc-mongodb/pull/14)) - Changes to `update` CLI command ([PR #17](https://github.com/hasura/ndc-mongodb/pull/17)): - new default behaviour: @@ -48,7 +55,9 @@ This changelog documents the changes between release versions. - Add `any` type and use it to represent mismatched types in sample documents ([PR #18](https://github.com/hasura/ndc-mongodb/pull/18)) ## [0.0.2] - 2024-03-26 + - Rename CLI plugin to ndc-mongodb ([PR #13](https://github.com/hasura/ndc-mongodb/pull/13)) ## [0.0.1] - 2024-03-22 + Initial release diff --git a/crates/configuration/src/configuration.rs b/crates/configuration/src/configuration.rs index e7719ec7..f028a504 100644 --- a/crates/configuration/src/configuration.rs +++ b/crates/configuration/src/configuration.rs @@ -2,7 +2,6 @@ use std::{collections::BTreeMap, path::Path}; use anyhow::{anyhow, ensure}; use itertools::Itertools; -use mongodb_support::BsonScalarType; use ndc_models as ndc; use serde::{Deserialize, Serialize}; @@ -282,12 +281,12 @@ fn get_primary_key_uniqueness_constraint( name: &str, collection_type: &str, ) -> Option<(String, ndc::UniquenessConstraint)> { - // Check to make sure our collection's object type contains the _id objectid field + // Check to make sure our collection's object type contains the _id field // If it doesn't (should never happen, all collections need an _id column), don't generate the constraint let object_type = object_types.get(collection_type)?; let id_field = object_type.fields.get("_id")?; match &id_field.r#type { - schema::Type::Scalar(BsonScalarType::ObjectId) => Some(()), + schema::Type::Scalar(scalar_type) if scalar_type.is_comparable() => Some(()), _ => None, }?; let uniqueness_constraint = ndc::UniquenessConstraint {