Skip to content

Commit

Permalink
Do not make _id fields nullable when sampling documents (#78)
Browse files Browse the repository at this point in the history
* Do not make _id fields nullable when sampling documents

* Add changelog entry

---------

Co-authored-by: Brandon Martin <brandon@codedmart.com>
  • Loading branch information
dmoverton and codedmart authored Jun 13, 2024
1 parent 82b6739 commit 5ab20ab
Show file tree
Hide file tree
Showing 4 changed files with 83 additions and 19 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ This changelog documents the changes between release versions.
- Implement column-to-column comparisons within the same collection ([#74](https://github.com/hasura/ndc-mongodb/pull/74))
- Fix error tracking collection with no documents by skipping such collections during CLI introspection ([#76](https://github.com/hasura/ndc-mongodb/pull/76))
- If a field contains both `int` and `double` values then the field type is inferred as `double` instead of `ExtendedJSON` ([#77](https://github.com/hasura/ndc-mongodb/pull/77))
- Fix: schema generated with `_id` column nullable when introspecting schema via sampling ([#78](https://github.com/hasura/ndc-mongodb/pull/78))

## [0.0.6] - 2024-05-01
- Enables logging events from the MongoDB driver by setting the `RUST_LOG` variable ([#67](https://github.com/hasura/ndc-mongodb/pull/67))
Expand Down
97 changes: 80 additions & 17 deletions crates/cli/src/introspection/sampling.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ type ObjectType = WithName<schema::ObjectType>;
/// are not unifiable.
pub async fn sample_schema_from_db(
sample_size: u32,
all_schema_nullalble: bool,
all_schema_nullable: bool,
config_file_changed: bool,
state: &ConnectorState,
existing_schemas: &HashSet<std::string::String>,
Expand All @@ -36,7 +36,7 @@ pub async fn sample_schema_from_db(
let collection_schema = sample_schema_from_collection(
&collection_name,
sample_size,
all_schema_nullalble,
all_schema_nullable,
state,
)
.await?;
Expand All @@ -53,7 +53,7 @@ pub async fn sample_schema_from_db(
async fn sample_schema_from_collection(
collection_name: &str,
sample_size: u32,
all_schema_nullalble: bool,
all_schema_nullable: bool,
state: &ConnectorState,
) -> anyhow::Result<Option<Schema>> {
let db = state.database();
Expand All @@ -63,8 +63,14 @@ async fn sample_schema_from_collection(
.aggregate(vec![doc! {"$sample": { "size": sample_size }}], options)
.await?;
let mut collected_object_types = vec![];
let is_collection_type = true;
while let Some(document) = cursor.try_next().await? {
let object_types = make_object_type(collection_name, &document, all_schema_nullalble);
let object_types = make_object_type(
collection_name,
&document,
is_collection_type,
all_schema_nullable,
);
collected_object_types = if collected_object_types.is_empty() {
object_types
} else {
Expand All @@ -91,14 +97,21 @@ async fn sample_schema_from_collection(
fn make_object_type(
object_type_name: &str,
document: &Document,
all_schema_nullalble: bool,
is_collection_type: bool,
all_schema_nullable: bool,
) -> Vec<ObjectType> {
let (mut object_type_defs, object_fields) = {
let type_prefix = format!("{object_type_name}_");
let (object_type_defs, object_fields): (Vec<Vec<ObjectType>>, Vec<ObjectField>) = document
.iter()
.map(|(field_name, field_value)| {
make_object_field(&type_prefix, field_name, field_value, all_schema_nullalble)
make_object_field(
&type_prefix,
field_name,
field_value,
is_collection_type,
all_schema_nullable,
)
})
.unzip();
(object_type_defs.concat(), object_fields)
Expand All @@ -120,19 +133,21 @@ fn make_object_field(
type_prefix: &str,
field_name: &str,
field_value: &Bson,
all_schema_nullalble: bool,
is_collection_type: bool,
all_schema_nullable: bool,
) -> (Vec<ObjectType>, ObjectField) {
let object_type_name = format!("{type_prefix}{field_name}");
let (collected_otds, field_type) =
make_field_type(&object_type_name, field_value, all_schema_nullalble);
make_field_type(&object_type_name, field_value, all_schema_nullable);
let object_field_value = WithName::named(
field_name.to_owned(),
schema::ObjectField {
description: None,
r#type: field_type,
},
);
let object_field = if all_schema_nullalble {
let object_field = if all_schema_nullable && !(is_collection_type && field_name == "_id") {
// The _id field on a collection type should never be nullable.
make_nullable_field(object_field_value)
} else {
object_field_value
Expand All @@ -145,16 +160,16 @@ fn make_object_field(
pub fn type_from_bson(
object_type_name: &str,
value: &Bson,
all_schema_nullalble: bool,
all_schema_nullable: bool,
) -> (BTreeMap<std::string::String, schema::ObjectType>, Type) {
let (object_types, t) = make_field_type(object_type_name, value, all_schema_nullalble);
let (object_types, t) = make_field_type(object_type_name, value, all_schema_nullable);
(WithName::into_map(object_types), t)
}

fn make_field_type(
object_type_name: &str,
field_value: &Bson,
all_schema_nullalble: bool,
all_schema_nullable: bool,
) -> (Vec<ObjectType>, Type) {
fn scalar(t: BsonScalarType) -> (Vec<ObjectType>, Type) {
(vec![], Type::Scalar(t))
Expand All @@ -168,7 +183,7 @@ fn make_field_type(
let mut result_type = Type::Scalar(Undefined);
for elem in arr {
let (elem_collected_otds, elem_type) =
make_field_type(object_type_name, elem, all_schema_nullalble);
make_field_type(object_type_name, elem, all_schema_nullable);
collected_otds = if collected_otds.is_empty() {
elem_collected_otds
} else {
Expand All @@ -179,7 +194,13 @@ fn make_field_type(
(collected_otds, Type::ArrayOf(Box::new(result_type)))
}
Bson::Document(document) => {
let collected_otds = make_object_type(object_type_name, document, all_schema_nullalble);
let is_collection_type = false;
let collected_otds = make_object_type(
object_type_name,
document,
is_collection_type,
all_schema_nullable,
);
(collected_otds, Type::Object(object_type_name.to_owned()))
}
Bson::Boolean(_) => scalar(Bool),
Expand Down Expand Up @@ -220,7 +241,7 @@ mod tests {
let object_name = "foo";
let doc = doc! {"my_int": 1, "my_string": "two"};
let result =
WithName::into_map::<BTreeMap<_, _>>(make_object_type(object_name, &doc, false));
WithName::into_map::<BTreeMap<_, _>>(make_object_type(object_name, &doc, false, false));

let expected = BTreeMap::from([(
object_name.to_owned(),
Expand Down Expand Up @@ -250,12 +271,54 @@ mod tests {
Ok(())
}

#[test]
fn simple_doc_nullable_fields() -> Result<(), anyhow::Error> {
let object_name = "foo";
let doc = doc! {"my_int": 1, "my_string": "two", "_id": 0};
let result =
WithName::into_map::<BTreeMap<_, _>>(make_object_type(object_name, &doc, true, true));

let expected = BTreeMap::from([(
object_name.to_owned(),
ObjectType {
fields: BTreeMap::from([
(
"_id".to_owned(),
ObjectField {
r#type: Type::Scalar(BsonScalarType::Int),
description: None,
},
),
(
"my_int".to_owned(),
ObjectField {
r#type: Type::Nullable(Box::new(Type::Scalar(BsonScalarType::Int))),
description: None,
},
),
(
"my_string".to_owned(),
ObjectField {
r#type: Type::Nullable(Box::new(Type::Scalar(BsonScalarType::String))),
description: None,
},
),
]),
description: None,
},
)]);

assert_eq!(expected, result);

Ok(())
}

#[test]
fn array_of_objects() -> Result<(), anyhow::Error> {
let object_name = "foo";
let doc = doc! {"my_array": [{"foo": 42, "bar": ""}, {"bar": "wut", "baz": 3.77}]};
let result =
WithName::into_map::<BTreeMap<_, _>>(make_object_type(object_name, &doc, false));
WithName::into_map::<BTreeMap<_, _>>(make_object_type(object_name, &doc, false, false));

let expected = BTreeMap::from([
(
Expand Down Expand Up @@ -316,7 +379,7 @@ mod tests {
let object_name = "foo";
let doc = doc! {"my_array": [{"foo": 42, "bar": ""}, {"bar": 17, "baz": 3.77}]};
let result =
WithName::into_map::<BTreeMap<_, _>>(make_object_type(object_name, &doc, false));
WithName::into_map::<BTreeMap<_, _>>(make_object_type(object_name, &doc, false, false));

let expected = BTreeMap::from([
(
Expand Down
2 changes: 1 addition & 1 deletion crates/cli/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ async fn update(context: &Context, args: &UpdateArgs) -> anyhow::Result<()> {
None => configuration_options.introspection_options.no_validator_schema
};
let all_schema_nullable = match args.all_schema_nullable {
Some(validator) => validator,
Some(b) => b,
None => configuration_options.introspection_options.all_schema_nullable
};
let config_file_changed = configuration::get_config_file_changed(&context.path).await?;
Expand Down
2 changes: 1 addition & 1 deletion crates/configuration/src/configuration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ pub struct ConfigurationIntrospectionOptions {
// Whether to try validator schema first if one exists.
pub no_validator_schema: bool,

// Default to setting all schema fields as nullable.
// Default to setting all schema fields, except the _id field on collection types, as nullable.
pub all_schema_nullable: bool,
}

Expand Down

0 comments on commit 5ab20ab

Please sign in to comment.