Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(schema): add support for extensions on primitive types #7

Merged
merged 1 commit into from
Nov 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.9", "3.10", "3.11", "3.12"]
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]

steps:
- uses: actions/checkout@v4
Expand Down
2 changes: 1 addition & 1 deletion cumulus_fhir_support/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""FHIR support code for the Cumulus project"""

__version__ = "1.2.1"
__version__ = "1.3.0"

from .json import list_multiline_json_in_dir, read_multiline_json, read_multiline_json_from_dir
from .schemas import pyarrow_schema_from_rows
69 changes: 51 additions & 18 deletions cumulus_fhir_support/schemas.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
"""Detect FHIR resource schemas"""

from collections import namedtuple
from functools import partial
from typing import Any, Iterable, Optional

import pyarrow
from fhirclient.models import (
codeableconcept,
coding,
element,
extension,
fhirabstractbase,
fhirdate,
Expand Down Expand Up @@ -140,7 +140,8 @@ def _create_pyarrow_schema_for_resource(
"""
instance = fhirelementfactory.FHIRElementFactory.instantiate(resource_type, None)

# fhirclient doesn't include `resourceType` in the list of properties. So do that manually.
# fhirclient doesn't include `resourceType` in the list of properties, because it's only
# used in ndjson representations. But it's useful to have, so add it manually.
type_field = pyarrow.field("resourceType", pyarrow.string())

level = 0 if wide else 2
Expand All @@ -153,27 +154,27 @@ def _fhir_obj_to_pyarrow_fields(
base_obj: fhirabstractbase.FHIRAbstractBase, batch_shape: dict, *, level: int
) -> list[pyarrow.Field]:
"""Convert a FHIR instance to a PyArrow Field schema list"""
properties = map(FhirProperty._make, base_obj.elementProperties())
return list(
filter(
None,
map(
partial(
_fhir_to_pyarrow_property,
base_obj=base_obj,
batch_shape=batch_shape,
level=level,
),
properties,
),
)
)
fhir_properties = map(FhirProperty._make, base_obj.elementProperties())
pa_properties = []

for fhir_property in fhir_properties:
if pa_property := _fhir_to_pyarrow_property(
fhir_property,
base_obj=base_obj,
batch_shape=batch_shape,
level=level,
):
pa_properties.append(pa_property)
if pa_sunder := _sunder_to_pyarrow_property(fhir_property, batch_shape=batch_shape):
pa_properties.append(pa_sunder)

return pa_properties


def _fhir_to_pyarrow_property(
prop: FhirProperty,
*,
base_obj: fhirabstractbase.FHIRAbstractBase,
base_obj: Optional[fhirabstractbase.FHIRAbstractBase] = None,
batch_shape: dict = None,
level: int,
) -> Optional[pyarrow.Field]:
Expand Down Expand Up @@ -222,6 +223,38 @@ def _fhir_to_pyarrow_property(
return pyarrow.field(prop.json_name, pyarrow_type, nullable=True)


def _sunder_to_pyarrow_property(
    prop: FhirProperty,
    *,
    batch_shape: Optional[dict] = None,
) -> Optional[pyarrow.Field]:
    """
    Build the PyArrow field for a FhirProperty's "sunder" sibling, if the batch contains one.

    FHIR's JSON format has no room inside a primitive value for extension information, so it
    lives in an adjacent field named with a single leading underscore (a "sunder" field).
    For example, "status" may be accompanied by a sibling "_status".

    See http://hl7.org/fhir/R4/json.html#primitive for more information.

    :param prop: the regular property whose sunder sibling should be looked up
    :param batch_shape: the shape of the current batch at this level; the sunder field is only
      considered when its key appears here
    :returns: the field produced by _fhir_to_pyarrow_property for the sunder sibling, or None
      if the sunder key isn't present in batch_shape (or batch_shape is empty / not given)
    """
    sunder_json_name = f"_{prop.json_name}"

    # Bail out early unless the batch really has the sunder key at this level.
    sunder_present = bool(batch_shape) and sunder_json_name in batch_shape
    if not sunder_present:
        return None

    # Describe the sibling as a synthetic Element-typed property that mirrors the original's
    # list-ness and other flags, then let the normal property conversion decide what to emit.
    sunder_prop = FhirProperty(
        name=f"_{prop.name}",
        json_name=sunder_json_name,
        pytype=element.Element,
        is_list=prop.is_list,
        of_many=prop.of_many,
        required=prop.required,
    )
    return _fhir_to_pyarrow_property(
        sunder_prop,
        level=LEVEL_INCLUSION,
        batch_shape=batch_shape,
    )


def _basic_fhir_to_pyarrow_type(pytype: type) -> pyarrow.DataType:
"""Converts a basic python type to a PyArrow type"""
if pytype is int:
Expand Down
71 changes: 71 additions & 0 deletions tests/test_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,3 +232,74 @@ def test_unexpected_fhir_type(self, mock_instantiate):
mock_instantiate.return_value = mock_resource
with self.assertRaisesRegex(ValueError, "Unexpected type: <class 'object'>"):
support.pyarrow_schema_from_rows("AllergyIntolerance")

def test_primitive_field_extension(self):
    """Verify that sunder ("_field") extensions of primitive fields show up in the schema"""
    # See http://hl7.org/fhir/R4/json.html#primitive for details
    medication_request = {
        # Non-existent sunder field
        "_doesNotExist": {"id": "test-fake"},
        # Extension only, no ID
        "_status": {"extension": [{"valueCode": "test-status"}]},
        # ID only, no extension (but with bogus modifierExtension that will be ignored)
        "_priority": {"id": "test-priority", "modifierExtension": "not-supported"},
        # Array
        "_instantiatesUri": [
            None,
            {"id": "test-array"},
            {"extension": [{"url": "test"}]},
        ],
        # Deep field
        "dispenseRequest": {
            "validityPeriod": {"_start": {"id": "test-start"}},
        },
    }
    schema = support.pyarrow_schema_from_rows("MedicationRequest", [medication_request])

    # Expected shapes, named up front so the assertions below stay short.
    id_only_type = pyarrow.struct({"id": pyarrow.string()})
    status_type = pyarrow.struct(
        {
            "extension": pyarrow.list_(pyarrow.struct({"valueCode": pyarrow.string()})),
        }
    )
    instantiates_uri_type = pyarrow.list_(
        pyarrow.struct(
            {
                "extension": pyarrow.list_(pyarrow.struct({"url": pyarrow.string()})),
                "id": pyarrow.string(),
            }
        )
    )

    # Neither a sunder for an unknown field nor one that was never specified should appear.
    self.assertEqual(-1, schema.get_field_index("_doesNotExist"))
    self.assertEqual(-1, schema.get_field_index("_intent"))

    self.assertEqual(status_type, schema.field("_status").type)
    self.assertEqual(id_only_type, schema.field("_priority").type)
    self.assertEqual(instantiates_uri_type, schema.field("_instantiatesUri").type)

    # Sunder fields nested inside other structures are picked up too.
    validity_period_type = schema.field("dispenseRequest").type.field("validityPeriod").type
    self.assertEqual(id_only_type, validity_period_type.field("_start").type)
Loading