Skip to content

Commit

Permalink
handle KeyError for all null values in dataframe
Browse files Browse the repository at this point in the history
Converted a set to a list in format_dtypes (utils.py)
to avoid compatibility issues with newer pandas versions.
  • Loading branch information
Jaroslaw Michalski committed Mar 15, 2023
1 parent 75831c0 commit b92eb4b
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 15 deletions.
34 changes: 20 additions & 14 deletions app/engine/from_db/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,20 +31,26 @@ def process_generic_data(results: Iterable[Record], request: Request) -> DataFra
for metric in filter(response_metrics.__contains__, MetricData.generic_dtypes)
}

payload = (
df
.pivot_table(
values="value",
index=MetricData.base_metrics,
columns="metric",
aggfunc='first'
try:
payload = (
df
.pivot_table(
values="value",
index=MetricData.base_metrics,
columns="metric",
aggfunc='first'
)
.reset_index()
.sort_values(["date", "areaCode"], ascending=[False, True])
.pipe(format_dtypes, column_types=column_types)
.loc[:, [*MetricData.base_metrics, *response_metrics]]
.pipe(format_msoas, request=request)
.pipe(format_data, response_metrics=response_metrics)
)
.reset_index()
.sort_values(["date", "areaCode"], ascending=[False, True])
.pipe(format_dtypes, column_types=column_types)
.loc[:, [*MetricData.base_metrics, *response_metrics]]
.pipe(format_msoas, request=request)
.pipe(format_data, response_metrics=response_metrics)
)
except KeyError as err:
# This can happen when the df contains only null values; in that case
# some of the dataframe operations above cannot be performed.
# Fall back to an empty DataFrame so the caller still receives a DataFrame object.
payload = DataFrame()

return payload
3 changes: 2 additions & 1 deletion app/engine/from_db/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,8 @@ async def cache_response(func, *, request: Request, **kwargs) -> bool:


def format_dtypes(df: DataFrame, column_types: Dict[str, object]) -> DataFrame:
json_columns = MetricData.json_dtypes.intersection(column_types)
# Pass a list instead of a set to avoid compatibility issues with newer pandas versions
json_columns = list(MetricData.json_dtypes.intersection(column_types))

# Replace `null` string with None. This happens because
# some DB queries convert `null` to `"null"` for type
Expand Down

0 comments on commit b92eb4b

Please sign in to comment.