diff --git a/puente-analytics-service/lambdas/etl/silver/dimensions.py b/puente-analytics-service/lambdas/etl/silver/dimensions.py index 55e22a7..9be3b80 100644 --- a/puente-analytics-service/lambdas/etl/silver/dimensions.py +++ b/puente-analytics-service/lambdas/etl/silver/dimensions.py @@ -701,12 +701,18 @@ def get_custom_form_questions(form_results): "phoneOS" ] + print("1") + print(form_results[form_results['title']=='Nombre de Medicamento']) + options_fr = form_results[~form_results["title"].isin(ignore_questions)] options = options_fr.groupby(["title"])["question_answer"].agg(lambda x: unique_values(x)).reset_index().rename({"question_answer": "options"}, axis=1) options["num_answers"] = options["options"].apply(len) options_fr = options_fr.merge(options, on="title", how="left") + print("2") + print(options_fr[options_fr['title']=='Nombre de Medicamento']) + options_fr["field_type"] = None options_fr["is_list"] = options_fr["question_answer"].apply(lambda x: isinstance(x, list)) @@ -719,12 +725,18 @@ def get_custom_form_questions(form_results): existing_forms = list(query_db("SELECT DISTINCT uuid FROM form_dim")["uuid"].unique()) options_fr = options_fr[options_fr["form_id"].isin(existing_forms)] + print("3") + print(options_fr[options_fr['title']=='Nombre de Medicamento']) + inserted_uuids = [] existing_qs = list(query_db("SELECT DISTINCT question FROM question_dim")["question"].unique()) options_fr = options_fr[~options_fr["title"].isin(existing_qs)] options_fr = coalesce_pkey(options_fr, "title") + print("4") + print(options_fr[options_fr['title']=='Nombre de Medicamento']) + for i, row in options_fr.iterrows(): form = row.get("formSpecificationsId") form_created_at = row.get("createdAt") diff --git a/puente-analytics-service/lambdas/etl/silver/facts.py b/puente-analytics-service/lambdas/etl/silver/facts.py index cc6d17f..8aeb4ed 100644 --- a/puente-analytics-service/lambdas/etl/silver/facts.py +++ b/puente-analytics-service/lambdas/etl/silver/facts.py @@ -26,6 +26,7 @@ def get_custom_forms(df): fk_missing_rows = [] missing_qa_rows = [] + # df["fields"] = df["fields"].apply(json.loads) # exploded_df = df.explode("fields") @@ -70,6 +71,8 @@ def get_custom_forms(df): exploded_df = exploded_df[~exploded_df["title"].isin(existing_qs)] + existing_forms = list(query_db("SELECT DISTINCT uuid FROM form_dim")["uuid"].unique()) + for i, row in exploded_df.iterrows(): object_id = row.get("objectId") @@ -115,6 +118,11 @@ def get_custom_forms(df): form_id = md5_encode(form) community_id = md5_encode(community_name) + if form_id not in existing_forms: + continue + + + ignore_questions = [ 'surveyinguser', 'surveyingorganization',