Skip to content

Commit

Permalink
Added questionnaire data analyses pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
Hsankesara committed Oct 19, 2022
1 parent a88a102 commit 20e1022
Show file tree
Hide file tree
Showing 3 changed files with 104 additions and 0 deletions.
3 changes: 3 additions & 0 deletions __init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .questionnnairdataanalyses.features import (
QuestionnnairDataAnalyses
)
23 changes: 23 additions & 0 deletions config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
project:
project_name: Questionnnair data analyses
description: Questionnaire data analyses code from ART pilot analyses
version: 0.0

input_data:
data_location: local
local_directory: /Users/heetsankesara/work/ART/data_cleaning/questionnaire_data/
data_format: csv

configurations:
df_type: 'pandas'

features:
- location: /Users/heetsankesara/work/RADAR/radar-pipeline/questionnnairdataanalyses
feature_groups:
- QuestionnnairDataAnalyses

output_data:
output_location: local
local_directory: passive_output
data_format: csv
compress: false
78 changes: 78 additions & 0 deletions questionnnairdataanalyses/features.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import pandas as pd
from radarpipeline.datalib import RadarData
from radarpipeline.features import Feature, FeatureGroup


class QuestionnnairDataAnalyses(FeatureGroup):
    """Feature group bundling the questionnaire timing features.

    Registers ``QuestionnaireNotificationResponseLatency`` and
    ``QuestionnaireCompletionTime`` and prepares the shared questionnaire
    event table that both of them consume.
    """

    def __init__(self):
        name = "QuestionnnairDataAnalyses"
        description = "contains Questionnnaire Data Features"
        features = [
            QuestionnaireNotificationResponseLatency,
            QuestionnaireCompletionTime,
        ]
        super().__init__(name, description, features)

    def preprocess(self, data: RadarData) -> pd.DataFrame:
        """Build one cleaned questionnaire event table from all inputs.

        For every variable named in ``self.required_input_data`` the combined
        data is fetched from ``data``, reduced to the key and timing columns,
        and stacked into a single frame. The three time columns are parsed
        with ``unit="s"``, columns are renamed to short names, duplicate
        ``(uid, time, name)`` rows are dropped, a ``date`` column is derived
        from ``time``, and the rows are sorted by
        ``uid``, ``time_notification``, ``time_completed``.

        Args:
            data: Source of the questionnaire variables.
                NOTE(review): ``get_combined_data_by_variable`` is assumed to
                return a pandas DataFrame containing the selected columns --
                confirm against radarpipeline.

        Returns:
            A DataFrame with columns ``key.projectId``, ``uid``,
            ``key.sourceId``, ``time``, ``time_completed``,
            ``time_notification``, ``name`` and ``date``.
        """
        # NOTE(review): self.required_input_data is not assigned in this
        # class; presumably the FeatureGroup base derives it from the
        # registered features -- confirm.
        selected_columns = [
            "key.projectId",
            "key.userId",
            "key.sourceId",
            "value.time",
            "value.timeCompleted",
            "value.timeNotification",
            "value.name",
        ]
        # Keyed by variable name so a variable listed more than once
        # contributes a single frame.
        frames_by_variable = {
            key: data.get_combined_data_by_variable(key)[selected_columns]
            for key in self.required_input_data
        }
        events = pd.concat(list(frames_by_variable.values())).reset_index(
            drop=True
        )

        # The raw time fields are numeric and are interpreted as seconds.
        for column in (
            "value.time",
            "value.timeNotification",
            "value.timeCompleted",
        ):
            events[column] = pd.to_datetime(events[column], unit="s")

        events = events.rename(
            columns={
                "key.userId": "uid",
                "value.time": "time",
                "value.timeNotification": "time_notification",
                "value.timeCompleted": "time_completed",
                "value.name": "name",
            }
        )
        events = events.drop_duplicates(
            subset=["uid", "time", "name"]
        ).reset_index(drop=True)
        events["date"] = events["time"].dt.date
        events = events.sort_values(
            by=["uid", "time_notification", "time_completed"]
        ).reset_index(drop=True)
        return events

    def compute_features(self, data: RadarData) -> RadarData:
        """Compute and combine the features of the group.

        Not implemented: this is currently a no-op that returns ``None``.
        """
        pass

class QuestionnaireNotificationResponseLatency(Feature):
    """Time between a questionnaire notification and the first answer."""

    def __init__(self):
        self.name = "QuestionnaireNotificationResponseLatency"
        self.description = "The time it took for participants to start filling out the questionnaires after receiving the notification."
        self.required_input_data = [
            "questionnaire_ari_self",
            "questionnaire_gad7",
            "questionnaire_phq8",
            "questionnaire_rpq",
            "questionnaire_baars_iv",
        ]

    def preprocess(self, data):
        """Return ``data`` unchanged; no feature-specific preprocessing."""
        return data

    def calculate(self, data) -> pd.DataFrame:
        """Summarise response latency per user and notification.

        Args:
            data: DataFrame with at least the columns ``uid``,
                ``time_notification``, ``time`` and ``time_completed``; the
                three time columns must be datetime-like.

        Returns:
            One row per ``(uid, time_notification)`` with:

            * ``start_time`` -- earliest ``time`` in the group,
            * ``finished_time`` -- latest ``time_completed`` in the group,
            * ``notification_response_time`` --
              ``start_time - time_notification``,
            * ``total_completion_time`` --
              ``finished_time - time_notification``,
            * ``notification_response_time_sec`` -- the response time in
              seconds.
        """
        # String aggregation names instead of the builtins min/max: passing
        # the builtins to agg() is deprecated in recent pandas releases.
        summary = (
            data.groupby(["uid", "time_notification"])
            .agg(
                start_time=("time", "min"),
                finished_time=("time_completed", "max"),
            )
            .reset_index()
        )
        summary["notification_response_time"] = (
            summary["start_time"] - summary["time_notification"]
        )
        summary["total_completion_time"] = (
            summary["finished_time"] - summary["time_notification"]
        )
        summary["notification_response_time_sec"] = summary[
            "notification_response_time"
        ].dt.total_seconds()
        return summary




class QuestionnaireCompletionTime(Feature):
    """Time between starting and finishing a questionnaire."""

    def __init__(self):
        self.name = "QuestionnaireCompletionTime"
        self.description = "The time it took for participants to finish the questionnaires after starting them. "
        self.required_input_data = [
            "questionnaire_ari_self",
            "questionnaire_gad7",
            "questionnaire_phq8",
            "questionnaire_rpq",
            "questionnaire_baars_iv",
        ]

    def preprocess(self, data):
        """Return ``data`` unchanged; no feature-specific preprocessing."""
        return data

    def calculate(self, data) -> pd.DataFrame:
        """Summarise completion time per user and notification.

        Args:
            data: DataFrame with at least the columns ``uid``,
                ``time_notification``, ``time`` and ``time_completed``;
                ``time`` and ``time_completed`` must be datetime-like.

        Returns:
            One row per ``(uid, time_notification)`` with:

            * ``start_time`` -- earliest ``time`` in the group,
            * ``finished_time`` -- latest ``time_completed`` in the group,
            * ``questionnaire_completion_time`` --
              ``finished_time - start_time``,
            * ``questionnaire_completion_time_sec`` -- the same duration in
              seconds.
        """
        # String aggregation names instead of the builtins min/max: passing
        # the builtins to agg() is deprecated in recent pandas releases.
        summary = (
            data.groupby(["uid", "time_notification"])
            .agg(
                start_time=("time", "min"),
                finished_time=("time_completed", "max"),
            )
            .reset_index()
        )
        summary["questionnaire_completion_time"] = (
            summary["finished_time"] - summary["start_time"]
        )
        summary["questionnaire_completion_time_sec"] = summary[
            "questionnaire_completion_time"
        ].dt.total_seconds()
        return summary

0 comments on commit 20e1022

Please sign in to comment.