-
Notifications
You must be signed in to change notification settings - Fork 1
/
adc_export.py
139 lines (108 loc) · 4.94 KB
/
adc_export.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
'''data_export_api -- Export data from REDCap projects into CSV files
Usage:
python data_export_api.py example.ini 11
Based on: pioneers/active_studies/study_refresh.py
and http://pycap.readthedocs.org/en/latest/deep.html#working-with-files
Bootstrap project structure is based on DataExportBoostrap_DataDictionary.csv
'''
import configparser
import logging
from redcap import RedcapError
log = logging.getLogger(__name__)
def main(get_config,
         bootstrap_form='form_selection',
         # TODO: provide user with the ability to choose format
         file_format='csv',
         chunk_size=50):
    '''Export the forms listed in the bootstrap project, one file per row.

    Every record of `bootstrap_form` in the bootstrap project is read for
    its `formname`, `filename` and `fieldnames` values.  For each such row
    the record ids of the data project are fetched, split into chunks of
    `chunk_size`, and the form named by `formname` is exported chunk by
    chunk into a single destination file.

    :param get_config: callable returning the 4-tuple
        (pid, bs_proj, data_proj, open_dest); see mk_get_config().
    :param bootstrap_form: form of the bootstrap project to read.
    :param file_format: export format, also used as the file extension.
    :param chunk_size: maximum number of record ids per export request.
    :raises ValueError: when a chunked export raises RedcapError.
    '''
    # TODO: Allow users to provide multiple PIDs
    pid, bs_proj, data_proj, open_dest = get_config()

    bs_data = bs_proj.export_records(format='json', forms=[bootstrap_form])
    log.info('Initiating export related to %s bootstrap records for pid:%s',
             len(bs_data), pid)

    for row in bs_data:
        # NOTE(review): field_names is computed but never handed to
        # export_records() below -- confirm whether the export was meant
        # to be restricted to these fields.
        field_names = tuple(row['fieldnames'].split(','))

        # Fall back to the form name when no explicit file name is given.
        if row['filename'] is None or row['filename'] == '':
            file_name = row['formname']
        else:
            file_name = row['filename']

        # Fix to include def_field in form exports (ref: #3426).
        if field_names == ('',):
            field_names = (data_proj.def_field,) + field_names

        op_file = open_dest(file_name, file_format)
        # try/finally so the destination file is closed on every exit
        # path, including a failed export.
        try:
            record_list = data_proj.export_records(
                fields=[data_proj.def_field])
            # De-duplicate the ids (one record may appear in several rows).
            records = list({_record_id(r[data_proj.def_field])
                            for r in record_list})

            # From:http://pycap.readthedocs.org/en/latest/deep.html#working-with-files # noqa
            try:
                log.info('Initiating export of data for pid:%s, form:%s ',
                         pid, row['formname'])
                header_written = False
                log.info('Records: %s', records)
                for record_chunk in chunks(records, chunk_size):
                    log.info('Chunk: %s to %s',
                             record_chunk[0], record_chunk[-1])
                    data = data_proj.export_records(records=record_chunk,
                                                    format=file_format,
                                                    forms=[row['formname'], ],
                                                    event_name='unique')
                    if data is None:
                        break
                    if header_written:
                        # remove the header of the CSV; partition() gives
                        # '' (instead of an IndexError) when the chunk
                        # holds nothing but a header line.
                        data = data.partition('\n')[2]
                    op_file.write(data.encode('utf-8'))
                    header_written = True
            except RedcapError:
                msg = "Automatic REDCap API chunked export failed"
                log.error('Chunked export failed for pid:%s, form:%s ',
                          pid, row['formname'])
                raise ValueError(msg)
            else:
                log.info('Completed the export of data for pid:%s, form:%s ',
                         pid, row['formname'])
        finally:
            op_file.close()


def _record_id(value):
    '''Return the record id `value` as a native `str`.

    On Python 2 this is the UTF-8 encoded byte string.  On Python 3
    encoding yields `bytes`, and `str()` of that would produce the
    literal text "b'...'", so the original text is returned instead.
    '''
    encoded = value.encode('utf-8')
    return encoded if isinstance(encoded, str) else value
def chunks(l, n):
    '''Yield consecutive slices of `l`, each holding at most `n` items.

    :param l: a sliceable sequence with a length (list, tuple, str, ...).
    :param n: maximum size of each slice; the last one may be shorter.
    :raises ValueError: when `n` is 0 (raised on first iteration).
    '''
    # From:http://pycap.readthedocs.org/en/latest/deep.html#working-with-files
    # `range` instead of `xrange`: it exists on both Python 2 and 3.
    for start in range(0, len(l), n):
        yield l[start:start + n]
def mk_get_config(os_path, openf, argv, Project):
    '''Attenuate file, network access.

    get_config() provides only
     - config files given as CLI arg 1
     - pid from CLI arg 2
     - access to REDCap projects specified by config and pid
     - write access to `file_dest` option from this config and pid

    :param os_path: object providing `join()`, used to build output paths.
    :param openf: callable used to open both the config file (called with
        the name only) and destination files (called with mode 'wb').
    :param argv: command-line argument list; items 1 and 2 are read.
    :param Project: callable building a REDCap project from
        (api_url, token, verify_ssl=...).
    :return: the zero-argument get_config function, which itself returns
        the 4-tuple (pid, bs_proj, data_proj, open_dest).
    '''
    def get_config():
        # Raises ValueError unless both CLI arguments are present.
        [config_fn, pid] = argv[1:3]

        # ConfigParser/read_file are the supported spellings of the
        # deprecated SafeConfigParser/readfp aliases, which were removed
        # in Python 3.12.
        config = configparser.ConfigParser()
        config_fp = openf(config_fn)
        try:
            config.read_file(config_fp, source=config_fn)
        finally:
            # The parser holds everything it needs once reading is done.
            config_fp.close()

        api_url = config.get('api', 'api_url')
        verify_ssl = config.getboolean('api', 'verify_ssl')
        log.debug('API URL: %s', api_url)

        bs_token = config.get(pid, 'bootstrap_token')
        # Only the first and last four characters of the token are logged.
        log.debug('bootstrap token: %s...%s', bs_token[:4], bs_token[-4:])
        bs_proj = Project(api_url, bs_token, verify_ssl=verify_ssl)
        data_token = config.get(pid, 'data_token')
        data_proj = Project(api_url, data_token, verify_ssl=verify_ssl)

        def open_dest(file_name, file_format):
            '''Open `<file_dest>/<file_name>.<file_format>` for binary write.'''
            file_dest = config.get(pid, 'file_dest')
            return openf(os_path.join(file_dest,
                                      file_name + '.' + file_format), 'wb')

        return pid, bs_proj, data_proj, open_dest

    return get_config
if __name__ == '__main__':
    def _set_logging(logfile='redcap_api_export.log'):
        '''Configure the root logger for a command-line run.

        With `--debug` anywhere in the arguments, log at DEBUG level using
        the basicConfig defaults; otherwise append INFO-level,
        timestamped messages to `logfile`.
        '''
        from sys import argv

        FORMAT = '%(asctime)-15s - %(message)s'
        if '--debug' in argv:
            logging.basicConfig(level=logging.DEBUG)
        else:
            logging.basicConfig(filename=logfile, format=FORMAT,
                                filemode='a', level=logging.INFO)

    def _trusted_main():
        '''Gather the ambient authority (argv, filesystem, network) in one
        place and hand main() only the attenuated get_config.
        '''
        from sys import argv
        from os import path as os_path
        try:
            from __builtin__ import open as openf  # Python 2
        except ImportError:
            # `__builtin__` was renamed to `builtins` in Python 3.
            from builtins import open as openf
        from redcap import Project

        get_config = mk_get_config(os_path, openf, argv, Project)
        main(get_config)

    _set_logging()
    _trusted_main()