-
Notifications
You must be signed in to change notification settings - Fork 0
/
output-analyses.py
46 lines (35 loc) · 1.49 KB
/
output-analyses.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import sys
import glob
import re
import os
import pandas as pd
def parse_pdbqt_lines(lines):
    """Extract the per-model docking results from the lines of one PDBQT file.

    Three kinds of lines are recognised by their prefix and split on
    whitespace:

    * ``REMARK Name``          -> 4 tokens, the last one is kept as the ZINC ID
    * ``REMARK VINA RESULT:``  -> 6 tokens, the last three are kept
    * ``MODEL``                -> 2 tokens, the last one is the model number

    Args:
        lines: Any iterable of text lines (a list, or an open file object).

    Returns:
        A DataFrame with the columns ``ZINC_ID``, ``MODEL``,
        ``Affinity (kcal/mol)``, ``Dist from RMSD l.b.`` and
        ``Best Mode RMSD u.b.``.  All values are the raw string tokens.
        The three kinds of lines are paired purely by their order of
        appearance; if their counts differ, the shorter columns are padded
        with NaN.

    Raises:
        ValueError: From the DataFrame constructor when a matching line has
            more tokens than the layout listed above.
    """
    # Fresh accumulators on every call, so rows of one file can never leak
    # into the frame built for the next file.
    name_rows = []
    result_rows = []
    model_rows = []

    for line in lines:
        if line.startswith("REMARK Name"):
            name_rows.append(line.split())
        elif line.startswith("REMARK VINA RESULT:"):
            result_rows.append(line.split())
        elif line.startswith("MODEL"):
            model_rows.append(line.split())

    pdbqt_df = pd.DataFrame(
        result_rows,
        columns=['REMARK', 'VINA', 'RESULT', 'Affinity (kcal/mol)',
                 'Dist from RMSD l.b.', 'Best Mode RMSD u.b.'],
    )
    name_df = pd.DataFrame(
        name_rows,
        columns=['REMARK', 'Name', '= because im lazy', 'ZINC ID'],
    )
    model_df = pd.DataFrame(model_rows, columns=['MODEL', 'Number'])

    # Only the informative tokens are kept; the literal keyword tokens
    # ("REMARK", "VINA", "=", ...) are dropped here.
    return pd.DataFrame({
        'ZINC_ID': name_df['ZINC ID'],
        'MODEL': model_df['Number'],
        'Affinity (kcal/mol)': pdbqt_df['Affinity (kcal/mol)'],
        'Dist from RMSD l.b.': pdbqt_df['Dist from RMSD l.b.'],
        'Best Mode RMSD u.b.': pdbqt_df['Best Mode RMSD u.b.'],
    })


def collect_docking_results(paths):
    """Parse every file in *paths* and stack the results into one DataFrame.

    Each per-file frame is printed as it is produced.

    Args:
        paths: Iterable of PDBQT file paths.

    Returns:
        One DataFrame holding the rows of all files, in the order the paths
        were given, with a fresh 0..N-1 index.  An empty DataFrame is
        returned when there is nothing to combine.
    """
    frames = []
    for path in paths:
        with open(path) as fp:
            # The file object is consumed line by line inside the parser.
            frame = parse_pdbqt_lines(fp)
        print(frame)
        # Files without any recognised line contribute no rows; leaving
        # them out keeps pd.concat from having to combine empty frames.
        if not frame.empty:
            frames.append(frame)

    # pd.concat raises on an empty list, so handle "no data" explicitly.
    if not frames:
        return pd.DataFrame()
    # Public replacement for the private DataFrame._append.
    return pd.concat(frames, ignore_index=True)


def main():
    """Collect all docking outputs and write them to output.xlsx / output.csv."""
    files = glob.glob('/mnt/g/docking-outputs/*.pdbqt')
    print(files)
    main_df = collect_docking_results(files)
    print(main_df)
    main_df.to_excel("output.xlsx", index=False)
    main_df.to_csv("output.csv", index=False)


if __name__ == "__main__":
    main()