-
Notifications
You must be signed in to change notification settings - Fork 0
/
check_dataset.py
77 lines (59 loc) · 2.33 KB
/
check_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import json
import sys
import numpy as np
import os
import pandas as pd
# Load the user's configuration from 'your_config.json'.
# The encoding is given explicitly so the file is decoded as UTF-8 regardless
# of the platform's locale default.
with open('your_config.json', 'r', encoding='utf-8') as archivo_json:
    config_datos = json.load(archivo_json)
def _labelled_paths(dataset, label):
    """Return a new frame holding the paths of *dataset* tagged with *label*.

    The 'path' column is kept and renamed to 'file_path', and a constant
    'label' column is appended. ``rename`` and ``assign`` each return a new
    DataFrame, so the label is never written into a column selection of
    *dataset* (selecting columns and then assigning into the result can
    trigger pandas' SettingWithCopyWarning).

    Raises:
        KeyError: if *dataset* has no 'path' column.
    """
    return (
        dataset[['path']]
        .rename(columns={'path': 'file_path'})
        .assign(label=label)
    )


# Raw listings, one CSV per split (train/test) and class (positive/negative).
dataset_train_positive = pd.read_csv('datasets/train/positive/dataset.csv')
dataset_train_negative = pd.read_csv('datasets/train/negative/dataset.csv')
dataset_test_positive = pd.read_csv('datasets/test/positive/dataset.csv')
dataset_test_negative = pd.read_csv('datasets/test/negative/dataset.csv')

# Train dataset: positives get label 1, negatives label 0, then both are
# stacked (positives first) with a fresh 0..n-1 index.
df_train_positive = _labelled_paths(dataset_train_positive, 1)
df_train_negative = _labelled_paths(dataset_train_negative, 0)
df = pd.concat([df_train_positive, df_train_negative], ignore_index=True)
files = df['file_path'].tolist()
labels = df['label'].tolist()

# Test dataset: same construction as the train dataset.
df_test_positive = _labelled_paths(dataset_test_positive, 1)
df_test_negative = _labelled_paths(dataset_test_negative, 0)
df_test = pd.concat([df_test_positive, df_test_negative], ignore_index=True)
files_test = df_test['file_path'].tolist()
labels_test = df_test['label'].tolist()
# Print a summary of everything loaded above so it can be checked by eye.
_separator = '----------------------'

# Per-class frames: row count followed by the first rows.
for _title, _frame in (
    ('Positive train dataset', df_train_positive),
    ('Positive test dataset', df_test_positive),
    ('Negative train dataset', df_train_negative),
    ('Negative test dataset', df_test_negative),
):
    print(_separator)
    print(_title)
    print('Total clips:', _frame.shape[0])
    print(_frame.head())

# Merged path lists: element count followed by one sample path.
for _title, _paths in (
    ('Final train dataset', files),
    ('Final test dataset', files_test),
):
    print(_separator)
    print(_title)
    print('Total clips:', len(_paths))
    # An empty list has no first element; print None instead of raising
    # IndexError so the rest of the report is still shown.
    print('Example file:', _paths[0] if _paths else None)

print(_separator)
print('Your config')
print(config_datos)
print(_separator)