-
Notifications
You must be signed in to change notification settings - Fork 0
/
Tool_Extractfeature.py
184 lines (163 loc) · 6.74 KB
/
Tool_Extractfeature.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
import numpy as np
import pandas as pd
import math
from scipy import stats
from scipy.fftpack import fft
def number_alternate_pn(array):
    """
    Count the sign alternations between consecutive samples.

    A pair of neighbouring samples is counted when one is strictly positive
    and the other strictly negative. Pairs involving a zero (or NaN) are
    never counted.
    :param array: A column of data (1-D array-like of real numbers)
    :return: int, number of adjacent pairs with opposite signs
    """
    signs = np.sign(np.asarray(array))
    # Compare signs instead of multiplying the raw samples: the raw product
    # can wrap around for narrow integer dtypes or underflow to -0.0 for very
    # small floats, and either effect hides a genuine sign change.
    return int(np.count_nonzero(signs[:-1] * signs[1:] < 0))
def slope_sign_change(array):
    """
    Count how many times the slope of the signal changes sign.

    The slope between two consecutive samples is their first difference. An
    interior sample is counted when the slope entering it and the slope
    leaving it have strictly opposite signs; flat segments (zero slope) are
    never counted.
    :param array: A column of data (1-D array-like of real numbers)
    :return: int, number of slope sign changes (0 for fewer than 3 samples)
    """
    # Work in floating point so that differences of unsigned or narrow
    # integer samples cannot wrap around.
    slopes = np.sign(np.diff(np.asarray(array, dtype=float)))
    # Compare slope signs rather than multiplying the raw differences, whose
    # product can underflow to -0.0 and hide a real change.
    return int(np.count_nonzero(slopes[:-1] * slopes[1:] < 0))
def waveform_length(array):
    """
    Calculate the waveform length of the signal.

    The waveform length is the sum of the absolute differences between
    consecutive samples, i.e. the cumulative vertical distance travelled by
    the signal.
    :param array: A column of data (1-D array-like of real numbers)
    :return: float, waveform length (0.0 for fewer than 2 samples)
    """
    # Convert to float first so that differences of unsigned or narrow
    # integer samples cannot wrap around and the running sum cannot overflow.
    steps = np.diff(np.asarray(array, dtype=float))
    return float(np.sum(np.abs(steps)))
# def statistical(data, sta_char):
# """
# Extract statistical features from the data in each window.
# :param data: A column of data (array)
# :param sta_char: String flag.
# Mean: 'MEAN'.
# Unbiased standard deviation: 'USD'.
# Skewness: 'SK'.
# Kurtosis: 'KU'.
# Root mean square: 'RMS'.
# Mean absolute deviation: 'MAD'.
# Interquartile range: 'IR'.
# Rectified mean: 'RM'.
# Waveform factor: 'WF'.
# Spectral peak: 'SP'.
# Spectrum peak frequency: 'SPF'.
#
# Trend (specially for air pressure data): 'TREND'.
#
# If you don't want any features, use [] or ''.
# If you want one feature, use ['USD'].
# If you want several features, use ['USD', 'WF'].
# (The order of the features has been determined by the program and has nothing to do with the order of the string flags entered by the user.)
# If you want all the features (not including 'TREND'), we recommend using 'ALL'.
# :return: sta_data (list, statistical features extracted)
# """
#
# if sta_char == [] or sta_char == '': # If you don't want any features
# return []
#
# elif sta_char == 'TD': # 只使用时域特征
# mean_absolute_value = np.mean(abs(data))
# standard_deviation = np.std(data, ddof=1) # Unbiased standard deviation
# SSC = slope_sign_change(data)
# WL = waveform_length(data)
# root_mean_square = math.sqrt(sum([x ** 2 for x in data]) / len(data)) # Root mean square
# data_series = pd.Series(data)
# mean_absolute_deviation = data_series.mad() # Mean absolute deviation
#
# sta_data = [mean_absolute_value, standard_deviation, SSC, WL, root_mean_square, mean_absolute_deviation,
# ]
# #
# return sta_data
#
# else:
# mean_absolute_value = np.mean(abs(data))
# WL = waveform_length(data)
# sta_data = [mean_absolute_value, WL]
# return sta_data
# General-purpose feature extraction template
def statistical(data, sta_char):
    """
    Extract statistical features from the data in each window.

    Only the requested features are computed.
    :param data: A column of data (1-D array-like of real numbers)
    :param sta_char: Feature selection. Either a sequence of feature flags,
        a single feature flag, or one of the preset names below.
        Feature flags:
            Mean: 'MEAN'.
            Mean absolute value: 'MAV'.
            Unbiased standard deviation: 'USD'.
            Skewness: 'SK'.
            Kurtosis: 'KU'.
            Root mean square: 'RMS'.
            Mean absolute deviation (around the mean): 'MAD'.
            Interquartile range: 'IR'.
            Rectified mean: 'RM'.
            Waveform factor (RMS / rectified mean): 'WF'.
            Number of positive/negative alternations: 'APN'.
            Slope sign changes: 'SSC'.
            Waveform length: 'WL'.
            Spectral peak: 'SP'.
            Spectrum peak frequency: 'SPF'.
        Presets (must be passed as a plain string, not inside a list):
            'ALL': MEAN, MAV, USD, SK, KU, RMS, MAD, IR, RM, WF, SP, SPF
                   (does not include APN, SSC or WL).
            'TD' (time domain only): MAV, USD, WL, RMS, MAD.
            'FD' (frequency domain only): SP, SPF.
        If you don't want any features, use [] or ''.
        If you want one feature, use ['USD'] (or 'USD').
        If you want several features, use ['USD', 'WF'].
        For a sequence of flags the features are returned in the order of the
        flags; for a preset they are returned in the order listed above.
        Note: 'TREND' is not implemented by this function.
    :return: sta_data (list, statistical features extracted)
    :raises KeyError: if a flag is not one of the feature flags above
    """
    if sta_char is None or len(sta_char) == 0:  # No features requested
        return []

    presets = {
        'ALL': ('MEAN', 'MAV', 'USD', 'SK', 'KU', 'RMS', 'MAD', 'IR', 'RM',
                'WF', 'SP', 'SPF'),
        'TD': ('MAV', 'USD', 'WL', 'RMS', 'MAD'),
        'FD': ('SP', 'SPF'),
    }
    if isinstance(sta_char, str):
        # A bare string is either a preset or a single flag; it must not be
        # iterated character by character.
        flags = presets.get(sta_char, (sta_char,))
    else:
        flags = tuple(sta_char)

    values = np.asarray(data, dtype=float)
    n_samples = values.shape[0]

    def rectified_mean():
        return np.mean(np.abs(values))

    def root_mean_square():
        return math.sqrt(np.mean(np.square(values)))

    def mean_absolute_deviation():
        # Computed directly because Series.mad() no longer exists in
        # pandas >= 2.0.
        return np.mean(np.abs(values - values.mean()))

    def interquartile_range():
        return (stats.scoreatpercentile(values, 75)
                - stats.scoreatpercentile(values, 25))

    spectrum_cache = []  # Holds (peak, peak_freq) so the FFT runs at most once

    def spectrum_peak():
        if not spectrum_cache:
            centered = values - values.mean()  # Remove the DC component
            magnitude = np.abs(fft(centered)) / n_samples
            half = magnitude[:n_samples // 2]
            # argmax returns the first maximal bin, so ties between bins
            # (e.g. the all-zero spectrum of a constant window) still give a
            # single well-defined index.
            peak_bin = int(np.argmax(half))
            # Frequency conversion: freq = n * fs / N. The fixed divisor 3 is
            # kept from the original code and presumably encodes N / fs for
            # the original recording setup -- confirm before reusing with a
            # different window length or sampling rate.
            spectrum_cache.append((half[peak_bin], peak_bin / 3))
        return spectrum_cache[0]

    extractors = {
        'MEAN': values.mean,
        'MAV': rectified_mean,
        'USD': lambda: np.std(values, ddof=1),
        'SK': lambda: pd.Series(values).skew(),
        'KU': lambda: pd.Series(values).kurt(),
        'RMS': root_mean_square,
        'MAD': mean_absolute_deviation,
        'IR': interquartile_range,
        'RM': rectified_mean,
        'WF': lambda: root_mean_square() / rectified_mean(),
        'APN': lambda: number_alternate_pn(values),
        'SSC': lambda: slope_sign_change(values),
        'WL': lambda: waveform_length(values),
        'SP': lambda: spectrum_peak()[0],
        'SPF': lambda: spectrum_peak()[1],
    }
    return [extractors[flag]() for flag in flags]