# Session9.py
import numpy as np

####### Feature selection #######

# Removing features with low variance
from sklearn.feature_selection import VarianceThreshold

X = [[0, 0, 1], [0, 1, 0], [1, 0, 0], [0, 1, 1], [0, 1, 0], [0, 1, 1]]
# Drop boolean features that take the same value in more than 80% of samples;
# the variance of a Bernoulli variable is p * (1 - p).
sel = VarianceThreshold(threshold=(.8 * (1 - .8)))
newX = sel.fit_transform(X)
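# A quick check of the selection above (a minimal sketch; get_support is the
# standard scikit-learn selector-mask accessor): which columns survived.
print("VarianceThreshold kept columns:", sel.get_support(indices=True))
print(newX)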
# Univariate feature selection
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectKBest, SelectPercentile
from sklearn.feature_selection import chi2

iris = load_iris()
X, y = iris.data, iris.target
print(X.shape)
# Keep the two features with the highest chi-squared scores.
X_new = SelectKBest(chi2, k=2).fit_transform(X, y)
print(X_new.shape)
# Keep the top 70% of features ranked by the same score.
X_newpercent = SelectPercentile(chi2, percentile=70).fit_transform(X, y)
print(X_newpercent.shape)
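# For illustration, the per-feature statistics behind SelectKBest
# (a minimal sketch; `selector` is a name introduced here, and the fitted
# selector exposes the scores via its scores_ / pvalues_ attributes).
selector = SelectKBest(chi2, k=2).fit(X, y)
print("chi2 scores:  ", selector.scores_)
print("chi2 p-values:", selector.pvalues_)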
## Feature extraction with RFE (recursive feature elimination)
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression

# Recursively drop the weakest feature until two remain.
# (max_iter raised so logistic regression converges on iris;
# n_features_to_select is keyword-only in recent scikit-learn.)
model = LogisticRegression(max_iter=1000)
rfe = RFE(model, n_features_to_select=2)
fit = rfe.fit(X, y)
print("Num Features: ", fit.n_features_)
print("Selected Features: ", fit.support_)
print("Feature Ranking: ", fit.ranking_)
# Feature importance with an Extra-Trees classifier
from sklearn.ensemble import ExtraTreesClassifier

model = ExtraTreesClassifier(n_estimators=100)
model.fit(X, y)
print(model.feature_importances_)
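# Sort the importances for readability (a minimal sketch; the exact values
# vary from run to run unless random_state is fixed on the classifier).
for name, imp in sorted(zip(iris.feature_names, model.feature_importances_),
                        key=lambda t: t[1], reverse=True):
    print(f"{name}: {imp:.3f}")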
# Tree-based feature selection
from sklearn.feature_selection import SelectFromModel

print(X.shape)
clf = ExtraTreesClassifier(n_estimators=50)
clf = clf.fit(X, y)
print(clf.feature_importances_)
# prefit=True reuses the already-fitted estimator; by default, features whose
# importance falls below the mean importance are dropped.
model = SelectFromModel(clf, prefit=True)
X_new = model.transform(X)
print(X_new.shape)
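# Inspect what SelectFromModel actually did (a minimal sketch using the fitted
# selector's threshold_ and get_support accessors).
print("Importance threshold used:", model.threshold_)
print("Columns kept:", model.get_support(indices=True))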
# Principal component analysis (PCA)
from sklearn.decomposition import PCA

pca = PCA(n_components=2)
pca_fit = pca.fit(X)
print(pca_fit.components_)
print("The amount of variance explained by each of the selected components:\n",
      pca.explained_variance_ratio_)
print("*" * 50)
print("The total amount of variance explained by the selected components:\n",
      np.sum(pca.explained_variance_ratio_))
pca_x = pca.transform(X)
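# A rough sanity check on how much the 2-component projection loses:
# reconstruct X from pca_x and measure the mean squared reconstruction error
# (a minimal sketch; X_reconstructed is a name introduced here, and
# PCA.inverse_transform maps the projection back to the original space).
X_reconstructed = pca.inverse_transform(pca_x)
print("Mean squared reconstruction error:", np.mean((X - X_reconstructed) ** 2))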
# Linear regression on the PCA-projected data
from sklearn import linear_model

# The `normalize` argument was removed from LinearRegression in scikit-learn
# 1.2; standardize the inputs beforehand if scaling is needed.
lm = linear_model.LinearRegression()
lm.fit(pca_x, y)
predicted = lm.predict(pca_x)

from sklearn.metrics import r2_score, mean_squared_error
print("MSE: ", mean_squared_error(y, predicted))
print("R2 score: ", r2_score(y, predicted))

# Rebuild the fitted model by hand from its coefficients.
B = lm.coef_
B0 = lm.intercept_
model = lambda X: B0 + np.dot(X, B)
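# Sanity check (a minimal sketch): the hand-built model should reproduce
# lm.predict exactly, since both compute B0 + X @ B.
assert np.allclose(model(pca_x), lm.predict(pca_x))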
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401  (registers the 3D projection)

# Plot the regression surface over the 2-D PCA projection.
if len(pca_x[0]) == 2:
    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(111, projection="3d")
    ax.scatter(pca_x[:, 0], pca_x[:, 1], y, c='green')
    ax.plot3D(pca_x[:, 0], pca_x[:, 1], model(pca_x), 'r')
    plt.show()