-
Notifications
You must be signed in to change notification settings - Fork 4
/
utility.py
56 lines (40 loc) · 1.57 KB
/
utility.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import numpy as np
import pandas as pd #not of your use
import logging
import json
# --- Data locations -------------------------------------------------------
# Replace these with the paths of the cleaned training / test CSV files.
FILE_NAME_TRAIN = "cleaned_train.csv"
FILE_NAME_TEST = "cleaned_test.csv"

# --- Training settings ----------------------------------------------------
# NOTE(review): presumably the learning rate and the number of training
# iterations; their use is not visible in this section -- confirm.
ALPHA = 0.001
EPOCHS = 10 ** 5

# Path of the model file, and a flag that presumably selects training vs.
# loading a stored model (verify against the code that reads it).
MODEL_FILE = "models/model2"
train_flag = True

# Send every log record (DEBUG and above) to output.log.
logging.basicConfig(level=logging.DEBUG, filename="output.log")
#utility functions
def loadData(file_name):
    """Read a CSV file and split it into feature columns and the target column.

    Args:
        file_name: Path (or file-like object) handed straight to
            ``pd.read_csv``.

    Returns:
        A tuple ``(X_df, y_df)`` where ``X_df`` is a DataFrame holding the
        columns 'overall_rating', 'bought_at', 'months_used' and
        'issues_rating' (in that order) and ``y_df`` is the 'resale_value'
        Series.

    Raises:
        KeyError: If the target column or any feature column is missing.
    """
    df = pd.read_csv(file_name)
    logging.info("Number of data points in the data set %s", len(df))

    y_df = df['resale_value']
    keys = ['overall_rating', 'bought_at', 'months_used', 'issues_rating']
    # Index directly instead of df.get(keys): get() returns None when a
    # column is missing, which would only blow up later in an unrelated place.
    X_df = df[keys]
    return X_df, y_df
def normalizeData(X_df, y_df, model):
    """Standardize features and target, recording the scaling factors.

    Each feature column and the target are shifted by their mean and divided
    by their standard deviation (as computed by pandas' ``mean()``/``std()``).

    Args:
        X_df: Feature DataFrame.
        y_df: Target Series.
        model: Dict that is updated in place with the keys
            'input_scaling_factors' ([means, stds] as lists, one entry per
            column) and 'output_scaling_factors' ([mean, std]).

    Returns:
        ``(X, y, model)`` where ``X`` and ``y`` are NumPy arrays of the
        standardized values and ``model`` is the same dict that was passed in.
    """
    feature_mean, feature_std = X_df.mean(), X_df.std()
    target_mean, target_std = y_df.mean(), y_df.std()

    # Keep the scaling factors so predictions can be mapped back to the
    # original scale later on.
    model['input_scaling_factors'] = [list(feature_mean), list(feature_std)]
    model['output_scaling_factors'] = [target_mean, target_std]

    X = np.array((X_df - feature_mean) / feature_std)
    y = np.array((y_df - target_mean) / target_std)
    return X, y, model
def normalizeTestData(X_df, y_df, model):
    """Scale features and target using factors already stored in ``model``.

    Args:
        X_df: Feature data to scale.
        y_df: Target data to scale.
        model: Dict providing 'input_scaling_factors' ([means, stds]) and
            'output_scaling_factors' ([mean, std]).

    Returns:
        ``(X, y)``: the inputs shifted by the stored mean and divided by the
        stored standard deviation. No conversion to NumPy arrays is done
        here, so the results keep whatever type the arithmetic yields.
    """
    input_factors = model['input_scaling_factors']
    output_factors = model['output_scaling_factors']
    feature_mean, feature_std = input_factors[0], input_factors[1]
    target_mean, target_std = output_factors[0], output_factors[1]

    centered_X = X_df - feature_mean
    centered_y = y_df - target_mean
    # The 1.0 factor forces floating-point arithmetic before the division.
    X = 1.0 * centered_X / feature_std
    y = 1.0 * centered_y / target_std
    return X, y
def accuracy(X, y, model):
    """Print and return the root-mean-square error of the model on (X, y).

    Despite its name, this reports an error (lower is better), not an
    accuracy: sqrt(sum((prediction - y)^2) / len(X)).

    Args:
        X: Input rows, passed to ``predict`` together with the weights.
        y: Target values, compared element-wise against the predictions.
        model: Dict whose 'theta' entry holds the weights.

    Returns:
        The computed root-mean-square error.
    """
    y_predicted = predict(X, np.array(model['theta']))
    # 1.0 * keeps the division in floating point on Python 2 as well.
    acc = np.sqrt(1.0 * np.sum(np.square(y_predicted - y)) / len(X))
    # Single-argument print(...) is valid on both Python 2 and Python 3; the
    # former `print "..."` statement is a SyntaxError on Python 3.
    print("error associated with thi model is " + str(acc))
    return acc
def predict(X, theta):
    """Return the linear prediction ``np.dot(X, theta)``.

    Args:
        X: Input values (left operand of the dot product).
        theta: Weights (right operand of the dot product).
    """
    predictions = np.dot(X, theta)
    return predictions