-
Notifications
You must be signed in to change notification settings - Fork 0
/
Goodness of Fit(1).py
95 lines (76 loc) · 2.61 KB
/
Goodness of Fit(1).py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# -*- coding: utf-8 -*-
"""
Created on Tue Jun 25 11:20:37 2024
Implements various goodness of fit tests for linear regression
@author: SROTOSHI GHOSH
"""
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
def r(x,y,xy,x2,y2,N):
term= (N*np.sum(xy)- np.sum(x)*np.sum(y))
term2= np.abs((N*np.sum(x2)-np.sum(x)**2)*(N*np.sum(y2)-np.sum(y)**2))
return term/np.sqrt(term2)
def R2(y,ei2):
ydiff=[]
m=np.sum(y)/len(y)
for i in range (0,len(y)):
ydiff.append((y[i]-m)**2)
term=np.sum(ei2)/np.sum(ydiff)
return 1-term
filepath1=r'C:\Users\DELL\Desktop\PYTHON PROGS\Internship\Lin Reg Data2.csv'
filepath2=r'C:\Users\DELL\Desktop\PYTHON PROGS\Internship\Lin Reg Data4.csv'
df1=pd.read_csv(filepath1)
ei1=df1['residue ei']
xi1=df1['xi']
df2=pd.read_csv(filepath2)
ei2=df2['residue ei']
xi2=df2['xi']
#plotting residues and looking for any conclusive pattern
#if residues are randomly distributed, then they are said to be of adequate fit
plt.plot(xi1,ei1,'.',label="y=mx+c model")
#plt.plot(xi2,ei2,'*',label="y=mx model")
plt.legend()
plt.grid()
#both residue distributions have a random arrangement about the axes
#hence, we cannot decide which is a better fit through this method
#coefficient of correlation(r)
N1=len(xi1)
yi1=df1['yi']
xiyi1=df1['xi*yi']
xi21=df1['xi^2']
yi21=[]
for i in range (0,N1):
yi21.append(yi1[i]**2)
r1=r(xi1,yi1,xiyi1,xi21,yi21,N1)
N2=len(xi2)
yi2=df2['yi']
xiyi2=df2['xi*yi']
xi22=df2['xi^2']
yi22=[]
for i in range (0,N2):
yi22.append(yi2[i]**2)
r2=r(xi2,yi2,xiyi2,xi22,yi22,N2)
print ("The coefficient of correlation for y=mx+c model : ",r1)
print ("The coefficient of correlation for y=mx model : ",r2)
#coefficient of determination(R^2)
ei21=df1['ei^2']
ei22=df2['ei^2']
det1=R2(yi1,ei21)
print ("The coefficient of determination of y=mx+c model : ",det1)
det2=R2(yi2,ei22)
print ("The coefficient of determination for the y=mx model : ",det2)
#evaluating the sum of squares of residues and sum of residues
s_ei1=np.sum(ei1)
s_ei2=np.sum(ei2)
s_ei21=np.sum(ei21)
s_ei22=np.sum(ei22)
print (" The sums of residues for y=mx+c and y=mx model are respectively : ",s_ei1,"and", s_ei2)
print (" The sum of squares of residues of y=mx+c and y=mx models respectively are : ",s_ei21, "and", s_ei22)
'''
the sum is less for y=mx+c in both cases, so that is chosen
'''
rse1= np.sqrt(s_ei21/(N2-2))
print (" The value of residual standard error for y=mx+c model is : ",rse1)
rse2= np.sqrt(s_ei22/(N2-1))
print (" The value of residual standard error for y=mx model is : ",rse2)