diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 344f35d..b4ceb23 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,10 @@ -repos: -- repo: https://github.com/psf/black - rev: 22.6.0 - hooks: - - id: black - language_version: python3 +files: "spreg\/" +repos: + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: "v0.6.3" + hooks: + - id: ruff-format + +ci: + autofix_prs: false + autoupdate_schedule: quarterly diff --git a/pyproject.toml b/pyproject.toml index 3ef59dc..e9ade88 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,13 +67,10 @@ tests = [ [tool.setuptools.packages.find] include = ["spreg", "spreg.*"] -[tool.black] -line-length = 88 - [tool.ruff] line-length = 88 -select = ["E", "F", "W", "I", "UP", "N", "B", "A", "C4", "SIM", "ARG"] -ignore = [ +lint.select = ["E", "F", "W", "I", "UP", "N", "B", "A", "C4", "SIM", "ARG"] +lint.ignore = [ "B006", "B008", "B009", diff --git a/spreg/__init__.py b/spreg/__init__.py index 2de967b..64afa0b 100755 --- a/spreg/__init__.py +++ b/spreg/__init__.py @@ -1,4 +1,3 @@ - import contextlib from importlib.metadata import PackageNotFoundError, version diff --git a/spreg/dgp.py b/spreg/dgp.py index 36576d7..712e4fc 100644 --- a/spreg/dgp.py +++ b/spreg/dgp.py @@ -29,14 +29,14 @@ "dgp_spdurbin", "dgp_lagerr", "dgp_gns", - "dgp_mess" + "dgp_mess", ] - - -def make_error(rng,n,mu=0,varu=1,method='normal'): + + +def make_error(rng, n, mu=0, varu=1, method="normal"): """ make_error: generate error term for a given distribution - + Arguments: ---------- rng: random number object @@ -45,7 +45,7 @@ def make_error(rng,n,mu=0,varu=1,method='normal'): varu: variance (when needed) method: type of distribution, one of normal, laplace, cauchy, lognormal - + Returns: -------- u: nx1 vector of random errors @@ -65,29 +65,30 @@ def make_error(rng,n,mu=0,varu=1,method='normal'): """ # normal - standard normal is default - if method == 'normal': + if method == "normal": sdu = math.sqrt(varu) - u = rng.normal(loc=mu,scale=sdu,size=n).reshape(n,1) + u = rng.normal(loc=mu, scale=sdu, size=n).reshape(n, 1) # laplace with thicker tails - elif method == 'laplace': - sdu = math.sqrt(varu/2.0) - u = rng.laplace(loc=mu,scale=sdu,size=n).reshape(n,1) + elif method == "laplace": + sdu = math.sqrt(varu / 2.0) + u = rng.laplace(loc=mu, scale=sdu, size=n).reshape(n, 1) # cauchy, ill-behaved, no mean or variance defined - elif method == 'cauchy': - u = rng.standard_cauchy(size=n).reshape(n,1) - elif method == 'lognormal': + elif method == "cauchy": + u = rng.standard_cauchy(size=n).reshape(n, 1) + elif method == "lognormal": sdu = math.sqrt(varu) - u = rng.lognormal(mean=mu,sigma=sdu,size=n).reshape(n,1) + u = rng.lognormal(mean=mu, sigma=sdu, size=n).reshape(n, 1) # all other yield warning else: - print('Warning: Unsupported distribution') + print("Warning: Unsupported distribution") u = None return u -def make_x(rng,n,mu=[0],varu=[1],cor=0,method='uniform'): + +def make_x(rng, n, mu=[0], varu=[1], cor=0, method="uniform"): """ - make_x: generate a matrix of k columns of x for a given distribution - + make_x: generate a matrix of k columns of x for a given distribution + Arguments: ---------- rng: random number object @@ -97,11 +98,11 @@ def make_x(rng,n,mu=[0],varu=[1],cor=0,method='uniform'): cor: correlation as a float (for bivariate normal only) method: type of distribution, one of uniform, normal, bivnormal (bivariate normal) - + Returns: -------- x: nxk matrix of x variables - + Note: ----- Uniform and normal generate 
separate draws, bivariate normal generates @@ -125,49 +126,50 @@ def make_x(rng,n,mu=[0],varu=[1],cor=0,method='uniform'): k = len(mu) if k == len(varu): # initialize - x = np.zeros((n,k)) + x = np.zeros((n, k)) for i in range(k): # uniform - range is derived from variance since var = (1/12)range^2 # range is found as square root of 12 times variance # for 0-1, varu should be 0.0833333 # low is always 0 - if method == 'uniform': - sdu = math.sqrt(12.0*varu[i]) - x[:,i] = rng.uniform(low=0,high=sdu,size=n) + if method == "uniform": + sdu = math.sqrt(12.0 * varu[i]) + x[:, i] = rng.uniform(low=0, high=sdu, size=n) # normal - independent normal draws - elif method == 'normal': + elif method == "normal": sdu = math.sqrt(varu[i]) - x[:,i] = rng.normal(loc=mu[i],scale=sdu,size=n) + x[:, i] = rng.normal(loc=mu[i], scale=sdu, size=n) # bivariate normal - only for k=2 - elif method == 'bivnormal': + elif method == "bivnormal": if k != 2: - print('Error: Wrong dimension for k') + print("Error: Wrong dimension for k") x = None return x else: - ucov = cor* math.sqrt(varu[0]*varu[1]) - mcov = [[varu[0],ucov],[ucov,varu[1]]] - x = rng.multivariate_normal(mean=mu,cov=mcov,size=n) + ucov = cor * math.sqrt(varu[0] * varu[1]) + mcov = [[varu[0], ucov], [ucov, varu[1]]] + x = rng.multivariate_normal(mean=mu, cov=mcov, size=n) return x else: - print('Warning: Unsupported distribution') + print("Warning: Unsupported distribution") x = None else: x = None return x -def make_wx(x,w,o=1): + +def make_wx(x, w, o=1): """ - make_wx: generate a matrix spatially lagged x given matrix x - + make_wx: generate a matrix spatially lagged x given matrix x + x must be previously generated using make_x, no constant included - + Arguments: ---------- x: x matrix - no constant w: row-standardized spatial weights in spreg format o: order of contiguity, default o=1 - + Returns: -------- wx: nx(kxo) matrix of spatially lagged x variables @@ -193,26 +195,26 @@ def make_wx(x,w,o=1): if w.n != x.shape[0]: print("Error: incompatible weights dimensions") return None - w1x = libpysal.weights.lag_spatial(w,x) + w1x = libpysal.weights.lag_spatial(w, x) wx = w1x if o > 1: - for i in range(1,o): - whx = libpysal.weights.lag_spatial(w,w1x) + for i in range(1, o): + whx = libpysal.weights.lag_spatial(w, w1x) w1x = whx - wx = np.hstack((wx,whx)) + wx = np.hstack((wx, whx)) return wx - -def make_xb(x,beta): + +def make_xb(x, beta): """ - make_xb: generate a column xb as matrix x (constant added) + make_xb: generate a column xb as matrix x (constant added) times list beta (includes coefficient for constant term) - + Arguments: ---------- x: n x (k-1) matrix for x variables beta: k length list of regression coefficients - + Returns: -------- xb: nx1 vector of x times beta @@ -233,25 +235,26 @@ def make_xb(x,beta): """ n = x.shape[0] k = x.shape[1] - if k+1 != len(beta): + if k + 1 != len(beta): print("Error: Incompatible dimensions") return None else: - b = np.array(beta)[:,np.newaxis] - x1=np.hstack((np.ones((n,1)),x)) # include constant - xb = np.dot(x1,b) + b = np.array(beta)[:, np.newaxis] + x1 = np.hstack((np.ones((n, 1)), x)) # include constant + xb = np.dot(x1, b) return xb - -def make_wxg(wx,gamma): + + +def make_wxg(wx, gamma): """ - make_wxg: generate a column wxg as matrix wx (no constant) + make_wxg: generate a column wxg as matrix wx (no constant) times list gamma (coefficient of spatially lagged x) - + Arguments: ---------- wx: n x ((k-1)xo) matrix for spatially lagged x variables of all orders gamma: (k-1)*o length list of 
regression coefficients for spatially lagged x - + Returns: -------- wxg: nx1 vector of wx times gamma @@ -278,18 +281,19 @@ def make_wxg(wx,gamma): """ k = wx.shape[1] - if (k > 1): + if k > 1: if k != len(gamma): print("Error: Incompatible dimensions") return None else: - g = np.array(gamma)[:,np.newaxis] - wxg = np.dot(wx,g) + g = np.array(gamma)[:, np.newaxis] + wxg = np.dot(wx, g) else: # gamma is a scalar wxg = wx * gamma return wxg -def dgp_errproc(u,w,lam=0.5,model='sar',imethod='power_exp'): + +def dgp_errproc(u, w, lam=0.5, model="sar", imethod="power_exp"): """ dgp_errproc: generates pure spatial error process @@ -327,25 +331,25 @@ def dgp_errproc(u,w,lam=0.5,model='sar',imethod='power_exp'): if w.n != n0: print("Error: incompatible weights dimensions") return None - if model == 'sar': - y = inverse_prod(w,u,lam,inv_method=imethod) - elif model == 'ma': - y = u + lam * libpysal.weights.lag_spatial(w,u) + if model == "sar": + y = inverse_prod(w, u, lam, inv_method=imethod) + elif model == "ma": + y = u + lam * libpysal.weights.lag_spatial(w, u) else: print("Error: unsupported model type") return None return y - -def dgp_ols(u,xb): + +def dgp_ols(u, xb): """ dgp_ols: generates y for non-spatial process with given xb and error term u - + Arguments: ---------- u: random error vector xb: vector of xb - + Returns: ---------- y: vector of observations on dependent variable @@ -369,23 +373,24 @@ def dgp_ols(u,xb): """ n1 = u.shape[0] - n2 = xb.shape[0] + n2 = xb.shape[0] if n1 != n2: print("Error: dimension mismatch") return None y = xb + u return y -def dgp_slx(u,xb,wxg): + +def dgp_slx(u, xb, wxg): """ dgp_slx: generates y for SLX with given xb, wxg, and error term u - + Arguments: ---------- u: random error vector xb: vector of xb wxg: vector of wxg - + Returns: ---------- y: vector of observations on dependent variable @@ -412,7 +417,7 @@ def dgp_slx(u,xb,wxg): [8.37023076]]) """ n0 = u.shape[0] - n1 = xb.shape[0] + n1 = xb.shape[0] n2 = wxg.shape[0] if n0 != n1: print("Error: dimension mismatch") @@ -421,13 +426,14 @@ def dgp_slx(u,xb,wxg): print("Error: dimension mismatch") return None y = xb + wxg + u - return y - -def dgp_sperror(u,xb,w,lam=0.5,model='sar',imethod='power_exp'): + return y + + +def dgp_sperror(u, xb, w, lam=0.5, model="sar", imethod="power_exp"): """ dgp_sperror: generates y for spatial error model with given xb, weights, spatial parameter lam, error term, method for inverse transform - + Arguments: ---------- u: random error @@ -436,7 +442,7 @@ def dgp_sperror(u,xb,w,lam=0.5,model='sar',imethod='power_exp'): lam: spatial coefficient model: type of process ('sar' or 'ma') imethod: method for inverse transformation, default = 'power_exp' - + Returns: ---------- y: vector of observations on dependent variable @@ -468,22 +474,23 @@ def dgp_sperror(u,xb,w,lam=0.5,model='sar',imethod='power_exp'): elif w.n != n1: print("Error: incompatible weights dimensions") return None - if model == 'sar': - u1 = inverse_prod(w,u,lam,inv_method=imethod) - elif model == 'ma': - u1 = u + lam * libpysal.weights.lag_spatial(w,u) + if model == "sar": + u1 = inverse_prod(w, u, lam, inv_method=imethod) + elif model == "ma": + u1 = u + lam * libpysal.weights.lag_spatial(w, u) else: print("Error: unsupported model type") return None y = xb + u1 return y -def dgp_slxerror(u,xb,wxg,w,lam=0.5,model='sar',imethod='power_exp'): + +def dgp_slxerror(u, xb, wxg, w, lam=0.5, model="sar", imethod="power_exp"): """ dgp_sperror: generates y for SLX spatial error model with xb, wxg, weights, spatial 
parameter lam, model type (sar or ma), error term, method for inverse transform - + Arguments: ---------- u: random error @@ -493,7 +500,7 @@ def dgp_slxerror(u,xb,wxg,w,lam=0.5,model='sar',imethod='power_exp'): lam: spatial coefficient model: type of process ('sar' or 'ma') imethod: method for inverse transformation, default = 'power_exp' - + Returns: ---------- y: vector of observations on dependent variable @@ -518,10 +525,10 @@ def dgp_slxerror(u,xb,wxg,w,lam=0.5,model='sar',imethod='power_exp'): [3.93234334], [4.51270801], [7.7217999 ]]) - """ - + """ + n0 = u.shape[0] - n1 = xb.shape[0] + n1 = xb.shape[0] n2 = wxg.shape[0] if n0 != n1: print("Error: dimension mismatch") @@ -532,22 +539,23 @@ def dgp_slxerror(u,xb,wxg,w,lam=0.5,model='sar',imethod='power_exp'): if w.n != n1: print("Error: incompatible weights dimensions") return None - if model == 'sar': - u1 = inverse_prod(w,u,lam,inv_method=imethod) - elif model == 'ma': - u1 = u + lam * libpysal.weights.lag_spatial(w,u) + if model == "sar": + u1 = inverse_prod(w, u, lam, inv_method=imethod) + elif model == "ma": + u1 = u + lam * libpysal.weights.lag_spatial(w, u) else: print("Error: unsupported model type") - return None + return None y = xb + wxg + u1 return y - -def dgp_lag(u,xb,w,rho=0.5,imethod='power_exp'): + + +def dgp_lag(u, xb, w, rho=0.5, imethod="power_exp"): """ dgp_lag: generates y for spatial lag model with xb, weights, spatial parameter rho, error term, method for inverse transform - + Arguments: ---------- u: random error @@ -555,7 +563,7 @@ def dgp_lag(u,xb,w,rho=0.5,imethod='power_exp'): w: spatial weights rho: spatial coefficient imethod: method for inverse transformation, default = 'power_exp' - + Returns: ---------- y: vector of observations on dependent variable @@ -578,7 +586,7 @@ def dgp_lag(u,xb,w,rho=0.5,imethod='power_exp'): [ 5.39821733], [ 5.62244672], [ 8.868168 ]]) - """ + """ n0 = u.shape[0] n1 = xb.shape[0] if n0 != n1: @@ -588,15 +596,16 @@ def dgp_lag(u,xb,w,rho=0.5,imethod='power_exp'): print("Error: incompatible weights dimensions") return None y1 = xb + u - y = inverse_prod(w,y1,rho,inv_method=imethod) + y = inverse_prod(w, y1, rho, inv_method=imethod) return y - -def dgp_spdurbin(u,xb,wxg,w,rho=0.5,imethod='power_exp'): + + +def dgp_spdurbin(u, xb, wxg, w, rho=0.5, imethod="power_exp"): """ dgp_spdurbin: generates y for spatial Durbin model with xb, wxg, weights, spatial parameter rho, error term, method for inverse transform - + Arguments: ---------- u: random error @@ -605,7 +614,7 @@ def dgp_spdurbin(u,xb,wxg,w,rho=0.5,imethod='power_exp'): w: spatial weights rho: spatial coefficient imethod: method for inverse transformation, default = 'power_exp' - + Returns: ---------- y: vector of observations on dependent variable @@ -630,9 +639,9 @@ def dgp_spdurbin(u,xb,wxg,w,rho=0.5,imethod='power_exp'): [11.95080505], [12.55220513], [15.75805066]]) - """ + """ n0 = u.shape[0] - n1 = xb.shape[0] + n1 = xb.shape[0] n2 = wxg.shape[0] if n0 != n1: print("Error: dimension mismatch") @@ -643,17 +652,18 @@ def dgp_spdurbin(u,xb,wxg,w,rho=0.5,imethod='power_exp'): if w.n != n1: print("Error: incompatible weights dimensions") y1 = xb + wxg + u - y = inverse_prod(w,y1,rho,inv_method=imethod) + y = inverse_prod(w, y1, rho, inv_method=imethod) return y - -def dgp_lagerr(u,xb,w,rho=0.5,lam=0.2,model='sar',imethod='power_exp'): + + +def dgp_lagerr(u, xb, w, rho=0.5, lam=0.2, model="sar", imethod="power_exp"): """ dgp_lagerr: generates y for spatial lag model with sar or ma errors with xb, weights, spatial 
parameter rho, spatial parameter lambda, model for spatial process, error term, method for inverse transform - + Arguments: ---------- u: random error @@ -663,7 +673,7 @@ def dgp_lagerr(u,xb,w,rho=0.5,lam=0.2,model='sar',imethod='power_exp'): lam: spatial coefficient for error model: spatial process for error imethod: method for inverse transformation, default = 'power_exp' - + Returns: ---------- y: vector of observations on dependent variable @@ -686,7 +696,7 @@ def dgp_lagerr(u,xb,w,rho=0.5,lam=0.2,model='sar',imethod='power_exp'): [ 5.40644034], [ 5.51132886], [ 8.58872366]]) - """ + """ n0 = u.shape[0] n1 = xb.shape[0] if n0 != n1: @@ -695,25 +705,26 @@ def dgp_lagerr(u,xb,w,rho=0.5,lam=0.2,model='sar',imethod='power_exp'): if w.n != n1: print("Error: incompatible weights dimensions") return None - if model == 'sar': - u1 = inverse_prod(w,u,lam,inv_method=imethod) - elif model == 'ma': - u1 = u + lam * libpysal.weights.lag_spatial(w,u) + if model == "sar": + u1 = inverse_prod(w, u, lam, inv_method=imethod) + elif model == "ma": + u1 = u + lam * libpysal.weights.lag_spatial(w, u) else: print("Error: unsupported model type") return None y1 = xb + u1 - y = inverse_prod(w,y1,rho,inv_method=imethod) + y = inverse_prod(w, y1, rho, inv_method=imethod) return y - -def dgp_gns(u,xb,wxg,w,rho=0.5,lam=0.2,model='sar',imethod='power_exp'): + + +def dgp_gns(u, xb, wxg, w, rho=0.5, lam=0.2, model="sar", imethod="power_exp"): """ dgp_gns: generates y for general nested model with sar or ma errors with xb, wxg, weights, spatial parameter rho, spatial parameter lambda, model for spatial process, error term, method for inverse transform - + Arguments: ---------- u: random error @@ -724,7 +735,7 @@ def dgp_gns(u,xb,wxg,w,rho=0.5,lam=0.2,model='sar',imethod='power_exp'): lam: spatial coefficient for error model: spatial process for error imethod: method for inverse transformation, default = 'power_exp' - + Returns: ---------- y: vector of observations on dependent variable @@ -749,42 +760,43 @@ def dgp_gns(u,xb,wxg,w,rho=0.5,lam=0.2,model='sar',imethod='power_exp'): [11.95902806], [12.44108728], [15.47860632]]) - """ + """ n0 = u.shape[0] - n1 = xb.shape[0] + n1 = xb.shape[0] n2 = wxg.shape[0] if n0 != n1: print("Error: dimension mismatch") - return None + return None elif n1 != n2: print("Error: dimension mismatch") return None if w.n != n1: print("Error: incompatible weights dimensions") - if model == 'sar': - u1 = inverse_prod(w,u,lam,inv_method=imethod) - elif model == 'ma': - u1 = u + lam * libpysal.weights.lag_spatial(w,u) + if model == "sar": + u1 = inverse_prod(w, u, lam, inv_method=imethod) + elif model == "ma": + u1 = u + lam * libpysal.weights.lag_spatial(w, u) else: print("Error: unsupported model type") return None y1 = xb + wxg + u1 - y = inverse_prod(w,y1,rho,inv_method=imethod) + y = inverse_prod(w, y1, rho, inv_method=imethod) return y - -def dgp_mess(u,xb,w,rho=0.5): + + +def dgp_mess(u, xb, w, rho=0.5): """ dgp_mess: generates y for MESS spatial lag model with xb, weights, spatial parameter rho (gets converted into alpha), sigma/method for the error term - + Arguments: ---------- u: random error xb: vector of xb w: spatial weights rho: spatial coefficient (converted into alpha) - + Returns: ---------- y: vector of observations on dependent variable @@ -807,22 +819,23 @@ def dgp_mess(u,xb,w,rho=0.5): [ 5.32807674], [ 5.55549492], [ 8.62685145]]) - """ + """ n0 = u.shape[0] n1 = xb.shape[0] if n0 != n1: print("Error: dimension mismatch") - return None + return None if w.n != n1: 
print("Error: incompatible weights dimensions") return None bigw = libpysal.weights.full(w)[0] - alpha=np.log(1-rho) #convert between rho and alpha - aw=-alpha*bigw # inverse exponential is -alpha + alpha = np.log(1 - rho) # convert between rho and alpha + aw = -alpha * bigw # inverse exponential is -alpha xbu = xb + u - y = np.dot(expm(aw),xbu) + y = np.dot(expm(aw), xbu) return y + def _test(): import doctest @@ -831,6 +844,6 @@ def _test(): doctest.testmod() np.set_printoptions(suppress=start_suppress) + if __name__ == "__main__": _test() - \ No newline at end of file diff --git a/spreg/diagnostics.py b/spreg/diagnostics.py index 817fc56..442d3a2 100755 --- a/spreg/diagnostics.py +++ b/spreg/diagnostics.py @@ -1,7 +1,8 @@ """ -Diagnostics for regression estimations. - +Diagnostics for regression estimations. + """ + __author__ = ( "Luc Anselin luc.anselin@asu.edu, Nicholas Malizia nicholas.malizia@asu.edu " ) @@ -38,7 +39,7 @@ ] -def f_stat(reg,df=0): +def f_stat(reg, df=0): """ Calculates the f-statistic and associated p-value for multiple coefficient constraints :cite:`Greene2003`. @@ -49,7 +50,7 @@ def f_stat(reg,df=0): ---------- reg : regression object output instance from a regression model - df : number of coefficient constraints + df : number of coefficient constraints (zero constraint for last df coefficients in betas) Returns @@ -100,15 +101,15 @@ def f_stat(reg,df=0): utu = reg.utu # (scalar) residual sum of squares # default case, all coefficients if df == 0: - r = k-1 + r = k - 1 predy = reg.predy # (array) vector of predicted values (n x 1) mean_y = reg.mean_y # (scalar) mean of dependent observations U = np.sum((predy - mean_y) ** 2) - else: # F test on last df coefficients + else: # F test on last df coefficients y = reg.y r = df - x0 = reg.x[:,:-r] - olsr = BaseOLS(y,x0) # constrained regression + x0 = reg.x[:, :-r] + olsr = BaseOLS(y, x0) # constrained regression rtr = olsr.utu U = rtr - utu fStat = (U / r) / (utu / (n - k)) @@ -1389,6 +1390,7 @@ def likratiotest(reg0, reg1): likratio = {"likr": likr, "df": 1, "p-value": pvalue} return likratio + def dwh(reg): """ Durbin-Wu-Hausman test on endogeneity of variables @@ -1406,23 +1408,23 @@ def dwh(reg): and associated p-value """ - n = reg.n - ny = reg.yend.shape[1] # number of endogenous variables + n = reg.n + ny = reg.yend.shape[1] # number of endogenous variables qq = reg.h # all exogenous and instruments xx = reg.z # all exogenous and endogenous # get predicted values for endogenous variables on all instruments - py = np.zeros((n,ny)) + py = np.zeros((n, ny)) for i in range(ny): - yy = reg.yend[:, i].reshape(n,1) - ols1 = BaseOLS(y=yy,x=qq) + yy = reg.yend[:, i].reshape(n, 1) + ols1 = BaseOLS(y=yy, x=qq) yp = ols1.predy - py[0:n,i] = yp.flatten() + py[0:n, i] = yp.flatten() nxq = sphstack(xx, py) # F-test in augmented regression ols2 = BaseOLS(y=reg.y, x=nxq) dwh = f_stat(ols2, df=ny) - return dwh - + return dwh + def _test(): import doctest diff --git a/spreg/diagnostics_panel.py b/spreg/diagnostics_panel.py index 5b299d3..c991fbe 100644 --- a/spreg/diagnostics_panel.py +++ b/spreg/diagnostics_panel.py @@ -62,7 +62,7 @@ def panel_LMlag(y, x, w): num = num2 + (trw * trw * ols.sig2) J = num / ols.sig2 utwy = spdot(ols.u.T, spdot(Wsp_nt, y)) - lm = utwy ** 2 / (ols.sig2 ** 2 * J) + lm = utwy**2 / (ols.sig2**2 * J) pval = chisqprob(lm, 1) return (lm[0][0], pval[0][0]) @@ -98,7 +98,7 @@ def panel_LMerror(y, x, w): wTw = spdot(W.T, W) trw = ww.diagonal().sum() + wTw.diagonal().sum() utwu = spdot(ols.u.T, spdot(Wsp_nt, 
ols.u)) - lm = utwu ** 2 / (ols.sig2 ** 2 * t * trw) + lm = utwu**2 / (ols.sig2**2 * t * trw) pval = chisqprob(lm, 1) return (lm[0][0], pval[0][0]) diff --git a/spreg/diagnostics_sp.py b/spreg/diagnostics_sp.py index 42dadc0..2a5df83 100644 --- a/spreg/diagnostics_sp.py +++ b/spreg/diagnostics_sp.py @@ -1,6 +1,7 @@ """ Spatial diagnostics module """ + __author__ = "Luc Anselin lanselin@gmail.com, Daniel Arribas-Bel darribas@asu.edu, Pedro Amaral pedrovma@gmail.com" from .utils import spdot @@ -162,10 +163,20 @@ class LMtests: def __init__(self, ols, w, tests=["all"]): cache = spDcache(ols, w) if tests == ["all"]: - tests = ["lme", "lml", "rlme", "rlml", "sarma", "lmwx", "lmspdurbin", "rlmwx", - "rlmdurlag", "lmslxerr"] # added back in for access + tests = [ + "lme", + "lml", + "rlme", + "rlml", + "sarma", + "lmwx", + "lmspdurbin", + "rlmwx", + "rlmdurlag", + "lmslxerr", + ] # added back in for access if any(test in ["lme", "lmslxerr"] for test in tests): - #if "lme" in tests: + # if "lme" in tests: self.lme = lmErr(ols, w, cache) if any(test in ["lml", "rlmwx"] for test in tests): self.lml = lmLag(ols, w, cache) @@ -175,8 +186,8 @@ def __init__(self, ols, w, tests=["all"]): self.rlml = rlmLag(ols, w, cache) if "sarma" in tests: self.sarma = lmSarma(ols, w, cache) - #if any(test in ["lmwx", "rlmdurlag", "lmslxerr"] for test in tests): - if any(test in ["lmwx", "rlmdurlag","lmslxerr"] for test in tests): + # if any(test in ["lmwx", "rlmdurlag", "lmslxerr"] for test in tests): + if any(test in ["lmwx", "rlmdurlag", "lmslxerr"] for test in tests): self.lmwx = lm_wx(ols, w) if any(test in ["lmspdurbin", "rlmdurlag", "rlmwx"] for test in tests): self.lmspdurbin = lm_spdurbin(ols, w) @@ -184,9 +195,10 @@ def __init__(self, ols, w, tests=["all"]): self.rlmwx = rlm_wx(ols, self.lmspdurbin, self.lml) if "rlmdurlag" in tests: self.rlmdurlag = rlm_durlag(self.lmspdurbin, self.lmwx) - if "lmslxerr" in tests: #currently removed - LA added back in for access + if "lmslxerr" in tests: # currently removed - LA added back in for access self.lmslxerr = lm_slxerr(ols, self.lme, self.lmwx) + class MoranRes: """ Moran's I for spatial autocorrelation in residuals from OLS regression @@ -564,7 +576,7 @@ def lmErr(reg, w, spDcache): Pair of statistic and p-value for the LM error test. """ - lm = spDcache.utwuDs ** 2 / spDcache.t + lm = spDcache.utwuDs**2 / spDcache.t pval = chisqprob(lm, 1) return (lm[0][0], pval[0][0]) @@ -589,7 +601,7 @@ def lmLag(ols, w, spDcache): Pair of statistic and p-value for the LM lag test. """ - lm = spDcache.utwyDs ** 2 / (ols.n * spDcache.j) + lm = spDcache.utwyDs**2 / (ols.n * spDcache.j) pval = chisqprob(lm, 1) return (lm[0][0], pval[0][0]) @@ -672,11 +684,12 @@ def lmSarma(ols, w, spDcache): """ first = (spDcache.utwyDs - spDcache.utwuDs) ** 2 / (w.n * spDcache.j - spDcache.t) - secnd = spDcache.utwuDs ** 2 / spDcache.t + secnd = spDcache.utwuDs**2 / spDcache.t lm = first + secnd pval = chisqprob(lm, 2) return (lm[0][0], pval[0][0]) + def lm_wx(reg, w): """ LM test for WX. Implemented as presented in Koley & Bera (2024) :cite:`KoleyBera2024`. 
@@ -698,7 +711,7 @@ def lm_wx(reg, w): # preliminaries # set up X1 (constant) and X (no constant) as x1 and xx x1 = reg.x - xx = x1[:,1:] + xx = x1[:, 1:] # WX wx = w.sparse * xx # end of preliminaries @@ -715,10 +728,11 @@ def lm_wx(reg, w): rsg1 = (xpwpu.T @ xqxi) @ xpwpu rsgam = rsg1[0][0] / reg.sig2n pval = chisqprob(rsgam, (reg.k - 1)) - rsgamma = (rsgam,pval) - return(rsgamma) + rsgamma = (rsgam, pval) + return rsgamma -def lm_spdurbin(reg,w): + +def lm_spdurbin(reg, w): """ Joint test for SDM. Implemented as presented in Koley & Bera (2024) :cite:`KoleyBera2024`. @@ -739,7 +753,7 @@ def lm_spdurbin(reg,w): # preliminaries # set up X1 (constant) and X (no constant) as x1 and xx x1 = reg.x - xx = x1[:,1:] + xx = x1[:, 1:] k = x1.shape[1] # WX wx = w.sparse * xx @@ -757,21 +771,23 @@ def lm_spdurbin(reg,w): pp = w.trcWtW_WW # end of preliminaries # J_11: block matrix with X1'X1 and n/2sig2n - jj1a = np.hstack((reg.xtx,np.zeros((k,1)))) - jj1b = np.hstack((np.zeros((1,k)),np.array([reg.n/(2.0*reg.sig2n)]).reshape(1,1))) - jj11 = np.vstack((jj1a,jj1b)) + jj1a = np.hstack((reg.xtx, np.zeros((k, 1)))) + jj1b = np.hstack( + (np.zeros((1, k)), np.array([reg.n / (2.0 * reg.sig2n)]).reshape(1, 1)) + ) + jj11 = np.vstack((jj1a, jj1b)) # J_12: matrix with k-1 rows X1'WX1b and X1'WX, and 1 row of zeros jj12a = np.hstack((x1.T @ wxb, x1.T @ wx)) - jj12 = np.vstack((jj12a,np.zeros((1,k)))) + jj12 = np.vstack((jj12a, np.zeros((1, k)))) # J_22 matrix with diagonal elements b'X1'W'WX1b + T.sig2n and X'W'WX # and off-diagonal element b'X1'W'WX jj22a = wxb.T @ wxb + pp * reg.sig2n - jj22a = jj22a.reshape(1,1) - wxbtwx = (wxb.T @ wx).reshape(1,k-1) - jj22b = np.hstack((jj22a,wxbtwx)) + jj22a = jj22a.reshape(1, 1) + wxbtwx = (wxb.T @ wx).reshape(1, k - 1) + jj22b = np.hstack((jj22a, wxbtwx)) wxtwx = wx.T @ wx - jj22c = np.hstack((wxbtwx.T,wxtwx)) - jj22 = np.vstack((jj22b,jj22c)) + jj22c = np.hstack((wxbtwx.T, wxtwx)) + jj22 = np.vstack((jj22b, jj22c)) # J^22 (the inverse) from J^22 = (J_22 - J_21.J_11^-1.J_12)^-1 jj11i = la.inv(jj11) j121121 = (jj12.T @ jj11i) @ jj12 @@ -780,14 +796,15 @@ def lm_spdurbin(reg,w): # rescale by sig2n jj22i = jj22i * reg.sig2n # statistic - dd = np.vstack((drho,dgam)) + dd = np.vstack((drho, dgam)) rsjoint = (dd.T @ jj22i) @ dd rsjoint = rsjoint[0][0] pval = chisqprob(rsjoint, k) rsrhogam = (rsjoint, pval) - return(rsrhogam) + return rsrhogam -def rlm_wx(reg,lmspdurbin,lmlag): + +def rlm_wx(reg, lmspdurbin, lmlag): """ Robust LM WX test. Implemented as presented in Koley & Bera (2024) :cite:`KoleyBera2024`. @@ -808,11 +825,12 @@ def rlm_wx(reg,lmspdurbin,lmlag): """ # robust gamma = rsjoint - rsrho rsgams = lmspdurbin[0] - lmlag[0] - pval = chisqprob(rsgams,(reg.k - 1)) + pval = chisqprob(rsgams, (reg.k - 1)) rsgamstar = (rsgams, pval) - return(rsgamstar) + return rsgamstar + -def rlm_durlag(lmspdurbin,lmwx): +def rlm_durlag(lmspdurbin, lmwx): """ Robust LM Lag - SDM. Implemented as presented in Koley & Bera (2024) :cite:`KoleyBera2024`. @@ -831,11 +849,12 @@ def rlm_durlag(lmspdurbin,lmwx): # robust rho = rsjoint - rsgam rsrhos = lmspdurbin[0] - lmwx[0] - pval = chisqprob(rsrhos,1) + pval = chisqprob(rsrhos, 1) rsrhostar = (rsrhos, pval) - return(rsrhostar) + return rsrhostar -def lm_slxerr(reg,lmerr,lmwx): + +def lm_slxerr(reg, lmerr, lmwx): """ Joint test for Error and WX. Implemented as presented in Koley & Bera (2024) :cite:`KoleyBera2024`. @@ -854,9 +873,10 @@ def lm_slxerr(reg,lmerr,lmwx): Pair of statistic and p-value for the Joint test for Error and WX. 
""" rslamgam = lmerr[0] + lmwx[0] - pval = chisqprob(rslamgam,reg.k) - rslamgamma = (rslamgam,pval) - return(rslamgamma) + pval = chisqprob(rslamgam, reg.k) + rslamgamma = (rslamgam, pval) + return rslamgamma + def get_mI(reg, w, spDcache): """ @@ -892,8 +912,8 @@ def get_vI(ols, w, ei, spDcache): B = spDcache.AB[1] trB = np.sum(B.diagonal()) * 4.0 - vi = (w.n ** 2 / (w.s0 ** 2 * (w.n - ols.k) * (w.n - ols.k + 2.0))) * ( - w.s1 + 2.0 * trA2 - trB - ((2.0 * (spDcache.trA ** 2)) / (w.n - ols.k)) + vi = (w.n**2 / (w.s0**2 * (w.n - ols.k) * (w.n - ols.k + 2.0))) * ( + w.s1 + 2.0 * trA2 - trB - ((2.0 * (spDcache.trA**2)) / (w.n - ols.k)) ) return vi @@ -947,7 +967,7 @@ def akTest(iv, w, spDcache): a = np.dot(etwz, np.dot(iv.varb, etwz.T)) s12 = (w.s0 / w.n) ** 2 phi2 = (spDcache.t + (4.0 / iv.sig2n) * a) / (s12 * w.n) - ak = w.n * mi ** 2 / phi2 + ak = w.n * mi**2 / phi2 pval = chisqprob(ak, 1) return (mi, ak[0][0], pval[0][0]) diff --git a/spreg/diagnostics_sur.py b/spreg/diagnostics_sur.py index 7fdfdb1..fca80f2 100644 --- a/spreg/diagnostics_sur.py +++ b/spreg/diagnostics_sur.py @@ -249,16 +249,9 @@ def surLMlag(n_eq, WS, bigy, bigX, bigE, bigYP, sig, varb): # I(b,b) inverse is varb # I(b,rho) - bp = sigi[0,] * spdot( - bigX[0].T, WbigYP - ) # initialize + bp = sigi[0,] * spdot(bigX[0].T, WbigYP) # initialize for r in range(1, n_eq): - bpwork = ( - sigi[ - r, - ] - * spdot(bigX[r].T, WbigYP) - ) + bpwork = sigi[r,] * spdot(bigX[r].T, WbigYP) bp = np.vstack((bp, bpwork)) # partitioned part i_inner = Ipp - np.dot(np.dot(bp.T, varb), bp) diff --git a/spreg/diagnostics_tsls.py b/spreg/diagnostics_tsls.py index 5d91874..2c9e220 100644 --- a/spreg/diagnostics_tsls.py +++ b/spreg/diagnostics_tsls.py @@ -1,6 +1,6 @@ """ -Diagnostics for two stage least squares regression estimations. - +Diagnostics for two stage least squares regression estimations. + """ __author__ = ( diff --git a/spreg/error_sp.py b/spreg/error_sp.py index 566147a..1d8e109 100644 --- a/spreg/error_sp.py +++ b/spreg/error_sp.py @@ -9,7 +9,16 @@ import numpy as np from numpy import linalg as la from . import ols as OLS -from .utils import set_endog, sp_att, optim_moments, get_spFilter, get_lags, spdot, RegressionPropsY, set_warn +from .utils import ( + set_endog, + sp_att, + optim_moments, + get_spFilter, + get_lags, + spdot, + RegressionPropsY, + set_warn, +) from . import twosls as TSLS from . import user_output as USER import pandas as pd @@ -23,7 +32,6 @@ class BaseGM_Error(RegressionPropsY): - """ GMM method for a spatial error model (note: no consistency checks diagnostics or constant added); based on Kelejian and Prucha @@ -91,7 +99,6 @@ class BaseGM_Error(RegressionPropsY): """ def __init__(self, y, x, w, hard_bound=False): - # 1a. OLS --> \tilde{betas} ols = OLS.BaseOLS(y=y, x=x) self.n, self.k = ols.x.shape @@ -120,7 +127,6 @@ def __init__(self, y, x, w, hard_bound=False): class GM_Error(BaseGM_Error): - """ GMM method for a spatial error model, with results and diagnostics; based on Kelejian and Prucha (1998, 1999) :cite:`Kelejian1998` :cite:`Kelejian1999`. 
@@ -294,43 +300,63 @@ class GM_Error(BaseGM_Error): """ def __init__( - self, y, x, w, slx_lags=0, slx_vars="All",vm=False, name_y=None, name_x=None, name_w=None, name_ds=None, latex=False, - hard_bound=False): - + self, + y, + x, + w, + slx_lags=0, + slx_vars="All", + vm=False, + name_y=None, + name_x=None, + name_w=None, + name_ds=None, + latex=False, + hard_bound=False, + ): n = USER.check_arrays(y, x) y, name_y = USER.check_y(y, n, name_y) w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) x_constant, name_x, warn = USER.check_constant(x, name_x) - name_x = USER.set_name_x(name_x, x_constant) # intialize in case of None, contains constant + name_x = USER.set_name_x( + name_x, x_constant + ) # intialize in case of None, contains constant set_warn(self, warn) - - self.title = "GM SPATIALLY WEIGHTED LEAST SQUARES" - if slx_lags >0: - #lag_x = get_lags(w, x_constant[:, 1:], slx_lags) - #x_constant = np.hstack((x_constant, lag_x)) -# name_x += USER.set_name_spatial_lags(name_x, slx_lags) - #name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant - x_constant,name_x = USER.flex_wx(w,x=x_constant,name_x=name_x,constant=True, - slx_lags=slx_lags,slx_vars=slx_vars) + self.title = "GM SPATIALLY WEIGHTED LEAST SQUARES" + if slx_lags > 0: + # lag_x = get_lags(w, x_constant[:, 1:], slx_lags) + # x_constant = np.hstack((x_constant, lag_x)) + # name_x += USER.set_name_spatial_lags(name_x, slx_lags) + # name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant + + x_constant, name_x = USER.flex_wx( + w, + x=x_constant, + name_x=name_x, + constant=True, + slx_lags=slx_lags, + slx_vars=slx_vars, + ) self.title += " WITH SLX (SLX-Error)" - - BaseGM_Error.__init__(self, y=y, x=x_constant, w=w.sparse, hard_bound=hard_bound) + + BaseGM_Error.__init__( + self, y=y, x=x_constant, w=w.sparse, hard_bound=hard_bound + ) self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) -# self.name_x = USER.set_name_x(name_x, x_constant) + # self.name_x = USER.set_name_x(name_x, x_constant) self.name_x = name_x # already includes constant self.name_x.append("lambda") self.name_w = USER.set_name_w(name_w, w) - self.output = pd.DataFrame(self.name_x, columns=['var_names']) - self.output['var_type'] = ['x'] * (len(self.name_x) - 1) + ['lambda'] - self.output['regime'], self.output['equation'] = (0, 0) + self.output = pd.DataFrame(self.name_x, columns=["var_names"]) + self.output["var_type"] = ["x"] * (len(self.name_x) - 1) + ["lambda"] + self.output["regime"], self.output["equation"] = (0, 0) output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) class BaseGM_Endog_Error(RegressionPropsY): - """ GMM method for a spatial error model with endogenous variables (note: no consistency checks, diagnostics or constant added); based on Kelejian and @@ -413,7 +439,6 @@ class BaseGM_Endog_Error(RegressionPropsY): """ def __init__(self, y, x, yend, q, w, hard_bound=False): - # 1a. TSLS --> \tilde{betas} tsls = TSLS.BaseTSLS(y=y, x=x, yend=yend, q=q) self.n, self.k = tsls.z.shape @@ -442,7 +467,6 @@ def __init__(self, y, x, yend, q, w, hard_bound=False): class GM_Endog_Error(BaseGM_Endog_Error): - """ GMM method for a spatial error model with endogenous variables, with results and diagnostics; based on Kelejian and Prucha (1998, @@ -468,7 +492,7 @@ class GM_Endog_Error(BaseGM_Endog_Error): Number of spatial lags of X to include in the model specification. If slx_lags>0, the specification becomes of the SLX-Error type. 
slx_vars : either "All" (default) or list of booleans to select x variables - to be lagged + to be lagged vm : boolean If True, include variance-covariance matrix in summary results @@ -668,29 +692,38 @@ def __init__( latex=False, hard_bound=False, ): - n = USER.check_arrays(y, x, yend, q) y, name_y = USER.check_y(y, n, name_y) w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) yend, q, name_yend, name_q = USER.check_endog([yend, q], [name_yend, name_q]) x_constant, name_x, warn = USER.check_constant(x, name_x) - name_x = USER.set_name_x(name_x, x_constant) # initialize for None, includes constant + name_x = USER.set_name_x( + name_x, x_constant + ) # initialize for None, includes constant set_warn(self, warn) self.title = "GM SPATIALLY WEIGHTED TWO STAGE LEAST SQUARES" - if slx_lags >0: - #lag_x = get_lags(w, x_constant[:, 1:], slx_lags) - #x_constant = np.hstack((x_constant, lag_x)) -# name_x += USER.set_name_spatial_lags(name_x, slx_lags) - #name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant - - x_constant,name_x = USER.flex_wx(w,x=x_constant,name_x=name_x,constant=True, - slx_lags=slx_lags,slx_vars=slx_vars) + if slx_lags > 0: + # lag_x = get_lags(w, x_constant[:, 1:], slx_lags) + # x_constant = np.hstack((x_constant, lag_x)) + # name_x += USER.set_name_spatial_lags(name_x, slx_lags) + # name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant + + x_constant, name_x = USER.flex_wx( + w, + x=x_constant, + name_x=name_x, + constant=True, + slx_lags=slx_lags, + slx_vars=slx_vars, + ) - self.title += " WITH SLX (SLX-Error)" - BaseGM_Endog_Error.__init__(self, y=y, x=x_constant, w=w.sparse, yend=yend, q=q, hard_bound=hard_bound) + self.title += " WITH SLX (SLX-Error)" + BaseGM_Endog_Error.__init__( + self, y=y, x=x_constant, w=w.sparse, yend=yend, q=q, hard_bound=hard_bound + ) self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) -# self.name_x = USER.set_name_x(name_x, x_constant) + # self.name_x = USER.set_name_x(name_x, x_constant) self.name_x = name_x # already includes constant self.name_yend = USER.set_name_yend(name_yend, yend) self.name_z = self.name_x + self.name_yend @@ -698,15 +731,15 @@ def __init__( self.name_q = USER.set_name_q(name_q, q) self.name_h = USER.set_name_h(self.name_x, self.name_q) self.name_w = USER.set_name_w(name_w, w) - self.output = pd.DataFrame(self.name_z, - columns=['var_names']) - self.output['var_type'] = ['x'] * len(self.name_x) + ['yend'] * len(self.name_yend) + ['lambda'] - self.output['regime'], self.output['equation'] = (0, 0) + self.output = pd.DataFrame(self.name_z, columns=["var_names"]) + self.output["var_type"] = ( + ["x"] * len(self.name_x) + ["yend"] * len(self.name_yend) + ["lambda"] + ) + self.output["regime"], self.output["equation"] = (0, 0) output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) class GM_Combo(BaseGM_Endog_Error): - """ GMM method for a spatial lag and error model with endogenous variables, with results and diagnostics; based on Kelejian and Prucha (1998, @@ -971,7 +1004,6 @@ def __init__( latex=False, hard_bound=False, ): - n = USER.check_arrays(y, x, yend, q) y, name_y = USER.check_y(y, n, name_y) w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) @@ -980,22 +1012,24 @@ def __init__( name_x = USER.set_name_x(name_x, x_constant) if slx_lags > 0: - yend2, q2, wx = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q, slx_lags,slx_vars) + yend2, q2, wx = set_endog( + y, x_constant[:, 1:], w, yend, q, 
w_lags, lag_q, slx_lags, slx_vars + ) x_constant = np.hstack((x_constant, wx)) else: yend2, q2 = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q) - - set_warn(self, warn) # OLD - #if slx_lags == 0: - #yend2, q2 = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q) - #else: - #yend2, q2, wx = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q, slx_lags) - #x_constant = np.hstack((x_constant, wx)) - - BaseGM_Endog_Error.__init__(self, y=y, x=x_constant, w=w.sparse, yend=yend2, q=q2, hard_bound=hard_bound) + # if slx_lags == 0: + # yend2, q2 = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q) + # else: + # yend2, q2, wx = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q, slx_lags) + # x_constant = np.hstack((x_constant, wx)) + + BaseGM_Endog_Error.__init__( + self, y=y, x=x_constant, w=w.sparse, yend=yend2, q=q2, hard_bound=hard_bound + ) self.rho = self.betas[-2] self.predy_e, self.e_pred, warn = sp_att( @@ -1004,36 +1038,37 @@ def __init__( set_warn(self, warn) self.title = "SPATIALLY WEIGHTED 2SLS - GM-COMBO MODEL" # OLD - #if slx_lags > 0: -# name_x += USER.set_name_spatial_lags(name_x, slx_lags) - #name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant - #self.title += " WITH SLX (GNSM)" + # if slx_lags > 0: + # name_x += USER.set_name_spatial_lags(name_x, slx_lags) + # name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant + # self.title += " WITH SLX (GNSM)" # kx and wkx are used to replace complex calculation for output if slx_lags > 0: # adjust for flexwx - if (isinstance(slx_vars,list)): # slx_vars has True,False - if len(slx_vars) != x.shape[1] : + if isinstance(slx_vars, list): # slx_vars has True,False + if len(slx_vars) != x.shape[1]: raise Exception("slx_vars incompatible with x column dimensions") else: # use slx_vars to extract proper columns workname = name_x[1:] kx = len(workname) - vv = list(compress(workname,slx_vars)) + vv = list(compress(workname, slx_vars)) name_x += USER.set_name_spatial_lags(vv, slx_lags) wkx = slx_vars.count(True) else: kx = len(name_x) - 1 wkx = kx - name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant + name_x += USER.set_name_spatial_lags( + name_x[1:], slx_lags + ) # exclude constant self.title += " WITH SLX (GNSM)" self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) -# self.name_x = USER.set_name_x(name_x, x_constant) + # self.name_x = USER.set_name_x(name_x, x_constant) self.name_x = name_x # constant already in list self.name_yend = USER.set_name_yend(name_yend, yend) self.name_yend.append(USER.set_name_yend_sp(self.name_y)) - self.name_z = self.name_x + self.name_yend self.name_z.append("lambda") self.name_q = USER.set_name_q(name_q, q) @@ -1041,45 +1076,66 @@ def __init__( if slx_lags > 0: # need to remove all but last SLX variables from name_x self.name_x0 = [] self.name_x0.append(self.name_x[0]) # constant - if (isinstance(slx_vars,list)): # boolean list passed + if isinstance(slx_vars, list): # boolean list passed # x variables that were not lagged - self.name_x0.extend(list(compress(self.name_x[1:],[not i for i in slx_vars]))) + self.name_x0.extend( + list(compress(self.name_x[1:], [not i for i in slx_vars])) + ) # last wkx variables self.name_x0.extend(self.name_x[-wkx:]) - else: - okx = int((self.k - self.yend.shape[1] - 1) / (slx_lags + 1)) # number of original exogenous vars + okx = int( + (self.k - self.yend.shape[1] - 1) / (slx_lags + 1) + ) # number of original exogenous vars 
self.name_x0.extend(self.name_x[-okx:]) - self.name_q.extend(USER.set_name_q_sp(self.name_x0, w_lags, self.name_q, lag_q)) - - #var_types = ['x'] * (kx + 1) + ['wx'] * kx * slx_lags + ['yend'] * (len(self.name_yend) - 1) + ['rho'] - var_types = ['x'] * (kx + 1) + ['wx'] * wkx * slx_lags + ['yend'] * (len(self.name_yend) - 1) + ['rho','lambda'] + self.name_q.extend( + USER.set_name_q_sp(self.name_x0, w_lags, self.name_q, lag_q) + ) + + # var_types = ['x'] * (kx + 1) + ['wx'] * kx * slx_lags + ['yend'] * (len(self.name_yend) - 1) + ['rho'] + var_types = ( + ["x"] * (kx + 1) + + ["wx"] * wkx * slx_lags + + ["yend"] * (len(self.name_yend) - 1) + + ["rho", "lambda"] + ) else: - self.name_q.extend(USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q)) - var_types = ['x'] * len(self.name_x) + ['yend'] * (len(self.name_yend) - 1) + ['rho','lambda'] - - - #self.name_q.extend(USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q)) + self.name_q.extend( + USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q) + ) + var_types = ( + ["x"] * len(self.name_x) + + ["yend"] * (len(self.name_yend) - 1) + + ["rho", "lambda"] + ) + + # self.name_q.extend(USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q)) self.name_h = USER.set_name_h(self.name_x, self.name_q) self.name_w = USER.set_name_w(name_w, w) - self.output = pd.DataFrame(self.name_z, - columns=['var_names']) - + self.output = pd.DataFrame(self.name_z, columns=["var_names"]) - #self.output['var_type'] = ['x'] * len(self.name_x) + ['yend'] * (len(self.name_yend) - 1) + ['rho', 'lambda'] - self.output['var_type'] = var_types + # self.output['var_type'] = ['x'] * len(self.name_x) + ['yend'] * (len(self.name_yend) - 1) + ['rho', 'lambda'] + self.output["var_type"] = var_types - self.output['regime'], self.output['equation'] = (0, 0) + self.output["regime"], self.output["equation"] = (0, 0) self.other_top = _spat_pseudo_r2(self) output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) -class GMM_Error(GM_Error, GM_Endog_Error, GM_Combo, GM_Error_Het, GM_Endog_Error_Het, - GM_Combo_Het, GM_Error_Hom, GM_Endog_Error_Hom, GM_Combo_Hom): - +class GMM_Error( + GM_Error, + GM_Endog_Error, + GM_Combo, + GM_Error_Het, + GM_Endog_Error_Het, + GM_Combo_Het, + GM_Error_Hom, + GM_Endog_Error_Hom, + GM_Combo_Hom, +): """ Wrapper function to call any of the GMM methods for a spatial error model available in spreg @@ -1105,12 +1161,12 @@ class GMM_Error(GM_Error, GM_Endog_Error, GM_Combo, GM_Error_Het, GM_Endog_Error homoskedasticity, and 'kp98', which does not provide inference on the spatial parameter for the error term. add_wy : boolean - If True, then a spatial lag of the dependent variable is included. + If True, then a spatial lag of the dependent variable is included. slx_lags : integer Number of spatial lags of X to include in the model specification. - If slx_lags>0, the specification becomes of the SLX-Error or GNSM type. + If slx_lags>0, the specification becomes of the SLX-Error or GNSM type. 
slx_vars : either "All" (default) or list of booleans to select x variables - to be lagged + to be lagged vm : boolean If True, include variance-covariance matrix in summary results @@ -1123,7 +1179,7 @@ class GMM_Error(GM_Error, GM_Endog_Error, GM_Combo, GM_Error_Het, GM_Endog_Error name_yend : list of strings Names of endogenous variables for use in output name_q : list of strings - Names of instruments for use in output + Names of instruments for use in output name_ds : string Name of dataset for use in output latex : boolean @@ -1133,7 +1189,7 @@ class GMM_Error(GM_Error, GM_Endog_Error, GM_Combo, GM_Error_Het, GM_Endog_Error autoregressive parameter is outside the maximum/minimum bounds. spat_diag : boolean, ignored, included for compatibility with other models **kwargs : keywords - Additional arguments to pass on to the estimators. + Additional arguments to pass on to the estimators. See the specific functions for details on what can be used. Attributes @@ -1194,8 +1250,8 @@ class GMM_Error(GM_Error, GM_Endog_Error, GM_Combo, GM_Error_Het, GM_Endog_Error name_q : list of strings (optional) Names of external instruments name_h : list of strings (optional) - Names of all instruments used in ouput - + Names of all instruments used in ouput + Examples -------- @@ -1314,55 +1370,287 @@ class GMM_Error(GM_Error, GM_Endog_Error, GM_Combo, GM_Error_Het, GM_Endog_Error """ def __init__( - self, y, x, w, yend=None, q=None, estimator='het', add_wy=False, slx_lags=0, slx_vars="All",vm=False, name_y=None, name_x=None, name_w=None, name_yend=None, - name_q=None, name_ds=None, latex=False, hard_bound=False,spat_diag=False, **kwargs): - - if estimator == 'het': + self, + y, + x, + w, + yend=None, + q=None, + estimator="het", + add_wy=False, + slx_lags=0, + slx_vars="All", + vm=False, + name_y=None, + name_x=None, + name_w=None, + name_yend=None, + name_q=None, + name_ds=None, + latex=False, + hard_bound=False, + spat_diag=False, + **kwargs, + ): + if estimator == "het": if yend is None and not add_wy: - GM_Error_Het.__init__(self, y=y, x=x, w=w, slx_lags=slx_lags, slx_vars=slx_vars, vm=vm, name_y=name_y, name_x=name_x, - name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound, **kwargs) + GM_Error_Het.__init__( + self, + y=y, + x=x, + w=w, + slx_lags=slx_lags, + slx_vars=slx_vars, + vm=vm, + name_y=name_y, + name_x=name_x, + name_w=name_w, + name_ds=name_ds, + latex=latex, + hard_bound=hard_bound, + **kwargs, + ) elif yend is not None and not add_wy: - GM_Endog_Error_Het.__init__(self, y=y, x=x, yend=yend, q=q, w=w, slx_lags=slx_lags, slx_vars=slx_vars, vm=vm, name_y=name_y, name_x=name_x, - name_yend=name_yend, name_q=name_q, name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound, **kwargs) + GM_Endog_Error_Het.__init__( + self, + y=y, + x=x, + yend=yend, + q=q, + w=w, + slx_lags=slx_lags, + slx_vars=slx_vars, + vm=vm, + name_y=name_y, + name_x=name_x, + name_yend=name_yend, + name_q=name_q, + name_w=name_w, + name_ds=name_ds, + latex=latex, + hard_bound=hard_bound, + **kwargs, + ) elif add_wy: - GM_Combo_Het.__init__(self, y=y, x=x, yend=yend, q=q, w=w, slx_lags=slx_lags, slx_vars=slx_vars, vm=vm, name_y=name_y, name_x=name_x, - name_yend=name_yend, name_q=name_q, name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound, **kwargs) + GM_Combo_Het.__init__( + self, + y=y, + x=x, + yend=yend, + q=q, + w=w, + slx_lags=slx_lags, + slx_vars=slx_vars, + vm=vm, + name_y=name_y, + name_x=name_x, + name_yend=name_yend, + name_q=name_q, + name_w=name_w, + name_ds=name_ds, + 
latex=latex, + hard_bound=hard_bound, + **kwargs, + ) else: - set_warn(self, 'Combination of arguments passed to GMM_Error not allowed. Using default arguments instead.') - GM_Error_Het.__init__(self, y=y, x=x, w=w, slx_lags=slx_lags, slx_vars=slx_vars, vm=vm, name_y=name_y, name_x=name_x, - name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound) - elif estimator == 'hom': + set_warn( + self, + "Combination of arguments passed to GMM_Error not allowed. Using default arguments instead.", + ) + GM_Error_Het.__init__( + self, + y=y, + x=x, + w=w, + slx_lags=slx_lags, + slx_vars=slx_vars, + vm=vm, + name_y=name_y, + name_x=name_x, + name_w=name_w, + name_ds=name_ds, + latex=latex, + hard_bound=hard_bound, + ) + elif estimator == "hom": if yend is None and not add_wy: - GM_Error_Hom.__init__(self, y=y, x=x, w=w, slx_lags=slx_lags, slx_vars=slx_vars,vm=vm, name_y=name_y, name_x=name_x, - name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound, **kwargs) + GM_Error_Hom.__init__( + self, + y=y, + x=x, + w=w, + slx_lags=slx_lags, + slx_vars=slx_vars, + vm=vm, + name_y=name_y, + name_x=name_x, + name_w=name_w, + name_ds=name_ds, + latex=latex, + hard_bound=hard_bound, + **kwargs, + ) elif yend is not None and not add_wy: - GM_Endog_Error_Hom.__init__(self, y=y, x=x, yend=yend, q=q, w=w, slx_lags=slx_lags, slx_vars=slx_vars, vm=vm, name_y=name_y, name_x=name_x, - name_yend=name_yend, name_q=name_q, name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound, **kwargs) + GM_Endog_Error_Hom.__init__( + self, + y=y, + x=x, + yend=yend, + q=q, + w=w, + slx_lags=slx_lags, + slx_vars=slx_vars, + vm=vm, + name_y=name_y, + name_x=name_x, + name_yend=name_yend, + name_q=name_q, + name_w=name_w, + name_ds=name_ds, + latex=latex, + hard_bound=hard_bound, + **kwargs, + ) elif add_wy: - GM_Combo_Hom.__init__(self, y=y, x=x, yend=yend, q=q, w=w, slx_lags=slx_lags, slx_vars=slx_vars, vm=vm, name_y=name_y, name_x=name_x, - name_yend=name_yend, name_q=name_q, name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound, **kwargs) + GM_Combo_Hom.__init__( + self, + y=y, + x=x, + yend=yend, + q=q, + w=w, + slx_lags=slx_lags, + slx_vars=slx_vars, + vm=vm, + name_y=name_y, + name_x=name_x, + name_yend=name_yend, + name_q=name_q, + name_w=name_w, + name_ds=name_ds, + latex=latex, + hard_bound=hard_bound, + **kwargs, + ) else: - set_warn(self, 'Combination of arguments passed to GMM_Error not allowed. Using default arguments instead.') - GM_Error_Hom.__init__(self, y=y, x=x, w=w, slx_lags=slx_lags, slx_vars=slx_vars,vm=vm, name_y=name_y, name_x=name_x, - name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound) - elif estimator == 'kp98': + set_warn( + self, + "Combination of arguments passed to GMM_Error not allowed. 
Using default arguments instead.", + ) + GM_Error_Hom.__init__( + self, + y=y, + x=x, + w=w, + slx_lags=slx_lags, + slx_vars=slx_vars, + vm=vm, + name_y=name_y, + name_x=name_x, + name_w=name_w, + name_ds=name_ds, + latex=latex, + hard_bound=hard_bound, + ) + elif estimator == "kp98": if yend is None and not add_wy: - GM_Error.__init__(self, y=y, x=x, w=w, slx_lags=slx_lags, slx_vars=slx_vars, vm=vm, name_y=name_y, name_x=name_x, - name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound, **kwargs) + GM_Error.__init__( + self, + y=y, + x=x, + w=w, + slx_lags=slx_lags, + slx_vars=slx_vars, + vm=vm, + name_y=name_y, + name_x=name_x, + name_w=name_w, + name_ds=name_ds, + latex=latex, + hard_bound=hard_bound, + **kwargs, + ) elif yend is not None and not add_wy: - GM_Endog_Error.__init__(self, y=y, x=x, yend=yend, q=q, w=w, slx_lags=slx_lags, slx_vars=slx_vars, vm=vm, name_y=name_y, name_x=name_x, - name_yend=name_yend, name_q=name_q, name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound, **kwargs) + GM_Endog_Error.__init__( + self, + y=y, + x=x, + yend=yend, + q=q, + w=w, + slx_lags=slx_lags, + slx_vars=slx_vars, + vm=vm, + name_y=name_y, + name_x=name_x, + name_yend=name_yend, + name_q=name_q, + name_w=name_w, + name_ds=name_ds, + latex=latex, + hard_bound=hard_bound, + **kwargs, + ) elif add_wy: - GM_Combo.__init__(self, y=y, x=x, yend=yend, q=q, w=w, slx_lags=slx_lags, slx_vars=slx_vars, vm=vm, name_y=name_y, name_x=name_x, - name_yend=name_yend, name_q=name_q, name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound, **kwargs) + GM_Combo.__init__( + self, + y=y, + x=x, + yend=yend, + q=q, + w=w, + slx_lags=slx_lags, + slx_vars=slx_vars, + vm=vm, + name_y=name_y, + name_x=name_x, + name_yend=name_yend, + name_q=name_q, + name_w=name_w, + name_ds=name_ds, + latex=latex, + hard_bound=hard_bound, + **kwargs, + ) else: - set_warn(self, 'Combination of arguments passed to GMM_Error not allowed. Using default arguments instead.') - GM_Error.__init__(self, y=y, x=x, w=w, slx_lags=slx_lags, slx_vars=slx_vars, vm=vm, name_y=name_y, name_x=name_x, - name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound) + set_warn( + self, + "Combination of arguments passed to GMM_Error not allowed. Using default arguments instead.", + ) + GM_Error.__init__( + self, + y=y, + x=x, + w=w, + slx_lags=slx_lags, + slx_vars=slx_vars, + vm=vm, + name_y=name_y, + name_x=name_x, + name_w=name_w, + name_ds=name_ds, + latex=latex, + hard_bound=hard_bound, + ) else: - set_warn(self, 'Combination of arguments passed to GMM_Error not allowed. Using default arguments instead.') - GM_Error_Het.__init__(self, y=y, x=x, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, - name_w=name_w, name_ds=name_ds, latex=latex, hard_bound=hard_bound) + set_warn( + self, + "Combination of arguments passed to GMM_Error not allowed. 
Using default arguments instead.", + ) + GM_Error_Het.__init__( + self, + y=y, + x=x, + w=w, + slx_lags=slx_lags, + vm=vm, + name_y=name_y, + name_x=name_x, + name_w=name_w, + name_ds=name_ds, + latex=latex, + hard_bound=hard_bound, + ) def _momentsGM_Error(w, u): @@ -1405,15 +1693,14 @@ def _test(): if __name__ == "__main__": - _test() import numpy as np import libpysal - db = libpysal.io.open(libpysal.examples.get_path('columbus.dbf'),'r') + db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r") y = np.array(db.by_col("HOVAL")) - y = np.reshape(y, (49,1)) + y = np.reshape(y, (49, 1)) X = [] X.append(db.by_col("INC")) X = np.array(X).T @@ -1425,12 +1712,23 @@ def _test(): q = np.array(q).T w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp")) - w.transform = 'r' - #reg = GM_Error(y, X, w=w, name_x=['inc'], name_y='hoval', name_ds='columbus', vm=True) - #reg = GM_Endog_Error(y, X, yd, q, w=w, name_x=['inc'], name_y='hoval', name_yend=['crime'], + w.transform = "r" + # reg = GM_Error(y, X, w=w, name_x=['inc'], name_y='hoval', name_ds='columbus', vm=True) + # reg = GM_Endog_Error(y, X, yd, q, w=w, name_x=['inc'], name_y='hoval', name_yend=['crime'], # name_q=['discbd'], name_ds='columbus',vm=True) - reg = GM_Combo(y, X, yd, q, w=w, name_x=['inc'], name_y='hoval', name_yend=['crime'], name_q=['discbd'], - name_ds='columbus', vm=True) + reg = GM_Combo( + y, + X, + yd, + q, + w=w, + name_x=["inc"], + name_y="hoval", + name_yend=["crime"], + name_q=["discbd"], + name_ds="columbus", + vm=True, + ) print(reg.output) - print(reg.summary) \ No newline at end of file + print(reg.summary) diff --git a/spreg/error_sp_het.py b/spreg/error_sp_het.py index e688ee1..b8bf01e 100755 --- a/spreg/error_sp_het.py +++ b/spreg/error_sp_het.py @@ -25,7 +25,6 @@ class BaseGM_Error_Het(RegressionPropsY): - """ GMM method for a spatial error model with heteroskedasticity (note: no consistency checks, diagnostics or constant added); based on @@ -110,8 +109,9 @@ class BaseGM_Error_Het(RegressionPropsY): [ 0.4118 0.168 ]] """ - def __init__(self, y, x, w, max_iter=1, epsilon=0.00001, step1c=False, hard_bound=False): - + def __init__( + self, y, x, w, max_iter=1, epsilon=0.00001, step1c=False, hard_bound=False + ): self.step1c = step1c # 1a. 
OLS --> \tilde{betas} ols = OLS.BaseOLS(y=y, x=x) @@ -153,10 +153,15 @@ def __init__(self, y, x, w, max_iter=1, epsilon=0.00001, step1c=False, hard_boun self.iter_stop = UTILS.iter_msg(self.iteration, max_iter) if hard_bound: if abs(lambda3) >= 0.99: - raise Exception("Spatial error parameter was outside the bounds of -0.99 and 0.99") + raise Exception( + "Spatial error parameter was outside the bounds of -0.99 and 0.99" + ) else: if abs(lambda3) >= 0.99: - set_warn(self, "Spatial error parameter was outside the bounds of -0.99 and 0.99") + set_warn( + self, + "Spatial error parameter was outside the bounds of -0.99 and 0.99", + ) sigma = get_psi_sigma(w, self.u, lambda3) vc3 = get_vc_het(w, wA1, sigma) @@ -371,33 +376,40 @@ def __init__( latex=False, hard_bound=False, ): - n = USER.check_arrays(y, x) y, name_y = USER.check_y(y, n, name_y) w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) x_constant, name_x, warn = USER.check_constant(x, name_x) - name_x = USER.set_name_x(name_x, x_constant) # initialize in case None, includes constant + name_x = USER.set_name_x( + name_x, x_constant + ) # initialize in case None, includes constant set_warn(self, warn) self.title = "GM SPATIALLY WEIGHTED LEAST SQUARES (HET)" - if slx_lags >0: - #lag_x = get_lags(w, x_constant[:, 1:], slx_lags) - #x_constant = np.hstack((x_constant, lag_x)) -# name_x += USER.set_name_spatial_lags(name_x, slx_lags) - #name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant + if slx_lags > 0: + # lag_x = get_lags(w, x_constant[:, 1:], slx_lags) + # x_constant = np.hstack((x_constant, lag_x)) + # name_x += USER.set_name_spatial_lags(name_x, slx_lags) + # name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant - x_constant,name_x = USER.flex_wx(w,x=x_constant,name_x=name_x,constant=True, - slx_lags=slx_lags,slx_vars=slx_vars) + x_constant, name_x = USER.flex_wx( + w, + x=x_constant, + name_x=name_x, + constant=True, + slx_lags=slx_lags, + slx_vars=slx_vars, + ) self.title += " WITH SLX (SLX-Error)" # OLD - #if slx_lags >0: - #lag_x = get_lags(w, x_constant[:, 1:], slx_lags) - #x_constant = np.hstack((x_constant, lag_x)) -# name_x += USER.set_name_spatial_lags(name_x, slx_lags) - #name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # no constant - #self.title += " WITH SLX (SLX-Error)" + # if slx_lags >0: + # lag_x = get_lags(w, x_constant[:, 1:], slx_lags) + # x_constant = np.hstack((x_constant, lag_x)) + # name_x += USER.set_name_spatial_lags(name_x, slx_lags) + # name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # no constant + # self.title += " WITH SLX (SLX-Error)" BaseGM_Error_Het.__init__( self, @@ -407,25 +419,23 @@ def __init__( max_iter=max_iter, step1c=step1c, epsilon=epsilon, - hard_bound = hard_bound + hard_bound=hard_bound, ) - self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) -# self.name_x = USER.set_name_x(name_x, x_constant) + # self.name_x = USER.set_name_x(name_x, x_constant) self.name_x = name_x # constant already included self.name_x.append("lambda") self.name_w = USER.set_name_w(name_w, w) - self.output = pd.DataFrame(self.name_x, columns=['var_names']) - self.output['var_type'] = ['x'] * (len(self.name_x)-1) + ['lambda'] - self.output['regime'], self.output['equation'] = (0, 0) + self.output = pd.DataFrame(self.name_x, columns=["var_names"]) + self.output["var_type"] = ["x"] * (len(self.name_x) - 1) + ["lambda"] + self.output["regime"], self.output["equation"] = (0, 0) self.other_top = 
_summary_iteration(self) output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) class BaseGM_Endog_Error_Het(RegressionPropsY): - """ GMM method for a spatial error model with heteroskedasticity and endogenous variables (note: no consistency checks, diagnostics or constant @@ -550,7 +560,6 @@ def __init__( inv_method="power_exp", hard_bound=False, ): - self.step1c = step1c # 1a. reg --> \tilde{betas} tsls = TSLS.BaseTSLS(y=y, x=x, yend=yend, q=q) @@ -590,11 +599,11 @@ def __init__( lambda2 = lambda1 # Forcing the 1st step lambda to be in the range [-0.9, 0.9] to avoid perfect collinearity in step 2 in case of SLX-Error or GNS models - #if lambda2 > 0.9: + # if lambda2 > 0.9: # lambda_old = 0.9 - #elif lambda2 < -0.9: + # elif lambda2 < -0.9: # lambda_old = -0.9 - #else: + # else: lambda_old = lambda2 self.iteration, eps = 0, 1 @@ -630,11 +639,11 @@ def __init__( moments_i = UTILS._moments2eqs(wA1, w, self.u) lambda3 = UTILS.optim_moments(moments_i, vc2) - #if abs(lambda3) <= 0.9: + # if abs(lambda3) <= 0.9: # pass - #elif lambda3 > 0.9: + # elif lambda3 > 0.9: # lambda3 = 0.9 - #elif lambda3 < -0.9: + # elif lambda3 < -0.9: # lambda3 = -0.9 eps = abs(lambda3 - lambda_old) @@ -644,14 +653,24 @@ def __init__( self.iter_stop = UTILS.iter_msg(self.iteration, max_iter) if hard_bound: if abs(lambda3) >= 0.99: - raise Exception("Spatial error parameter was outside the bounds of -0.99 and 0.99") + raise Exception( + "Spatial error parameter was outside the bounds of -0.99 and 0.99" + ) if abs(tsls_s.betas[-1]) >= 0.99: - raise Exception("Spatial lag parameter was outside the bounds of -0.99 and 0.99") + raise Exception( + "Spatial lag parameter was outside the bounds of -0.99 and 0.99" + ) else: if abs(lambda3) >= 0.99: - set_warn(self, "Spatial error parameter was outside the bounds of -0.99 and 0.99") + set_warn( + self, + "Spatial error parameter was outside the bounds of -0.99 and 0.99", + ) if abs(tsls_s.betas[-1]) >= 0.99: - set_warn(self, "Spatial lag parameter was outside the bounds of -0.99 and 0.99") + set_warn( + self, + "Spatial lag parameter was outside the bounds of -0.99 and 0.99", + ) zs = UTILS.get_spFilter(w, lambda3, self.z) P = get_P_hat(self, tsls.hthi, zs) @@ -663,7 +682,6 @@ def __init__( class GM_Endog_Error_Het(BaseGM_Endog_Error_Het): - """ GMM method for a spatial error model with heteroskedasticity and endogenous variables, with results and diagnostics; based on @@ -689,7 +707,7 @@ class GM_Endog_Error_Het(BaseGM_Endog_Error_Het): Number of spatial lags of X to include in the model specification. If slx_lags>0, the specification becomes of the SLX-Error type. slx_vars : either "All" (default) or list of booleans to select x variables - to be lagged + to be lagged max_iter : int Maximum number of iterations of steps 2a and 2b from :cite:`Arraiz2010`. 
Note: epsilon provides an additional @@ -916,32 +934,37 @@ def __init__( name_w=None, name_ds=None, latex=False, - hard_bound=False + hard_bound=False, ): - n = USER.check_arrays(y, x, yend, q) y, name_y = USER.check_y(y, n, name_y) w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) yend, q, name_yend, name_q = USER.check_endog([yend, q], [name_yend, name_q]) x_constant, name_x, warn = USER.check_constant(x, name_x) - name_x = USER.set_name_x(name_x, x_constant) # initialize in case None, includes constant + name_x = USER.set_name_x( + name_x, x_constant + ) # initialize in case None, includes constant set_warn(self, warn) self.title = "GM SPATIALLY WEIGHTED TWO STAGE LEAST SQUARES (HET)" - if slx_lags >0: - - x_constant,name_x = USER.flex_wx(w,x=x_constant,name_x=name_x,constant=True, - slx_lags=slx_lags,slx_vars=slx_vars) + if slx_lags > 0: + x_constant, name_x = USER.flex_wx( + w, + x=x_constant, + name_x=name_x, + constant=True, + slx_lags=slx_lags, + slx_vars=slx_vars, + ) - self.title += " WITH SLX (SLX-Error)" + self.title += " WITH SLX (SLX-Error)" # OLD - #if slx_lags > 0: - #lag_x = get_lags(w, x_constant[:, 1:], slx_lags) - #x_constant = np.hstack((x_constant, lag_x)) - #name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # no constant - #self.title += " WITH SLX (SLX-Error)" - + # if slx_lags > 0: + # lag_x = get_lags(w, x_constant[:, 1:], slx_lags) + # x_constant = np.hstack((x_constant, lag_x)) + # name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # no constant + # self.title += " WITH SLX (SLX-Error)" BaseGM_Endog_Error_Het.__init__( self, @@ -958,7 +981,7 @@ def __init__( ) self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) -# self.name_x = USER.set_name_x(name_x, x_constant) + # self.name_x = USER.set_name_x(name_x, x_constant) self.name_x = name_x # constant already included self.name_yend = USER.set_name_yend(name_yend, yend) self.name_z = self.name_x + self.name_yend @@ -966,16 +989,16 @@ def __init__( self.name_q = USER.set_name_q(name_q, q) self.name_h = USER.set_name_h(self.name_x, self.name_q) self.name_w = USER.set_name_w(name_w, w) - self.output = pd.DataFrame(self.name_z, - columns=['var_names']) - self.output['var_type'] = ['x'] * len(self.name_x) + ['yend'] * len(self.name_yend) + ['lambda'] - self.output['regime'], self.output['equation'] = (0, 0) + self.output = pd.DataFrame(self.name_z, columns=["var_names"]) + self.output["var_type"] = ( + ["x"] * len(self.name_x) + ["yend"] * len(self.name_yend) + ["lambda"] + ) + self.output["regime"], self.output["equation"] = (0, 0) self.other_top = _summary_iteration(self) output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) class BaseGM_Combo_Het(BaseGM_Endog_Error_Het): - """ GMM method for a spatial lag and error model with heteroskedasticity and endogenous variables (note: no consistency checks, diagnostics or constant @@ -1130,7 +1153,6 @@ def __init__( inv_method="power_exp", hard_bound=False, ): - BaseGM_Endog_Error_Het.__init__( self, y=y, @@ -1147,7 +1169,6 @@ def __init__( class GM_Combo_Het(BaseGM_Combo_Het): - """ GMM method for a spatial lag and error model with heteroskedasticity and endogenous variables, with results and diagnostics; based on @@ -1431,27 +1452,30 @@ def __init__( latex=False, hard_bound=False, ): - n = USER.check_arrays(y, x, yend, q) y, name_y = USER.check_y(y, n, name_y) w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) yend, q, name_yend, name_q = USER.check_endog([yend, q], [name_yend, name_q]) 
x_constant, name_x, warn = USER.check_constant(x, name_x) - name_x = USER.set_name_x(name_x, x_constant) # initialize in case None, includes constant + name_x = USER.set_name_x( + name_x, x_constant + ) # initialize in case None, includes constant set_warn(self, warn) if slx_lags > 0: - yend2, q2, wx = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q, slx_lags,slx_vars) + yend2, q2, wx = set_endog( + y, x_constant[:, 1:], w, yend, q, w_lags, lag_q, slx_lags, slx_vars + ) x_constant = np.hstack((x_constant, wx)) else: yend2, q2 = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q) # OLS - #if slx_lags == 0: - #yend2, q2 = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q) - #else: - #yend2, q2, wx = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q, slx_lags) - #x_constant = np.hstack((x_constant, wx)) + # if slx_lags == 0: + # yend2, q2 = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q) + # else: + # yend2, q2, wx = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q, slx_lags) + # x_constant = np.hstack((x_constant, wx)) BaseGM_Combo_Het.__init__( self, @@ -1476,31 +1500,33 @@ def __init__( self.title = "SPATIALLY WEIGHTED 2SLS- GM-COMBO MODEL (HET)" if slx_lags > 0: # adjust for flexwx - if (isinstance(slx_vars,list)): # slx_vars has True,False - if len(slx_vars) != x.shape[1] : + if isinstance(slx_vars, list): # slx_vars has True,False + if len(slx_vars) != x.shape[1]: raise Exception("slx_vars incompatible with x column dimensions") else: # use slx_vars to extract proper columns workname = name_x[1:] kx = len(workname) - vv = list(compress(workname,slx_vars)) + vv = list(compress(workname, slx_vars)) name_x += USER.set_name_spatial_lags(vv, slx_lags) wkx = slx_vars.count(True) else: kx = len(name_x) - 1 wkx = kx - name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant + name_x += USER.set_name_spatial_lags( + name_x[1:], slx_lags + ) # exclude constant self.title += " WITH SLX (GNSM)" # OLD - #if slx_lags > 0: -# name_x += USER.set_name_spatial_lags(name_x, slx_lags) - #name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # no constant - #self.title += " WITH SLX (GNSM)" + # if slx_lags > 0: + # name_x += USER.set_name_spatial_lags(name_x, slx_lags) + # name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # no constant + # self.title += " WITH SLX (GNSM)" self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) -# self.name_x = USER.set_name_x(name_x, x_constant) - self.name_x = name_x # constant already included + # self.name_x = USER.set_name_x(name_x, x_constant) + self.name_x = name_x # constant already included self.name_yend = USER.set_name_yend(name_yend, yend) self.name_yend.append(USER.set_name_yend_sp(self.name_y)) self.name_z = self.name_x + self.name_yend @@ -1510,39 +1536,52 @@ def __init__( if slx_lags > 0: # need to remove all but last SLX variables from name_x self.name_x0 = [] self.name_x0.append(self.name_x[0]) # constant - if (isinstance(slx_vars,list)): # boolean list passed + if isinstance(slx_vars, list): # boolean list passed # x variables that were not lagged - self.name_x0.extend(list(compress(self.name_x[1:],[not i for i in slx_vars]))) + self.name_x0.extend( + list(compress(self.name_x[1:], [not i for i in slx_vars])) + ) # last wkx variables self.name_x0.extend(self.name_x[-wkx:]) - else: - okx = int((self.k - self.yend.shape[1] - 1) / (slx_lags + 1)) # number of original exogenous vars + okx = int( + (self.k - self.yend.shape[1] - 1) / (slx_lags + 1) + ) 
# number of original exogenous vars self.name_x0.extend(self.name_x[-okx:]) - self.name_q.extend(USER.set_name_q_sp(self.name_x0, w_lags, self.name_q, lag_q)) + self.name_q.extend( + USER.set_name_q_sp(self.name_x0, w_lags, self.name_q, lag_q) + ) - #var_types = ['x'] * (kx + 1) + ['wx'] * kx * slx_lags + ['yend'] * (len(self.name_yend) - 1) + ['rho'] - var_types = ['x'] * (kx + 1) + ['wx'] * wkx * slx_lags + ['yend'] * (len(self.name_yend) - 1) + ['rho','lambda'] + # var_types = ['x'] * (kx + 1) + ['wx'] * kx * slx_lags + ['yend'] * (len(self.name_yend) - 1) + ['rho'] + var_types = ( + ["x"] * (kx + 1) + + ["wx"] * wkx * slx_lags + + ["yend"] * (len(self.name_yend) - 1) + + ["rho", "lambda"] + ) else: - self.name_q.extend(USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q)) - var_types = ['x'] * len(self.name_x) + ['yend'] * (len(self.name_yend) - 1) + ['rho','lambda'] - - + self.name_q.extend( + USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q) + ) + var_types = ( + ["x"] * len(self.name_x) + + ["yend"] * (len(self.name_yend) - 1) + + ["rho", "lambda"] + ) - #self.name_q.extend(USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q)) + # self.name_q.extend(USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q)) self.name_h = USER.set_name_h(self.name_x, self.name_q) self.name_w = USER.set_name_w(name_w, w) - self.output = pd.DataFrame(self.name_z, - columns=['var_names']) - - self.output['var_type'] = var_types + self.output = pd.DataFrame(self.name_z, columns=["var_names"]) + + self.output["var_type"] = var_types - #self.output['var_type'] = ['x'] * len(self.name_x) + ['yend'] * (len(self.name_yend)-1) + ['rho', 'lambda'] + # self.output['var_type'] = ['x'] * len(self.name_x) + ['yend'] * (len(self.name_yend)-1) + ['rho', 'lambda'] - self.output['regime'], self.output['equation'] = (0, 0) + self.output["regime"], self.output["equation"] = (0, 0) self.other_top = _spat_pseudo_r2(self) self.other_top += _summary_iteration(self) output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) @@ -1709,7 +1748,6 @@ def get_a1a2(w, wA1, reg, lambdapar, P, zs, inv_method, filt): def get_vc_het_tsls( w, wA1, reg, lambdapar, P, zs, inv_method, filt=True, save_a1a2=False ): - sigma = get_psi_sigma(w, reg.u, lambdapar) vc1 = get_vc_het(w, wA1, sigma) a1, a2 = get_a1a2(w, wA1, reg, lambdapar, P, zs, inv_method, filt) @@ -1793,9 +1831,9 @@ def _test(): import numpy as np import libpysal - db = libpysal.io.open(libpysal.examples.get_path('columbus.dbf'),'r') + db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r") y = np.array(db.by_col("HOVAL")) - y = np.reshape(y, (49,1)) + y = np.reshape(y, (49, 1)) X = [] X.append(db.by_col("INC")) X = np.array(X).T @@ -1807,11 +1845,23 @@ def _test(): q = np.array(q).T w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp")) - w.transform = 'r' + w.transform = "r" # reg = GM_Error_Het(y, X, w=w, name_x=['inc'], name_y='hoval', name_ds='columbus', vm=True) # reg = GM_Endog_Error_Het(y, X, yd, q, w=w, name_x=['inc'], name_y='hoval', name_yend=['crime'], # name_q=['discbd'], name_ds='columbus',vm=True) - reg = GM_Combo_Het(y, X, yd, q, w=w, step1c=True, name_x=['inc'], name_y='hoval', name_yend=['crime'], name_q=['discbd'], name_ds='columbus', - vm=True) + reg = GM_Combo_Het( + y, + X, + yd, + q, + w=w, + step1c=True, + name_x=["inc"], + name_y="hoval", + name_yend=["crime"], + name_q=["discbd"], + name_ds="columbus", + vm=True, + ) print(reg.output) - print(reg.summary) \ No newline at end 
of file + print(reg.summary) diff --git a/spreg/error_sp_het_regimes.py b/spreg/error_sp_het_regimes.py index 413f564..c269014 100644 --- a/spreg/error_sp_het_regimes.py +++ b/spreg/error_sp_het_regimes.py @@ -1,6 +1,7 @@ """ Spatial Error with Heteroskedasticity and Regimes family of models """ + __author__ = "Luc Anselin luc.anselin@asu.edu, Pedro V. Amaral pedro.amaral@asu.edu" import numpy as np @@ -21,7 +22,15 @@ get_vc_het_tsls, get_Omega_GS2SLS, ) -from .utils import RegressionPropsY, spdot, set_endog, sphstack, set_warn, sp_att, get_lags +from .utils import ( + RegressionPropsY, + spdot, + set_endog, + sphstack, + set_warn, + sp_att, + get_lags, +) from scipy import sparse as SP from libpysal.weights.spatial_lag import lag_spatial from platform import system @@ -30,7 +39,6 @@ class GM_Error_Het_Regimes(RegressionPropsY, REGI.Regimes_Frame): - """ GMM method for a spatial error model with heteroskedasticity and regimes; based on Arraiz et al :cite:`Arraiz2010`, following Anselin :cite:`Anselin2011`. @@ -325,7 +333,6 @@ def __init__( latex=False, hard_bound=False, ): - n = USER.check_arrays(y, x) y, name_y = USER.check_y(y, n, name_y) w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) @@ -344,7 +351,7 @@ def __init__( set_warn(self, warn) name_x = USER.set_name_x(name_x, x_constant, constant=True) - if slx_lags >0: + if slx_lags > 0: lag_x = get_lags(w, x_constant, slx_lags) x_constant = np.hstack((x_constant, lag_x)) name_x += USER.set_name_spatial_lags(name_x, slx_lags) @@ -439,24 +446,36 @@ def __init__( if slx_lags == 0: self.title = "GM SPATIALLY WEIGHTED MODEL (HET) - REGIMES" else: - self.title = "GM SPATIALLY WEIGHTED MODEL + SLX (Error-HET) - REGIMES" - + self.title = "GM SPATIALLY WEIGHTED MODEL + SLX (Error-HET) - REGIMES" + self.name_x.append("lambda") self.kf += 1 self.chow = REGI.Chow(self) self._cache = {} - self.output = pd.DataFrame(self.name_x, - columns=['var_names']) - self.output['var_type'] = ['x']*(len(self.name_x)-1)+['lambda'] - self.output['regime'] = x_rlist + ['_Global'] - self.output['equation'] = 0 + self.output = pd.DataFrame(self.name_x, columns=["var_names"]) + self.output["var_type"] = ["x"] * (len(self.name_x) - 1) + ["lambda"] + self.output["regime"] = x_rlist + ["_Global"] + self.output["equation"] = 0 self.other_top = _summary_iteration(self) output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) def _error_het_regimes_multi( - self, y, x, regimes, w, slx_lags, cores, max_iter, epsilon, step1c, cols2regi, vm, name_x, latex, hard_bound + self, + y, + x, + regimes, + w, + slx_lags, + cores, + max_iter, + epsilon, + step1c, + cols2regi, + vm, + name_x, + latex, + hard_bound, ): - regi_ids = dict( (r, list(np.where(np.array(regimes) == r)[0])) for r in self.regimes_set ) @@ -538,7 +557,9 @@ def _error_het_regimes_multi( results = {} self.name_y, self.name_x = [], [] counter = 0 - self.output = pd.DataFrame(columns=['var_names', 'var_type', 'regime', 'equation']) + self.output = pd.DataFrame( + columns=["var_names", "var_type", "regime", "equation"] + ) for r in self.regimes_set: """ if is_win: @@ -555,26 +576,34 @@ def _error_het_regimes_multi( (counter * self.kr) : ((counter + 1) * self.kr), (counter * self.kr) : ((counter + 1) * self.kr), ] = results[r].vm - self.betas[ - (counter * self.kr) : ((counter + 1) * self.kr), - ] = results[r].betas - self.u[ - regi_ids[r], - ] = results[r].u - self.predy[ - regi_ids[r], - ] = results[r].predy - self.e_filtered[ - regi_ids[r], - ] = results[r].e_filtered + self.betas[(counter * 
self.kr) : ((counter + 1) * self.kr),] = results[ + r + ].betas + self.u[regi_ids[r],] = results[r].u + self.predy[regi_ids[r],] = results[r].predy + self.e_filtered[regi_ids[r],] = results[r].e_filtered self.name_y += results[r].name_y - results[r].name_x = [str(r) + '_lambda' if value == 'lambda' else value for value in results[r].name_x] + results[r].name_x = [ + str(r) + "_lambda" if value == "lambda" else value + for value in results[r].name_x + ] self.name_x += results[r].name_x results[r].other_top = _summary_iteration(results[r]) - self.output = pd.concat([self.output, pd.DataFrame({'var_names': results[r].name_x, - 'var_type': ['x'] * (len(results[r].name_x)-1) + - ['lambda'], - 'regime': r, 'equation': r})], ignore_index=True) + self.output = pd.concat( + [ + self.output, + pd.DataFrame( + { + "var_names": results[r].name_x, + "var_type": ["x"] * (len(results[r].name_x) - 1) + + ["lambda"], + "regime": r, + "equation": r, + } + ), + ], + ignore_index=True, + ) counter += 1 self.chow = REGI.Chow(self) self.multi = results @@ -582,7 +611,6 @@ def _error_het_regimes_multi( class GM_Endog_Error_Het_Regimes(RegressionPropsY, REGI.Regimes_Frame): - """ GMM method for a spatial error model with heteroskedasticity, regimes and endogenous variables, with results and diagnostics; based on Arraiz et al @@ -938,7 +966,6 @@ def __init__( latex=False, hard_bound=False, ): - n = USER.check_arrays(y, x, yend, q) y, name_y = USER.check_y(y, n, name_y) w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) @@ -1114,17 +1141,20 @@ def __init__( self.kf += 1 self.chow = REGI.Chow(self) self._cache = {} - self.output = pd.DataFrame(self.name_z, - columns=['var_names']) - self.output['var_type'] = ['x']*len(self.name_x)+['yend']*len(self.name_yend)+['lambda'] - self.output['regime'] = x_rlist + yend_rlist + ['_Global'] - self.output['equation'] = 0 + self.output = pd.DataFrame(self.name_z, columns=["var_names"]) + self.output["var_type"] = ( + ["x"] * len(self.name_x) + ["yend"] * len(self.name_yend) + ["lambda"] + ) + self.output["regime"] = x_rlist + yend_rlist + ["_Global"] + self.output["equation"] = 0 if summ: self.other_top = _summary_iteration(self) if slx_lags == 0: - self.title = ("GM SPATIALLY WEIGHTED 2SLS (HET) - REGIMES") + self.title = "GM SPATIALLY WEIGHTED 2SLS (HET) - REGIMES" else: - self.title = ("GM SPATIALLY WEIGHTED 2SLS + SLX (Error-HET) - REGIMES") + self.title = ( + "GM SPATIALLY WEIGHTED 2SLS + SLX (Error-HET) - REGIMES" + ) output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) def _endog_error_het_regimes_multi( @@ -1150,7 +1180,6 @@ def _endog_error_het_regimes_multi( latex, hard_bound, ): - regi_ids = dict( (r, list(np.where(np.array(regimes) == r)[0])) for r in self.regimes_set ) @@ -1255,7 +1284,9 @@ def _endog_error_het_regimes_multi( self.name_h, ) = ([], [], [], [], [], []) counter = 0 - self.output = pd.DataFrame(columns=['var_names', 'var_type', 'regime', 'equation']) + self.output = pd.DataFrame( + columns=["var_names", "var_type", "regime", "equation"] + ) for r in self.regimes_set: """ if is_win: @@ -1272,18 +1303,12 @@ def _endog_error_het_regimes_multi( (counter * self.kr) : ((counter + 1) * self.kr), (counter * self.kr) : ((counter + 1) * self.kr), ] = results[r].vm - self.betas[ - (counter * self.kr) : ((counter + 1) * self.kr), - ] = results[r].betas - self.u[ - regi_ids[r], - ] = results[r].u - self.predy[ - regi_ids[r], - ] = results[r].predy - self.e_filtered[ - regi_ids[r], - ] = results[r].e_filtered + self.betas[(counter * 
self.kr) : ((counter + 1) * self.kr),] = results[ + r + ].betas + self.u[regi_ids[r],] = results[r].u + self.predy[regi_ids[r],] = results[r].predy + self.e_filtered[regi_ids[r],] = results[r].e_filtered self.name_y += results[r].name_y self.name_x += results[r].name_x self.name_yend += results[r].name_yend @@ -1291,21 +1316,36 @@ def _endog_error_het_regimes_multi( self.name_z += results[r].name_z self.name_h += results[r].name_h if add_lag != False: - self.predy_e[ - regi_ids[r], - ] = results[r].predy_e - self.e_pred[ - regi_ids[r], - ] = results[r].e_pred + self.predy_e[regi_ids[r],] = results[r].predy_e + self.e_pred[regi_ids[r],] = results[r].e_pred results[r].other_top = _spat_pseudo_r2(results[r]) - v_type = ['x'] * len(results[r].name_x) + ['yend'] * (len(results[r].name_yend)-1) + ['rho','lambda'] + v_type = ( + ["x"] * len(results[r].name_x) + + ["yend"] * (len(results[r].name_yend) - 1) + + ["rho", "lambda"] + ) else: results[r].other_top = "" - v_type = ['x'] * len(results[r].name_x) + ['yend'] * len(results[r].name_yend) + ['lambda'] + v_type = ( + ["x"] * len(results[r].name_x) + + ["yend"] * len(results[r].name_yend) + + ["lambda"] + ) results[r].other_top += _summary_iteration(results[r]) - self.output = pd.concat([self.output, pd.DataFrame({'var_names': results[r].name_z, - 'var_type': v_type, - 'regime': r, 'equation': r})], ignore_index=True) + self.output = pd.concat( + [ + self.output, + pd.DataFrame( + { + "var_names": results[r].name_z, + "var_type": v_type, + "regime": r, + "equation": r, + } + ), + ], + ignore_index=True, + ) counter += 1 self.chow = REGI.Chow(self) self.multi = results @@ -1313,7 +1353,6 @@ def _endog_error_het_regimes_multi( class GM_Combo_Het_Regimes(GM_Endog_Error_Het_Regimes): - """ GMM method for a spatial lag and error model with heteroskedasticity, regimes and endogenous variables, with results and diagnostics; @@ -1360,7 +1399,7 @@ class GM_Combo_Het_Regimes(GM_Endog_Error_Het_Regimes): the spatial parameter is fixed across regimes. slx_lags : integer Number of spatial lags of X to include in the model specification. - If slx_lags>0, the specification becomes of the GNSM type. + If slx_lags>0, the specification becomes of the GNSM type. w_lags : integer Orders of W to include as instruments for the spatially lagged dependent variable. 
For example, w_lags=1, then @@ -1712,10 +1751,10 @@ def __init__( hard_bound=False, ): if regime_lag_sep and not regime_err_sep: - set_warn(self, "regime_err_sep set to True when regime_lag_sep=True.") + set_warn(self, "regime_err_sep set to True when regime_lag_sep=True.") regime_err_sep = True if regime_err_sep and not regime_lag_sep: - set_warn(self, "regime_err_sep set to False when regime_lag_sep=False.") + set_warn(self, "regime_err_sep set to False when regime_lag_sep=False.") regime_err_sep = False n = USER.check_arrays(y, x) self.step1c = step1c @@ -1732,20 +1771,35 @@ def __init__( regimes, name_regimes = USER.check_reg_list(regimes, name_regimes, n) if regime_err_sep and any(col != True for col in cols2regi): - set_warn(self, "All coefficients must vary across regimes if regime_err_sep = True, so setting cols2regi = 'all'.") + set_warn( + self, + "All coefficients must vary across regimes if regime_err_sep = True, so setting cols2regi = 'all'.", + ) cols2regi = "all" if slx_lags > 0: - yend2, q2, wx = set_endog(y, x_constant, w, yend, q, w_lags, lag_q, slx_lags) + yend2, q2, wx = set_endog( + y, x_constant, w, yend, q, w_lags, lag_q, slx_lags + ) x_constant = np.hstack((x_constant, wx)) name_slx = USER.set_name_spatial_lags(name_x, slx_lags) - name_q.extend(USER.set_name_q_sp(name_slx[-len(name_x):], w_lags, name_q, lag_q, force_all=True)) - name_x += name_slx - cols2regi = REGI.check_cols2regi(constant_regi, cols2regi, x_constant[:, :-1], yend=yend2, add_cons=False) + name_q.extend( + USER.set_name_q_sp( + name_slx[-len(name_x) :], w_lags, name_q, lag_q, force_all=True + ) + ) + name_x += name_slx + cols2regi = REGI.check_cols2regi( + constant_regi, cols2regi, x_constant[:, :-1], yend=yend2, add_cons=False + ) else: - name_q.extend(USER.set_name_q_sp(name_x, w_lags, name_q, lag_q, force_all=True)) + name_q.extend( + USER.set_name_q_sp(name_x, w_lags, name_q, lag_q, force_all=True) + ) yend2, q2 = yend, q - cols2regi = REGI.check_cols2regi(constant_regi, cols2regi, x_constant, yend=yend2, add_cons=False) + cols2regi = REGI.check_cols2regi( + constant_regi, cols2regi, x_constant, yend=yend2, add_cons=False + ) self.regimes_set = REGI._get_regimes_set(regimes) self.regimes = regimes @@ -1807,8 +1861,10 @@ def __init__( if slx_lags == 0: self.title = "GM SPATIALLY WEIGHTED 2SLS-COMBO MODEL (HET) - REGIMES" else: - self.title = "GM SPATIALLY WEIGHTED 2SLS-COMBO + SLX (GNSM-HET) - REGIMES" - self.output.iat[-2, self.output.columns.get_loc('var_type')] = 'rho' + self.title = ( + "GM SPATIALLY WEIGHTED 2SLS-COMBO + SLX (GNSM-HET) - REGIMES" + ) + self.output.iat[-2, self.output.columns.get_loc("var_type")] = "rho" self.other_top = _spat_pseudo_r2(self) self.other_top += _summary_iteration(self) output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) @@ -1835,7 +1891,13 @@ def _work_error( y_r = y[regi_ids[r]] x_r = x[regi_ids[r]] model = BaseGM_Error_Het( - y_r, x_r, w_r.sparse, max_iter=max_iter, epsilon=epsilon, step1c=step1c, hard_bound=hard_bound, + y_r, + x_r, + w_r.sparse, + max_iter=max_iter, + epsilon=epsilon, + step1c=step1c, + hard_bound=hard_bound, ) set_warn(model, warn) model.w = w_r @@ -1905,22 +1967,22 @@ def _work_endog_error( w_r, model.y, model.predy, model.yend[:, -1].reshape(model.n, 1), model.rho ) set_warn(model, warn) - + if slx_lags == 0: if add_lag != False: - model.title = "GM SPATIALLY WEIGHTED 2SLS-COMBO MODEL (HET)- REGIME %s" % r + model.title = "GM SPATIALLY WEIGHTED 2SLS-COMBO MODEL (HET)- REGIME %s" % r else: model.title = "GM SPATIALLY 
WEIGHTED 2SLS (HET) - REGIME %s" % r else: if add_lag != False: - model.title = "GM SPATIAL COMBO MODEL + SLX (GNSM-HET) - REGIME %s" % r + model.title = "GM SPATIAL COMBO MODEL + SLX (GNSM-HET) - REGIME %s" % r else: model.title = "GM SPATIALLY WEIGHTED 2SLS + SLX (Error-HET) - REGIME %s" % r model.name_ds = name_ds model.name_y = "%s_%s" % (str(r), name_y) model.name_x = ["%s_%s" % (str(r), i) for i in name_x] model.name_yend = ["%s_%s" % (str(r), i) for i in name_yend] - model.name_z = model.name_x + model.name_yend + [str(r)+"lambda"] + model.name_z = model.name_x + model.name_yend + [str(r) + "lambda"] model.name_q = ["%s_%s" % (str(r), i) for i in name_q] model.name_h = model.name_x + model.name_q model.name_w = name_w @@ -1943,9 +2005,9 @@ def _test(): import numpy as np import libpysal - db = libpysal.io.open(libpysal.examples.get_path('columbus.dbf'),'r') + db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r") y = np.array(db.by_col("HOVAL")) - y = np.reshape(y, (49,1)) + y = np.reshape(y, (49, 1)) X = [] X.append(db.by_col("INC")) X = np.array(X).T @@ -1956,16 +2018,31 @@ def _test(): q.append(db.by_col("DISCBD")) q = np.array(q).T - r_var = 'NSA' + r_var = "NSA" regimes = db.by_col(r_var) w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp")) - w.transform = 'r' - #reg = GM_Error_Het_Regimes(y, X, regimes, w=w, name_x=['inc'], name_y='hoval', name_ds='columbus', vm=True, + w.transform = "r" + # reg = GM_Error_Het_Regimes(y, X, regimes, w=w, name_x=['inc'], name_y='hoval', name_ds='columbus', vm=True, # regime_err_sep=True) - #reg = GM_Endog_Error_Het_Regimes(y, X, yd, q, regimes, w=w, name_x=['inc'], name_y='hoval', name_yend=['crime'], + # reg = GM_Endog_Error_Het_Regimes(y, X, yd, q, regimes, w=w, name_x=['inc'], name_y='hoval', name_yend=['crime'], # name_q=['discbd'], name_ds='columbus',vm=True, regime_err_sep=True) - reg = GM_Combo_Het_Regimes(y, X, regimes, yd, q, w=w, step1c=True, name_x=['inc'], name_y='hoval', name_yend=['crime'], - name_q=['discbd'], name_ds='columbus', vm=True, regime_err_sep=False, regime_lag_sep=False) + reg = GM_Combo_Het_Regimes( + y, + X, + regimes, + yd, + q, + w=w, + step1c=True, + name_x=["inc"], + name_y="hoval", + name_yend=["crime"], + name_q=["discbd"], + name_ds="columbus", + vm=True, + regime_err_sep=False, + regime_lag_sep=False, + ) print(reg.output) - print(reg.summary) \ No newline at end of file + print(reg.summary) diff --git a/spreg/error_sp_hom.py b/spreg/error_sp_hom.py index 2a8c435..6f2f438 100644 --- a/spreg/error_sp_hom.py +++ b/spreg/error_sp_hom.py @@ -1,5 +1,5 @@ """ -Hom family of models based on: :cite:`Drukker2013` +Hom family of models based on: :cite:`Drukker2013` Following: :cite:`Anselin2011` """ @@ -24,7 +24,6 @@ class BaseGM_Error_Hom(RegressionPropsY): - """ GMM method for a spatial error model with homoskedasticity (note: no consistency checks, diagnostics or constant added); based on @@ -120,7 +119,9 @@ class BaseGM_Error_Hom(RegressionPropsY): [ -2.40000000e-03 3.00000000e-04 -1.00000000e-04 3.37000000e-02]] """ - def __init__(self, y, x, w, max_iter=1, epsilon=0.00001, A1="hom_sc", hard_bound=False): + def __init__( + self, y, x, w, max_iter=1, epsilon=0.00001, A1="hom_sc", hard_bound=False + ): if A1 == "hom": wA1 = get_A1_hom(w) elif A1 == "hom_sc": @@ -166,7 +167,6 @@ def __init__(self, y, x, w, max_iter=1, epsilon=0.00001, A1="hom_sc", hard_bound class GM_Error_Hom(BaseGM_Error_Hom): - """ GMM method for a spatial error model with homoskedasticity, with 
results and diagnostics; based on Drukker et al. (2013) :cite:`Drukker2013`, following Anselin @@ -367,30 +367,34 @@ def __init__( latex=False, hard_bound=False, ): - n = USER.check_arrays(y, x) y, name_y = USER.check_y(y, n, name_y) w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) x_constant, name_x, warn = USER.check_constant(x, name_x) - name_x = USER.set_name_x(name_x, x_constant) # initialize in case None, includes constant + name_x = USER.set_name_x( + name_x, x_constant + ) # initialize in case None, includes constant set_warn(self, warn) self.title = "GM SPATIALLY WEIGHTED LEAST SQUARES (HOM)" - if slx_lags >0: - - x_constant,name_x = USER.flex_wx(w,x=x_constant,name_x=name_x,constant=True, - slx_lags=slx_lags,slx_vars=slx_vars) + if slx_lags > 0: + x_constant, name_x = USER.flex_wx( + w, + x=x_constant, + name_x=name_x, + constant=True, + slx_lags=slx_lags, + slx_vars=slx_vars, + ) self.title += " WITH SLX (SLX-Error)" # OLD - #if slx_lags >0: - #lag_x = get_lags(w, x_constant[:, 1:], slx_lags) - #x_constant = np.hstack((x_constant, lag_x)) -# name_x += USER.set_name_spatial_lags(name_x, slx_lags) - #name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant - #self.title += " WITH SLX (SLX-Error)" - - + # if slx_lags >0: + # lag_x = get_lags(w, x_constant[:, 1:], slx_lags) + # x_constant = np.hstack((x_constant, lag_x)) + # name_x += USER.set_name_spatial_lags(name_x, slx_lags) + # name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant + # self.title += " WITH SLX (SLX-Error)" BaseGM_Error_Hom.__init__( self, @@ -400,24 +404,23 @@ def __init__( A1=A1, max_iter=max_iter, epsilon=epsilon, - hard_bound=hard_bound + hard_bound=hard_bound, ) self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) -# self.name_x = USER.set_name_x(name_x, x_constant) + # self.name_x = USER.set_name_x(name_x, x_constant) self.name_x = name_x # constant already included self.name_x.append("lambda") self.name_w = USER.set_name_w(name_w, w) self.A1 = A1 - self.output = pd.DataFrame(self.name_x, columns=['var_names']) - self.output['var_type'] = ['x'] * (len(self.name_x) - 1) + ['lambda'] - self.output['regime'], self.output['equation'] = (0, 0) + self.output = pd.DataFrame(self.name_x, columns=["var_names"]) + self.output["var_type"] = ["x"] * (len(self.name_x) - 1) + ["lambda"] + self.output["regime"], self.output["equation"] = (0, 0) self.other_top = _summary_iteration(self) output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) class BaseGM_Endog_Error_Hom(RegressionPropsY): - """ GMM method for a spatial error model with homoskedasticity and endogenous variables (note: no consistency checks, diagnostics or constant @@ -531,8 +534,18 @@ class BaseGM_Endog_Error_Hom(RegressionPropsY): """ - def __init__(self, y, x, yend, q, w, max_iter=1, epsilon=0.00001, A1="hom_sc", hard_bound=False): - + def __init__( + self, + y, + x, + yend, + q, + w, + max_iter=1, + epsilon=0.00001, + A1="hom_sc", + hard_bound=False, + ): if A1 == "hom": wA1 = get_A1_hom(w) elif A1 == "hom_sc": @@ -586,7 +599,6 @@ def __init__(self, y, x, yend, q, w, max_iter=1, epsilon=0.00001, A1="hom_sc", h class GM_Endog_Error_Hom(BaseGM_Endog_Error_Hom): - """ GMM method for a spatial error model with homoskedasticity and endogenous variables, with results and diagnostics; based on Drukker et al. 
(2013) @@ -610,9 +622,9 @@ class GM_Endog_Error_Hom(BaseGM_Endog_Error_Hom): Spatial weights object slx_lags : integer Number of spatial lags of X to include in the model specification. - If slx_lags>0, the specification becomes of the SLX-Error type. + If slx_lags>0, the specification becomes of the SLX-Error type. slx_vars : either "All" (default) or list of booleans to select x variables - to be lagged + to be lagged max_iter : int Maximum number of iterations of steps 2a and 2b from :cite:`Arraiz2010`. Note: epsilon provides an additional stop condition. @@ -839,31 +851,36 @@ def __init__( latex=False, hard_bound=False, ): - n = USER.check_arrays(y, x, yend, q) y, name_y = USER.check_y(y, n, name_y) w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) yend, q, name_yend, name_q = USER.check_endog([yend, q], [name_yend, name_q]) x_constant, name_x, warn = USER.check_constant(x, name_x) - name_x = USER.set_name_x(name_x, x_constant) # initialize in case None, includes constant + name_x = USER.set_name_x( + name_x, x_constant + ) # initialize in case None, includes constant set_warn(self, warn) self.title = "GM SPATIALLY WEIGHTED TWO STAGE LEAST SQUARES (HOM)" - if slx_lags >0: - - x_constant,name_x = USER.flex_wx(w,x=x_constant,name_x=name_x,constant=True, - slx_lags=slx_lags,slx_vars=slx_vars) + if slx_lags > 0: + x_constant, name_x = USER.flex_wx( + w, + x=x_constant, + name_x=name_x, + constant=True, + slx_lags=slx_lags, + slx_vars=slx_vars, + ) - self.title += " WITH SLX (SLX-Error)" + self.title += " WITH SLX (SLX-Error)" # OLD - #if slx_lags > 0: - #lag_x = get_lags(w, x_constant[:, 1:], slx_lags) - #x_constant = np.hstack((x_constant, lag_x)) -# name_x += USER.set_name_spatial_lags(name_x, slx_lags) - #name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant - #self.title += " WITH SLX (SLX-Error)" - + # if slx_lags > 0: + # lag_x = get_lags(w, x_constant[:, 1:], slx_lags) + # x_constant = np.hstack((x_constant, lag_x)) + # name_x += USER.set_name_spatial_lags(name_x, slx_lags) + # name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant + # self.title += " WITH SLX (SLX-Error)" BaseGM_Endog_Error_Hom.__init__( self, @@ -879,7 +896,7 @@ def __init__( ) self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) -# self.name_x = USER.set_name_x(name_x, x_constant) + # self.name_x = USER.set_name_x(name_x, x_constant) self.name_x = name_x # already includes constant self.name_yend = USER.set_name_yend(name_yend, yend) self.name_z = self.name_x + self.name_yend @@ -888,16 +905,16 @@ def __init__( self.name_h = USER.set_name_h(self.name_x, self.name_q) self.name_w = USER.set_name_w(name_w, w) self.A1 = A1 - self.output = pd.DataFrame(self.name_z, - columns=['var_names']) - self.output['var_type'] = ['x'] * len(self.name_x) + ['yend'] * len(self.name_yend) + ['lambda'] - self.output['regime'], self.output['equation'] = (0, 0) + self.output = pd.DataFrame(self.name_z, columns=["var_names"]) + self.output["var_type"] = ( + ["x"] * len(self.name_x) + ["yend"] * len(self.name_yend) + ["lambda"] + ) + self.output["regime"], self.output["equation"] = (0, 0) self.other_top = _summary_iteration(self) output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) class BaseGM_Combo_Hom(BaseGM_Endog_Error_Hom): - """ GMM method for a spatial lag and error model with homoskedasticity and endogenous variables (note: no consistency checks, diagnostics or constant @@ -1055,7 +1072,6 @@ def __init__( A1="hom_sc", 
hard_bound=False, ): - BaseGM_Endog_Error_Hom.__init__( self, y=y, @@ -1071,7 +1087,6 @@ def __init__( class GM_Combo_Hom(BaseGM_Combo_Hom): - """ GMM method for a spatial lag and error model with homoskedasticity and endogenous variables, with results and diagnostics; based on Drukker et @@ -1350,28 +1365,30 @@ def __init__( latex=False, hard_bound=False, ): - n = USER.check_arrays(y, x, yend, q) y, name_y = USER.check_y(y, n, name_y) w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) yend, q, name_yend, name_q = USER.check_endog([yend, q], [name_yend, name_q]) x_constant, name_x, warn = USER.check_constant(x, name_x) - name_x = USER.set_name_x(name_x, x_constant) # initialize in case None, includes constant + name_x = USER.set_name_x( + name_x, x_constant + ) # initialize in case None, includes constant set_warn(self, warn) if slx_lags > 0: - yend2, q2, wx = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q, slx_lags,slx_vars) + yend2, q2, wx = set_endog( + y, x_constant[:, 1:], w, yend, q, w_lags, lag_q, slx_lags, slx_vars + ) x_constant = np.hstack((x_constant, wx)) else: yend2, q2 = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q) - # OLD - #if slx_lags == 0: - #yend2, q2 = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q) - #else: - #yend2, q2, wx = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q, slx_lags) - #x_constant = np.hstack((x_constant, wx)) + # if slx_lags == 0: + # yend2, q2 = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q) + # else: + # yend2, q2, wx = set_endog(y, x_constant[:, 1:], w, yend, q, w_lags, lag_q, slx_lags) + # x_constant = np.hstack((x_constant, wx)) BaseGM_Combo_Hom.__init__( self, @@ -1395,32 +1412,32 @@ def __init__( self.title = "SPATIALLY WEIGHTED 2SLS- GM-COMBO MODEL (HOM)" if slx_lags > 0: # adjust for flexwx - if (isinstance(slx_vars,list)): # slx_vars has True,False - if len(slx_vars) != x.shape[1] : + if isinstance(slx_vars, list): # slx_vars has True,False + if len(slx_vars) != x.shape[1]: raise Exception("slx_vars incompatible with x column dimensions") else: # use slx_vars to extract proper columns workname = name_x[1:] kx = len(workname) - vv = list(compress(workname,slx_vars)) + vv = list(compress(workname, slx_vars)) name_x += USER.set_name_spatial_lags(vv, slx_lags) wkx = slx_vars.count(True) else: kx = len(name_x) - 1 wkx = kx - name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant + name_x += USER.set_name_spatial_lags( + name_x[1:], slx_lags + ) # exclude constant self.title += " WITH SLX (GNSM)" - # OLD - #if slx_lags > 0: -# name_x += USER.set_name_spatial_lags(name_x, slx_lags) - #name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant - #self.title += " WITH SLX (GNSM)" - + # if slx_lags > 0: + # name_x += USER.set_name_spatial_lags(name_x, slx_lags) + # name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant + # self.title += " WITH SLX (GNSM)" self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) -# self.name_x = USER.set_name_x(name_x, x_constant) + # self.name_x = USER.set_name_x(name_x, x_constant) self.name_x = name_x # constant already included self.name_yend = USER.set_name_yend(name_yend, yend) self.name_yend.append(USER.set_name_yend_sp(self.name_y)) @@ -1431,42 +1448,57 @@ def __init__( if slx_lags > 0: # need to remove all but last SLX variables from name_x self.name_x0 = [] self.name_x0.append(self.name_x[0]) # constant - if (isinstance(slx_vars,list)): # boolean list 
passed + if isinstance(slx_vars, list): # boolean list passed # x variables that were not lagged - self.name_x0.extend(list(compress(self.name_x[1:],[not i for i in slx_vars]))) + self.name_x0.extend( + list(compress(self.name_x[1:], [not i for i in slx_vars])) + ) # last wkx variables self.name_x0.extend(self.name_x[-wkx:]) - else: - okx = int((self.k - self.yend.shape[1] - 1) / (slx_lags + 1)) # number of original exogenous vars + okx = int( + (self.k - self.yend.shape[1] - 1) / (slx_lags + 1) + ) # number of original exogenous vars self.name_x0.extend(self.name_x[-okx:]) - self.name_q.extend(USER.set_name_q_sp(self.name_x0, w_lags, self.name_q, lag_q)) - - #var_types = ['x'] * (kx + 1) + ['wx'] * kx * slx_lags + ['yend'] * (len(self.name_yend) - 1) + ['rho'] - var_types = ['x'] * (kx + 1) + ['wx'] * wkx * slx_lags + ['yend'] * (len(self.name_yend) - 1) + ['rho','lambda'] + self.name_q.extend( + USER.set_name_q_sp(self.name_x0, w_lags, self.name_q, lag_q) + ) + + # var_types = ['x'] * (kx + 1) + ['wx'] * kx * slx_lags + ['yend'] * (len(self.name_yend) - 1) + ['rho'] + var_types = ( + ["x"] * (kx + 1) + + ["wx"] * wkx * slx_lags + + ["yend"] * (len(self.name_yend) - 1) + + ["rho", "lambda"] + ) else: - self.name_q.extend(USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q)) - var_types = ['x'] * len(self.name_x) + ['yend'] * (len(self.name_yend) - 1) + ['rho','lambda'] - - - #self.name_q.extend(USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q)) + self.name_q.extend( + USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q) + ) + var_types = ( + ["x"] * len(self.name_x) + + ["yend"] * (len(self.name_yend) - 1) + + ["rho", "lambda"] + ) + + # self.name_q.extend(USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q)) self.name_h = USER.set_name_h(self.name_x, self.name_q) self.name_w = USER.set_name_w(name_w, w) self.A1 = A1 - self.output = pd.DataFrame(self.name_z, - columns=['var_names']) - - self.output['var_type'] = var_types + self.output = pd.DataFrame(self.name_z, columns=["var_names"]) + + self.output["var_type"] = var_types - #self.output['var_type'] = ['x'] * len(self.name_x) + ['yend'] * (len(self.name_yend) - 1) + ['rho', 'lambda'] - self.output['regime'], self.output['equation'] = (0, 0) + # self.output['var_type'] = ['x'] * len(self.name_x) + ['yend'] * (len(self.name_yend) - 1) + ['rho', 'lambda'] + self.output["regime"], self.output["equation"] = (0, 0) self.other_top = _spat_pseudo_r2(self) self.other_top += _summary_iteration(self) output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) + # Functions @@ -1547,8 +1579,8 @@ def get_vc_hom(w, wA1, wA2, reg, lambdapar, z_s=None, for_omegaOLS=False): u_s = get_spFilter(w, lambdapar, reg.u) n = float(w.shape[0]) sig2 = np.dot(u_s.T, u_s) / n - mu3 = np.sum(u_s ** 3) / n - mu4 = np.sum(u_s ** 4) / n + mu3 = np.sum(u_s**3) / n + mu4 = np.sum(u_s**4) / n tr11 = wA1 * wA1 tr11 = np.sum(tr11.diagonal()) @@ -1558,9 +1590,9 @@ def get_vc_hom(w, wA1, wA2, reg, lambdapar, z_s=None, for_omegaOLS=False): tr22 = np.sum(tr22.diagonal()) vecd1 = np.array([wA1.diagonal()]).T - psi11 = 2 * sig2 ** 2 * tr11 + (mu4 - 3 * sig2 ** 2) * np.dot(vecd1.T, vecd1) - psi12 = sig2 ** 2 * tr12 - psi22 = sig2 ** 2 * tr22 + psi11 = 2 * sig2**2 * tr11 + (mu4 - 3 * sig2**2) * np.dot(vecd1.T, vecd1) + psi12 = sig2**2 * tr12 + psi22 = sig2**2 * tr22 a1, a2, p = 0.0, 0.0, 0.0 @@ -1621,7 +1653,7 @@ def get_omega_hom(w, wA1, wA2, reg, lamb, G): z_s = get_spFilter(w, lamb, reg.z) u_s = get_spFilter(w, lamb, reg.u) sig2 = 
np.dot(u_s.T, u_s) / n - mu3 = np.sum(u_s ** 3) / n + mu3 = np.sum(u_s**3) / n vecdA1 = np.array([wA1.diagonal()]).T psi, a1, a2, p = get_vc_hom(w, wA1, wA2, reg, lamb, z_s) j = np.dot(G, np.array([[1.0], [2 * lamb]])) @@ -1678,7 +1710,7 @@ def get_omega_hom_ols(w, wA1, wA2, reg, lamb, G): oDD = sig2 * la.inv(spdot(x_s.T, x_s)) oLL = la.inv(spdot(j.T, spdot(psii, j))) / n # oDL = np.zeros((oDD.shape[0], oLL.shape[1])) - mu3 = np.sum(u_s ** 3) / n + mu3 = np.sum(u_s**3) / n psiDL = (mu3 * spdot(reg.x.T, np.hstack((vecdA1, np.zeros((int(n), 1)))))) / n oDL = spdot(spdot(spdot(p.T, psiDL), spdot(psii, j)), oLL) @@ -1702,9 +1734,9 @@ def _test(): import numpy as np import libpysal - db = libpysal.io.open(libpysal.examples.get_path('columbus.dbf'),'r') + db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r") y = np.array(db.by_col("HOVAL")) - y = np.reshape(y, (49,1)) + y = np.reshape(y, (49, 1)) X = [] X.append(db.by_col("INC")) X = np.array(X).T @@ -1716,12 +1748,23 @@ def _test(): q = np.array(q).T w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp")) - w.transform = 'r' - #reg = GM_Error_Hom(y, X, w=w, name_x=['inc'], name_y='hoval', name_ds='columbus', vm=True) - #reg = GM_Endog_Error_Hom(y, X, yd, q, w=w, name_x=['inc'], name_y='hoval', name_yend=['crime'], + w.transform = "r" + # reg = GM_Error_Hom(y, X, w=w, name_x=['inc'], name_y='hoval', name_ds='columbus', vm=True) + # reg = GM_Endog_Error_Hom(y, X, yd, q, w=w, name_x=['inc'], name_y='hoval', name_yend=['crime'], # name_q=['discbd'], name_ds='columbus',vm=True) - reg = GM_Combo_Hom(y, X, yd, q, w=w, name_x=['inc'], name_y='hoval', name_yend=['crime'], name_q=['discbd'], - name_ds='columbus', vm=True) + reg = GM_Combo_Hom( + y, + X, + yd, + q, + w=w, + name_x=["inc"], + name_y="hoval", + name_yend=["crime"], + name_q=["discbd"], + name_ds="columbus", + vm=True, + ) print(reg.output) - print(reg.summary) \ No newline at end of file + print(reg.summary) diff --git a/spreg/error_sp_hom_regimes.py b/spreg/error_sp_hom_regimes.py index f4a8804..2fcba35 100644 --- a/spreg/error_sp_hom_regimes.py +++ b/spreg/error_sp_hom_regimes.py @@ -1,5 +1,5 @@ """ -Hom family of models with regimes. +Hom family of models with regimes. """ __author__ = "Luc Anselin luc.anselin@asu.edu, Pedro V. Amaral pedro.amaral@asu.edu, Daniel Arribas-Bel darribas@asu.edu" @@ -30,8 +30,8 @@ import pandas as pd from .output import output, _summary_iteration, _spat_pseudo_r2 -class GM_Error_Hom_Regimes(RegressionPropsY, REGI.Regimes_Frame): +class GM_Error_Hom_Regimes(RegressionPropsY, REGI.Regimes_Frame): """ GMM method for a spatial error model with homoskedasticity, with regimes, results and diagnostics; based on Drukker et al. 
(2013) :cite:`Drukker2013`, following @@ -336,7 +336,6 @@ def __init__( latex=False, hard_bound=False, ): - n = USER.check_arrays(y, x) y, name_y = USER.check_y(y, n, name_y) w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) @@ -355,7 +354,7 @@ def __init__( set_warn(self, warn) name_x = USER.set_name_x(name_x, x_constant, constant=True) - if slx_lags >0: + if slx_lags > 0: lag_x = get_lags(w, x_constant, slx_lags) x_constant = np.hstack((x_constant, lag_x)) name_x += USER.set_name_spatial_lags(name_x, slx_lags) @@ -451,23 +450,36 @@ def __init__( if slx_lags == 0: self.title = "GM SPATIALLY WEIGHTED MODEL (HOM) - REGIMES" else: - self.title = "GM SPATIALLY WEIGHTED MODEL + SLX (Error-HOM) - REGIMES" + self.title = "GM SPATIALLY WEIGHTED MODEL + SLX (Error-HOM) - REGIMES" self.name_x.append("lambda") self.kf += 1 self.chow = REGI.Chow(self) self._cache = {} self.A1 = A1 - self.output = pd.DataFrame(self.name_x, - columns=['var_names']) - self.output['var_type'] = ['x']*(len(self.name_x)-1)+['lambda'] - self.output['regime'] = x_rlist + ['_Global'] - self.output['equation'] = 0 + self.output = pd.DataFrame(self.name_x, columns=["var_names"]) + self.output["var_type"] = ["x"] * (len(self.name_x) - 1) + ["lambda"] + self.output["regime"] = x_rlist + ["_Global"] + self.output["equation"] = 0 self.other_top = _summary_iteration(self) output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) def _error_hom_regimes_multi( - self, y, x, regimes, w, slx_lags, cores, max_iter, epsilon, A1, cols2regi, vm, name_x, latex, hard_bound): - + self, + y, + x, + regimes, + w, + slx_lags, + cores, + max_iter, + epsilon, + A1, + cols2regi, + vm, + name_x, + latex, + hard_bound, + ): regi_ids = dict( (r, list(np.where(np.array(regimes) == r)[0])) for r in self.regimes_set ) @@ -549,7 +561,9 @@ def _error_hom_regimes_multi( results = {} counter = 0 - self.output = pd.DataFrame(columns=['var_names', 'var_type', 'regime', 'equation']) + self.output = pd.DataFrame( + columns=["var_names", "var_type", "regime", "equation"] + ) for r in self.regimes_set: """ if is_win: @@ -566,26 +580,31 @@ def _error_hom_regimes_multi( (counter * self.kr) : ((counter + 1) * self.kr), (counter * self.kr) : ((counter + 1) * self.kr), ] = results[r].vm - self.betas[ - (counter * self.kr) : ((counter + 1) * self.kr), - ] = results[r].betas - self.u[ - regi_ids[r], - ] = results[r].u - self.predy[ - regi_ids[r], - ] = results[r].predy - self.e_filtered[ - regi_ids[r], - ] = results[r].e_filtered + self.betas[(counter * self.kr) : ((counter + 1) * self.kr),] = results[ + r + ].betas + self.u[regi_ids[r],] = results[r].u + self.predy[regi_ids[r],] = results[r].predy + self.e_filtered[regi_ids[r],] = results[r].e_filtered self.name_y += results[r].name_y self.name_x += results[r].name_x results[r].A1 = A1 results[r].other_top = _summary_iteration(results[r]) - self.output = pd.concat([self.output, pd.DataFrame({'var_names': results[r].name_x, - 'var_type': ['x'] * (len(results[r].name_x) - 1) + - ['lambda'], - 'regime': r, 'equation': r})], ignore_index=True) + self.output = pd.concat( + [ + self.output, + pd.DataFrame( + { + "var_names": results[r].name_x, + "var_type": ["x"] * (len(results[r].name_x) - 1) + + ["lambda"], + "regime": r, + "equation": r, + } + ), + ], + ignore_index=True, + ) counter += 1 self.chow = REGI.Chow(self) self.multi = results @@ -593,7 +612,6 @@ def _error_hom_regimes_multi( class GM_Endog_Error_Hom_Regimes(RegressionPropsY, REGI.Regimes_Frame): - """ GMM method for a spatial error model 
with homoskedasticity, regimes and endogenous variables. @@ -954,7 +972,6 @@ def __init__( latex=False, hard_bound=False, ): - n = USER.check_arrays(y, x, yend, q) y, name_y = USER.check_y(y, n, name_y) w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) @@ -1109,17 +1126,20 @@ def __init__( self.chow = REGI.Chow(self) self._cache = {} self.A1 = A1 - self.output = pd.DataFrame(self.name_z, - columns=['var_names']) - self.output['var_type'] = ['x'] * len(self.name_x) + ['yend'] * len(self.name_yend) + ['lambda'] - self.output['regime'] = x_rlist + yend_rlist + ['_Global'] - self.output['equation'] = 0 + self.output = pd.DataFrame(self.name_z, columns=["var_names"]) + self.output["var_type"] = ( + ["x"] * len(self.name_x) + ["yend"] * len(self.name_yend) + ["lambda"] + ) + self.output["regime"] = x_rlist + yend_rlist + ["_Global"] + self.output["equation"] = 0 if summ: self.other_top = _summary_iteration(self) if slx_lags == 0: - self.title = ("GM SPATIALLY WEIGHTED 2SLS (HOM) - REGIMES") + self.title = "GM SPATIALLY WEIGHTED 2SLS (HOM) - REGIMES" else: - self.title = ("GM SPATIALLY WEIGHTED 2SLS WITH SLX (Error-HOM) - REGIMES") + self.title = ( + "GM SPATIALLY WEIGHTED 2SLS WITH SLX (Error-HOM) - REGIMES" + ) output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) def _endog_error_hom_regimes_multi( @@ -1144,7 +1164,6 @@ def _endog_error_hom_regimes_multi( latex, hard_bound, ): - regi_ids = dict( (r, list(np.where(np.array(regimes) == r)[0])) for r in self.regimes_set ) @@ -1247,7 +1266,9 @@ def _endog_error_hom_regimes_multi( self.name_h, ) = ([], [], [], [], [], []) counter = 0 - self.output = pd.DataFrame(columns=['var_names', 'var_type', 'regime', 'equation']) + self.output = pd.DataFrame( + columns=["var_names", "var_type", "regime", "equation"] + ) for r in self.regimes_set: """ if is_win: @@ -1264,18 +1285,12 @@ def _endog_error_hom_regimes_multi( (counter * self.kr) : ((counter + 1) * self.kr), (counter * self.kr) : ((counter + 1) * self.kr), ] = results[r].vm - self.betas[ - (counter * self.kr) : ((counter + 1) * self.kr), - ] = results[r].betas - self.u[ - regi_ids[r], - ] = results[r].u - self.predy[ - regi_ids[r], - ] = results[r].predy - self.e_filtered[ - regi_ids[r], - ] = results[r].e_filtered + self.betas[(counter * self.kr) : ((counter + 1) * self.kr),] = results[ + r + ].betas + self.u[regi_ids[r],] = results[r].u + self.predy[regi_ids[r],] = results[r].predy + self.e_filtered[regi_ids[r],] = results[r].e_filtered self.name_y += results[r].name_y self.name_x += results[r].name_x self.name_yend += results[r].name_yend @@ -1283,22 +1298,37 @@ def _endog_error_hom_regimes_multi( self.name_z += results[r].name_z self.name_h += results[r].name_h if add_lag != False: - self.predy_e[ - regi_ids[r], - ] = results[r].predy_e - self.e_pred[ - regi_ids[r], - ] = results[r].e_pred + self.predy_e[regi_ids[r],] = results[r].predy_e + self.e_pred[regi_ids[r],] = results[r].e_pred results[r].other_top = _spat_pseudo_r2(results[r]) - v_type = ['x'] * len(results[r].name_x) + ['yend'] * (len(results[r].name_yend)-1) + ['rho','lambda'] + v_type = ( + ["x"] * len(results[r].name_x) + + ["yend"] * (len(results[r].name_yend) - 1) + + ["rho", "lambda"] + ) else: results[r].other_top = "" - v_type = ['x'] * len(results[r].name_x) + ['yend'] * len(results[r].name_yend) + ['lambda'] + v_type = ( + ["x"] * len(results[r].name_x) + + ["yend"] * len(results[r].name_yend) + + ["lambda"] + ) results[r].A1 = A1 results[r].other_top += _summary_iteration(results[r]) - self.output 
= pd.concat([self.output, pd.DataFrame({'var_names': results[r].name_z, - 'var_type': v_type, - 'regime': r, 'equation': r})], ignore_index=True) + self.output = pd.concat( + [ + self.output, + pd.DataFrame( + { + "var_names": results[r].name_z, + "var_type": v_type, + "regime": r, + "equation": r, + } + ), + ], + ignore_index=True, + ) counter += 1 self.chow = REGI.Chow(self) self.multi = results @@ -1306,7 +1336,6 @@ def _endog_error_hom_regimes_multi( class GM_Combo_Hom_Regimes(GM_Endog_Error_Hom_Regimes): - """ GMM method for a spatial lag and error model with homoskedasticity, regimes and endogenous variables, with results and diagnostics; @@ -1354,7 +1383,7 @@ class GM_Combo_Hom_Regimes(GM_Endog_Error_Hom_Regimes): the spatial parameter is fixed across regimes. slx_lags : integer Number of spatial lags of X to include in the model specification. - If slx_lags>0, the specification becomes of the GNSM type. + If slx_lags>0, the specification becomes of the GNSM type. w_lags : integer Orders of W to include as instruments for the spatially lagged dependent variable. For example, w_lags=1, then @@ -1709,12 +1738,11 @@ def __init__( latex=False, hard_bound=False, ): - if regime_lag_sep and not regime_err_sep: - set_warn(self, "regime_err_sep set to True when regime_lag_sep=True.") + set_warn(self, "regime_err_sep set to True when regime_lag_sep=True.") regime_err_sep = True if regime_err_sep and not regime_lag_sep: - set_warn(self, "regime_err_sep set to False when regime_lag_sep=False.") + set_warn(self, "regime_err_sep set to False when regime_lag_sep=False.") regime_err_sep = False n = USER.check_arrays(y, x) y, name_y = USER.check_y(y, n, name_y) @@ -1730,21 +1758,36 @@ def __init__( regimes, name_regimes = USER.check_reg_list(regimes, name_regimes, n) if regime_err_sep and any(col != True for col in cols2regi): - set_warn(self, "All coefficients must vary across regimes if regime_err_sep = True, so setting cols2regi = 'all'.") + set_warn( + self, + "All coefficients must vary across regimes if regime_err_sep = True, so setting cols2regi = 'all'.", + ) cols2regi = "all" if slx_lags > 0: - yend2, q2, wx = set_endog(y, x_constant, w, yend, q, w_lags, lag_q, slx_lags) + yend2, q2, wx = set_endog( + y, x_constant, w, yend, q, w_lags, lag_q, slx_lags + ) x_constant = np.hstack((x_constant, wx)) name_slx = USER.set_name_spatial_lags(name_x, slx_lags) - name_q.extend(USER.set_name_q_sp(name_slx[-len(name_x):], w_lags, name_q, lag_q, force_all=True)) - name_x += name_slx - cols2regi = REGI.check_cols2regi(constant_regi, cols2regi, x_constant[:, :-1], yend=yend2, add_cons=False) + name_q.extend( + USER.set_name_q_sp( + name_slx[-len(name_x) :], w_lags, name_q, lag_q, force_all=True + ) + ) + name_x += name_slx + cols2regi = REGI.check_cols2regi( + constant_regi, cols2regi, x_constant[:, :-1], yend=yend2, add_cons=False + ) else: - name_q.extend(USER.set_name_q_sp(name_x, w_lags, name_q, lag_q, force_all=True)) + name_q.extend( + USER.set_name_q_sp(name_x, w_lags, name_q, lag_q, force_all=True) + ) yend2, q2 = yend, q - cols2regi = REGI.check_cols2regi(constant_regi, cols2regi, x_constant, yend=yend2, add_cons=False) - + cols2regi = REGI.check_cols2regi( + constant_regi, cols2regi, x_constant, yend=yend2, add_cons=False + ) + self.regimes_set = REGI._get_regimes_set(regimes) self.regimes = regimes USER.check_regimes(self.regimes_set, n, x_constant.shape[1]) @@ -1763,7 +1806,6 @@ def __init__( if slx_lags == 0: yend2, q2 = set_endog(y, x_constant, w, yend2, q2, w_lags, lag_q) - 
name_yend.append(USER.set_name_yend_sp(self.name_y)) GM_Endog_Error_Hom_Regimes.__init__( @@ -1805,8 +1847,10 @@ def __init__( if slx_lags == 0: self.title = "GM SPATIALLY WEIGHTED 2SLS-COMBO MODEL (HOM) - REGIMES" else: - self.title = "GM SPATIALLY WEIGHTED 2SLS-COMBO WITH SLX (GNSM-HOM) - REGIMES" - self.output.iat[-2, self.output.columns.get_loc('var_type')] = 'rho' + self.title = ( + "GM SPATIALLY WEIGHTED 2SLS-COMBO WITH SLX (GNSM-HOM) - REGIMES" + ) + self.output.iat[-2, self.output.columns.get_loc("var_type")] = "rho" self.other_top = _spat_pseudo_r2(self) self.other_top += _summary_iteration(self) output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) @@ -1833,14 +1877,20 @@ def _work_error( y_r = y[regi_ids[r]] x_r = x[regi_ids[r]] model = BaseGM_Error_Hom( - y_r, x_r, w_r.sparse, max_iter=max_iter, epsilon=epsilon, A1=A1, hard_bound=hard_bound, + y_r, + x_r, + w_r.sparse, + max_iter=max_iter, + epsilon=epsilon, + A1=A1, + hard_bound=hard_bound, ) set_warn(model, warn) model.w = w_r if slx_lags == 0: model.title = "GM SPATIALLY WEIGHTED 2SLS (HOM) - REGIME %s" % r else: - model.title = "GM SPATIALLY WEIGHTED 2SLS + SLX (Error-HOM) - REGIME %s" % r + model.title = "GM SPATIALLY WEIGHTED 2SLS + SLX (Error-HOM) - REGIME %s" % r model.name_ds = name_ds model.name_y = "%s_%s" % (str(r), name_y) model.name_x = ["%s_%s" % (str(r), i) for i in name_x] @@ -1885,7 +1935,15 @@ def _work_endog_error( ) x_constant = USER.check_constant(x_r) model = BaseGM_Endog_Error_Hom( - y_r, x_r, yend_r, q_r, w_r.sparse, max_iter=max_iter, epsilon=epsilon, A1=A1, hard_bound=hard_bound, + y_r, + x_r, + yend_r, + q_r, + w_r.sparse, + max_iter=max_iter, + epsilon=epsilon, + A1=A1, + hard_bound=hard_bound, ) set_warn(model, warn) if add_lag != False: @@ -1896,19 +1954,19 @@ def _work_endog_error( set_warn(model, warn) if slx_lags == 0: if add_lag != False: - model.title = "GM SPATIALLY WEIGHTED 2SLS-COMBO MODEL (HOM)- REGIME %s" % r + model.title = "GM SPATIALLY WEIGHTED 2SLS-COMBO MODEL (HOM)- REGIME %s" % r else: model.title = "GM SPATIALLY WEIGHTED 2SLS (HOM) - REGIME %s" % r else: if add_lag != False: - model.title = "GM SPATIAL COMBO MODEL + SLX (GNSM-HOM) - REGIME %s" % r + model.title = "GM SPATIAL COMBO MODEL + SLX (GNSM-HOM) - REGIME %s" % r else: model.title = "GM SPATIALLY WEIGHTED 2SLS + SLX (Error-HOM) - REGIME %s" % r model.name_ds = name_ds model.name_y = "%s_%s" % (str(r), name_y) model.name_x = ["%s_%s" % (str(r), i) for i in name_x] model.name_yend = ["%s_%s" % (str(r), i) for i in name_yend] - model.name_z = model.name_x + model.name_yend + [str(r)+"_lambda"] + model.name_z = model.name_x + model.name_yend + [str(r) + "_lambda"] model.name_q = ["%s_%s" % (str(r), i) for i in name_q] model.name_h = model.name_x + model.name_q model.name_w = name_w @@ -1931,9 +1989,9 @@ def _test(): import numpy as np import libpysal - db = libpysal.io.open(libpysal.examples.get_path('columbus.dbf'),'r') + db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r") y = np.array(db.by_col("HOVAL")) - y = np.reshape(y, (49,1)) + y = np.reshape(y, (49, 1)) X = [] X.append(db.by_col("INC")) X = np.array(X).T @@ -1944,16 +2002,30 @@ def _test(): q.append(db.by_col("DISCBD")) q = np.array(q).T - r_var = 'NSA' + r_var = "NSA" regimes = db.by_col(r_var) w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp")) - w.transform = 'r' - #reg = GM_Error_Hom_Regimes(y, X, regimes, w=w, name_x=['inc'], name_y='hoval', name_ds='columbus', vm=True, + w.transform = "r" + # reg = 
GM_Error_Hom_Regimes(y, X, regimes, w=w, name_x=['inc'], name_y='hoval', name_ds='columbus', vm=True, # regime_err_sep=True) - #reg = GM_Endog_Error_Hom_Regimes(y, X, yd, q, regimes, w=w, name_x=['inc'], name_y='hoval', name_yend=['crime'], + # reg = GM_Endog_Error_Hom_Regimes(y, X, yd, q, regimes, w=w, name_x=['inc'], name_y='hoval', name_yend=['crime'], # name_q=['discbd'], name_ds='columbus',vm=True, regime_err_sep=True) - reg = GM_Combo_Hom_Regimes(y, X, regimes, yd, q, w=w, name_x=['inc'], name_y='hoval', name_yend=['crime'], - name_q=['discbd'], name_ds='columbus', vm=True, regime_err_sep=False, regime_lag_sep=False) + reg = GM_Combo_Hom_Regimes( + y, + X, + regimes, + yd, + q, + w=w, + name_x=["inc"], + name_y="hoval", + name_yend=["crime"], + name_q=["discbd"], + name_ds="columbus", + vm=True, + regime_err_sep=False, + regime_lag_sep=False, + ) print(reg.output) - print(reg.summary) \ No newline at end of file + print(reg.summary) diff --git a/spreg/error_sp_regimes.py b/spreg/error_sp_regimes.py index a50543d..e7d4556 100644 --- a/spreg/error_sp_regimes.py +++ b/spreg/error_sp_regimes.py @@ -18,12 +18,19 @@ from .sputils import sphstack import pandas as pd from .output import output, _spat_pseudo_r2 -from .error_sp_het_regimes import GM_Error_Het_Regimes, GM_Endog_Error_Het_Regimes, GM_Combo_Het_Regimes -from .error_sp_hom_regimes import GM_Error_Hom_Regimes, GM_Endog_Error_Hom_Regimes, GM_Combo_Hom_Regimes +from .error_sp_het_regimes import ( + GM_Error_Het_Regimes, + GM_Endog_Error_Het_Regimes, + GM_Combo_Het_Regimes, +) +from .error_sp_hom_regimes import ( + GM_Error_Hom_Regimes, + GM_Endog_Error_Hom_Regimes, + GM_Combo_Hom_Regimes, +) class GM_Error_Regimes(RegressionPropsY, REGI.Regimes_Frame): - """ GMM method for a spatial error model with regimes, with results and diagnostics; based on Kelejian and Prucha (1998, 1999) :cite:`Kelejian1998` :cite:`Kelejian1999`. @@ -293,16 +300,14 @@ def __init__( name_regimes=None, latex=False, ): - n = USER.check_arrays(y, x) y, name_y = USER.check_y(y, n, name_y) w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) - x_constant, name_x, warn = USER.check_constant( - x, name_x, just_rem=True) + x_constant, name_x, warn = USER.check_constant(x, name_x, just_rem=True) set_warn(self, warn) name_x = USER.set_name_x(name_x, x_constant, constant=True) - if slx_lags >0: + if slx_lags > 0: lag_x = get_lags(w, x_constant, slx_lags) x_constant = np.hstack((x_constant, lag_x)) name_x += USER.set_name_spatial_lags(name_x, slx_lags) @@ -327,15 +332,23 @@ def __init__( if regime_err_sep == True: if set(cols2regi) == set([True]): self._error_regimes_multi( - y, x_constant, regimes, w, slx_lags, cores, cols2regi, vm, name_x, latex + y, + x_constant, + regimes, + w, + slx_lags, + cores, + cols2regi, + vm, + name_x, + latex, ) else: raise Exception( "All coefficients must vary across regimes if regime_err_sep = True." 
) else: - x_constant = sphstack( - np.ones((x_constant.shape[0], 1)), x_constant) + x_constant = sphstack(np.ones((x_constant.shape[0], 1)), x_constant) name_x = USER.set_name_x(name_x, x_constant) self.x, self.name_x, x_rlist = REGI.Regimes_Frame.__init__( self, @@ -367,19 +380,20 @@ def __init__( if slx_lags == 0: self.title = "GM SPATIALLY WEIGHTED MODEL - REGIMES" else: - self.title = "GM SPATIALLY WEIGHTED MODEL + SLX (SLX-Error) - REGIMES" + self.title = "GM SPATIALLY WEIGHTED MODEL + SLX (SLX-Error) - REGIMES" self.name_x.append("lambda") self.kf += 1 self.chow = REGI.Chow(self) self._cache = {} - self.output = pd.DataFrame(self.name_x, - columns=['var_names']) - self.output['var_type'] = ['x']*(len(self.name_x)-1)+['lambda'] - self.output['regime'] = x_rlist + ['_Global'] - self.output['equation'] = 0 + self.output = pd.DataFrame(self.name_x, columns=["var_names"]) + self.output["var_type"] = ["x"] * (len(self.name_x) - 1) + ["lambda"] + self.output["regime"] = x_rlist + ["_Global"] + self.output["equation"] = 0 output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) - def _error_regimes_multi(self, y, x, regimes, w, slx_lags, cores, cols2regi, vm, name_x, latex): + def _error_regimes_multi( + self, y, x, regimes, w, slx_lags, cores, cols2regi, vm, name_x, latex + ): regi_ids = dict( (r, list(np.where(np.array(regimes) == r)[0])) for r in self.regimes_set ) @@ -453,7 +467,8 @@ def _error_regimes_multi(self, y, x, regimes, w, slx_lags, cores, cols2regi, vm, results = {} self.name_y, self.name_x = [], [] self.output = pd.DataFrame( - columns=['var_names', 'var_type', 'regime', 'equation']) + columns=["var_names", "var_type", "regime", "equation"] + ) counter = 0 for r in self.regimes_set: """ @@ -468,27 +483,32 @@ def _error_regimes_multi(self, y, x, regimes, w, slx_lags, cores, cols2regi, vm, results[r] = results_p[r].get() self.vm[ - (counter * self.kr): ((counter + 1) * self.kr), - (counter * self.kr): ((counter + 1) * self.kr), + (counter * self.kr) : ((counter + 1) * self.kr), + (counter * self.kr) : ((counter + 1) * self.kr), ] = results[r].vm - self.betas[ - (counter * (self.kr + 1)): ((counter + 1) * (self.kr + 1)), - ] = results[r].betas - self.u[ - regi_ids[r], - ] = results[r].u - self.predy[ - regi_ids[r], - ] = results[r].predy - self.e_filtered[ - regi_ids[r], - ] = results[r].e_filtered + self.betas[(counter * (self.kr + 1)) : ((counter + 1) * (self.kr + 1)),] = ( + results[r].betas + ) + self.u[regi_ids[r],] = results[r].u + self.predy[regi_ids[r],] = results[r].predy + self.e_filtered[regi_ids[r],] = results[r].e_filtered self.name_y += results[r].name_y self.name_x += results[r].name_x - self.output = pd.concat([self.output, pd.DataFrame({'var_names': results[r].name_x, - 'var_type': ['x'] * (len(results[r].name_x) - 1) + - ['lambda'], - 'regime': r, 'equation': r})], ignore_index=True) + self.output = pd.concat( + [ + self.output, + pd.DataFrame( + { + "var_names": results[r].name_x, + "var_type": ["x"] * (len(results[r].name_x) - 1) + + ["lambda"], + "regime": r, + "equation": r, + } + ), + ], + ignore_index=True, + ) counter += 1 self.chow = REGI.Chow(self) self.multi = results @@ -496,7 +516,6 @@ def _error_regimes_multi(self, y, x, regimes, w, slx_lags, cores, cols2regi, vm, class GM_Endog_Error_Regimes(RegressionPropsY, REGI.Regimes_Frame): - """ GMM method for a spatial error model with regimes and endogenous variables, with results and diagnostics; based on Kelejian and Prucha (1998, @@ -816,13 +835,11 @@ def __init__( add_lag=False, latex=False, 
): - n = USER.check_arrays(y, x, yend, q) y, name_y = USER.check_y(y, n, name_y) w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) yend, q, name_yend, name_q = USER.check_endog([yend, q], [name_yend, name_q]) - x_constant, name_x, warn = USER.check_constant( - x, name_x, just_rem=True) + x_constant, name_x, warn = USER.check_constant(x, name_x, just_rem=True) set_warn(self, warn) name_x = USER.set_name_x(name_x, x_constant, constant=True) @@ -878,8 +895,7 @@ def __init__( "All coefficients must vary across regimes if regime_err_sep = True." ) else: - x_constant = sphstack( - np.ones((x_constant.shape[0], 1)), x_constant) + x_constant = sphstack(np.ones((x_constant.shape[0], 1)), x_constant) name_x = USER.set_name_x(name_x, x_constant) q, name_q = REGI.Regimes_Frame.__init__( self, q, regimes, constant_regi=None, cols2regi="all", names=name_q @@ -942,19 +958,20 @@ def __init__( self.kf += 1 self.chow = REGI.Chow(self) self._cache = {} - self.output = pd.DataFrame(self.name_z, - columns=['var_names']) - self.output['var_type'] = [ - 'x'] * len(self.name_x) + ['yend'] * len(self.name_yend) + ['lambda'] - self.output['regime'] = x_rlist + yend_rlist + ['_Global'] - self.output['equation'] = 0 + self.output = pd.DataFrame(self.name_z, columns=["var_names"]) + self.output["var_type"] = ( + ["x"] * len(self.name_x) + ["yend"] * len(self.name_yend) + ["lambda"] + ) + self.output["regime"] = x_rlist + yend_rlist + ["_Global"] + self.output["equation"] = 0 if summ: if slx_lags == 0: - self.title = ("GM SPATIALLY WEIGHTED 2SLS - REGIMES") + self.title = "GM SPATIALLY WEIGHTED 2SLS - REGIMES" else: - self.title = ("GM SPATIALLY WEIGHTED 2SLS WITH SLX (SLX-Error) - REGIMES") - output(reg=self, vm=vm, robust=False, - other_end=False, latex=latex) + self.title = ( + "GM SPATIALLY WEIGHTED 2SLS WITH SLX (SLX-Error) - REGIMES" + ) + output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) def _endog_error_regimes_multi( self, @@ -974,7 +991,6 @@ def _endog_error_regimes_multi( add_lag, latex, ): - regi_ids = dict( (r, list(np.where(np.array(regimes) == r)[0])) for r in self.regimes_set ) @@ -1070,7 +1086,8 @@ def _endog_error_regimes_multi( ) = ([], [], [], [], [], []) counter = 0 self.output = pd.DataFrame( - columns=['var_names', 'var_type', 'regime', 'equation']) + columns=["var_names", "var_type", "regime", "equation"] + ) for r in self.regimes_set: """ if is_win: @@ -1084,21 +1101,15 @@ def _endog_error_regimes_multi( results[r] = results_p[r].get() self.vm[ - (counter * self.kr): ((counter + 1) * self.kr), - (counter * self.kr): ((counter + 1) * self.kr), + (counter * self.kr) : ((counter + 1) * self.kr), + (counter * self.kr) : ((counter + 1) * self.kr), ] = results[r].vm - self.betas[ - (counter * (self.kr + 1)): ((counter + 1) * (self.kr + 1)), - ] = results[r].betas - self.u[ - regi_ids[r], - ] = results[r].u - self.predy[ - regi_ids[r], - ] = results[r].predy - self.e_filtered[ - regi_ids[r], - ] = results[r].e_filtered + self.betas[(counter * (self.kr + 1)) : ((counter + 1) * (self.kr + 1)),] = ( + results[r].betas + ) + self.u[regi_ids[r],] = results[r].u + self.predy[regi_ids[r],] = results[r].predy + self.e_filtered[regi_ids[r],] = results[r].e_filtered self.name_y += results[r].name_y self.name_x += results[r].name_x self.name_yend += results[r].name_yend @@ -1106,22 +1117,35 @@ def _endog_error_regimes_multi( self.name_z += results[r].name_z self.name_h += results[r].name_h if add_lag != False: - self.predy_e[ - regi_ids[r], - ] = results[r].predy_e - self.e_pred[ 
- regi_ids[r], - ] = results[r].e_pred + self.predy_e[regi_ids[r],] = results[r].predy_e + self.e_pred[regi_ids[r],] = results[r].e_pred results[r].other_top = _spat_pseudo_r2(results[r]) - v_type = ['x'] * len(results[r].name_x) + ['yend'] * \ - (len(results[r].name_yend) - 1) + ['rho', 'lambda'] + v_type = ( + ["x"] * len(results[r].name_x) + + ["yend"] * (len(results[r].name_yend) - 1) + + ["rho", "lambda"] + ) else: results[r].other_top = "" - v_type = ['x'] * len(results[r].name_x) + ['yend'] * \ - len(results[r].name_yend) + ['lambda'] - self.output = pd.concat([self.output, pd.DataFrame({'var_names': results[r].name_z, - 'var_type': v_type, - 'regime': r, 'equation': r})], ignore_index=True) + v_type = ( + ["x"] * len(results[r].name_x) + + ["yend"] * len(results[r].name_yend) + + ["lambda"] + ) + self.output = pd.concat( + [ + self.output, + pd.DataFrame( + { + "var_names": results[r].name_z, + "var_type": v_type, + "regime": r, + "equation": r, + } + ), + ], + ignore_index=True, + ) counter += 1 self.chow = REGI.Chow(self) self.multi = results @@ -1129,7 +1153,6 @@ def _endog_error_regimes_multi( class GM_Combo_Regimes(GM_Endog_Error_Regimes, REGI.Regimes_Frame): - """ GMM method for a spatial lag and error model with regimes and endogenous variables, with results and diagnostics; based on Kelejian and Prucha (1998, @@ -1176,7 +1199,7 @@ class GM_Combo_Regimes(GM_Endog_Error_Regimes, REGI.Regimes_Frame): the spatial parameter is fixed accross regimes. slx_lags : integer Number of spatial lags of X to include in the model specification. - If slx_lags>0, the specification becomes of the GNSM type. + If slx_lags>0, the specification becomes of the GNSM type. w_lags : integer Orders of W to include as instruments for the spatially lagged dependent variable. 
For example, w_lags=1, then @@ -1494,17 +1517,16 @@ def __init__( latex=False, ): if regime_lag_sep and not regime_err_sep: - set_warn(self, "regime_err_sep set to True when regime_lag_sep=True.") + set_warn(self, "regime_err_sep set to True when regime_lag_sep=True.") regime_err_sep = True if regime_err_sep and not regime_lag_sep: - set_warn(self, "regime_err_sep set to False when regime_lag_sep=False.") + set_warn(self, "regime_err_sep set to False when regime_lag_sep=False.") regime_err_sep = False n = USER.check_arrays(y, x) y, name_y = USER.check_y(y, n, name_y) w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) yend, q, name_yend, name_q = USER.check_endog([yend, q], [name_yend, name_q]) - x_constant, name_x, warn = USER.check_constant( - x, name_x, just_rem=True) + x_constant, name_x, warn = USER.check_constant(x, name_x, just_rem=True) set_warn(self, warn) name_x = USER.set_name_x(name_x, x_constant, constant=True) self.name_y = USER.set_name_y(name_y) @@ -1513,20 +1535,35 @@ def __init__( regimes, name_regimes = USER.check_reg_list(regimes, name_regimes, n) if regime_err_sep and any(col != True for col in cols2regi): - set_warn(self, "All coefficients must vary across regimes if regime_err_sep = True, so setting cols2regi = 'all'.") + set_warn( + self, + "All coefficients must vary across regimes if regime_err_sep = True, so setting cols2regi = 'all'.", + ) cols2regi = "all" - + if slx_lags > 0: - yend2, q2, wx = set_endog(y, x_constant, w, yend, q, w_lags, lag_q, slx_lags) + yend2, q2, wx = set_endog( + y, x_constant, w, yend, q, w_lags, lag_q, slx_lags + ) x_constant = np.hstack((x_constant, wx)) name_slx = USER.set_name_spatial_lags(name_x, slx_lags) - name_q.extend(USER.set_name_q_sp(name_slx[-len(name_x):], w_lags, name_q, lag_q, force_all=True)) - name_x += name_slx - cols2regi = REGI.check_cols2regi(constant_regi, cols2regi, x_constant[:, :-1], yend=yend2, add_cons=False) + name_q.extend( + USER.set_name_q_sp( + name_slx[-len(name_x) :], w_lags, name_q, lag_q, force_all=True + ) + ) + name_x += name_slx + cols2regi = REGI.check_cols2regi( + constant_regi, cols2regi, x_constant[:, :-1], yend=yend2, add_cons=False + ) else: - name_q.extend(USER.set_name_q_sp(name_x, w_lags, name_q, lag_q, force_all=True)) + name_q.extend( + USER.set_name_q_sp(name_x, w_lags, name_q, lag_q, force_all=True) + ) yend2, q2 = yend, q - cols2regi = REGI.check_cols2regi(constant_regi, cols2regi, x_constant, yend=yend2, add_cons=False) + cols2regi = REGI.check_cols2regi( + constant_regi, cols2regi, x_constant, yend=yend2, add_cons=False + ) self.regimes_set = REGI._get_regimes_set(regimes) self.regimes = regimes @@ -1546,7 +1583,7 @@ def __init__( cols2regi += [False] if slx_lags == 0: yend2, q2 = set_endog(y, x_constant, w, yend2, q2, w_lags, lag_q) - + name_yend.append(USER.set_name_yend_sp(self.name_y)) print(cols2regi, x_constant.shape[1], yend2.shape[1], name_x, name_yend, name_q) @@ -1584,18 +1621,25 @@ def __init__( if slx_lags == 0: self.title = "SPATIALLY WEIGHTED 2SLS - GM-COMBO MODEL - REGIMES" else: - self.title = "SPATIALLY WEIGHTED 2SLS - GM-COMBO WITH SLX (GNSM) - REGIMES" - self.output.iat[-2, - self.output.columns.get_loc('var_type')] = 'rho' + self.title = ( + "SPATIALLY WEIGHTED 2SLS - GM-COMBO WITH SLX (GNSM) - REGIMES" + ) + self.output.iat[-2, self.output.columns.get_loc("var_type")] = "rho" self.other_top = _spat_pseudo_r2(self) output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) -class GMM_Error_Regimes(GM_Error_Regimes, GM_Combo_Regimes, 
GM_Endog_Error_Regimes, - GM_Error_Het_Regimes, GM_Combo_Het_Regimes, GM_Endog_Error_Het_Regimes, - GM_Error_Hom_Regimes, GM_Combo_Hom_Regimes, GM_Endog_Error_Hom_Regimes - ): - +class GMM_Error_Regimes( + GM_Error_Regimes, + GM_Combo_Regimes, + GM_Endog_Error_Regimes, + GM_Error_Het_Regimes, + GM_Combo_Het_Regimes, + GM_Endog_Error_Het_Regimes, + GM_Error_Hom_Regimes, + GM_Combo_Hom_Regimes, + GM_Endog_Error_Hom_Regimes, +): """ Wrapper function to call any of the GM methods for a spatial error regimes model available in spreg @@ -1640,12 +1684,12 @@ class GMM_Error_Regimes(GM_Error_Regimes, GM_Combo_Regimes, GM_Endog_Error_Regim regime_err_sep: boolean If True, a separate regression is run for each regime. regime_lag_sep: boolean - Always False, kept for consistency, ignored. + Always False, kept for consistency, ignored. add_wy : boolean - If True, then a spatial lag of the dependent variable is included. + If True, then a spatial lag of the dependent variable is included. slx_lags : integer Number of spatial lags of X to include in the model specification. - If slx_lags>0, the specification becomes of the SLX-Error or GNSM type. + If slx_lags>0, the specification becomes of the SLX-Error or GNSM type. vm : boolean If True, include variance-covariance matrix in summary results @@ -1660,7 +1704,7 @@ class GMM_Error_Regimes(GM_Error_Regimes, GM_Combo_Regimes, GM_Endog_Error_Regim name_yend : list of strings Names of endogenous variables for use in output name_q : list of strings - Names of instruments for use in output + Names of instruments for use in output name_ds : string Name of dataset for use in output latex : boolean @@ -1669,7 +1713,7 @@ class GMM_Error_Regimes(GM_Error_Regimes, GM_Combo_Regimes, GM_Endog_Error_Regim If true, raises an exception if the estimated spatial autoregressive parameter is outside the maximum/minimum bounds. **kwargs : keywords - Additional arguments to pass on to the estimators. + Additional arguments to pass on to the estimators. See the specific functions for details on what can be used. Attributes @@ -1765,12 +1809,12 @@ class GMM_Error_Regimes(GM_Error_Regimes, GM_Combo_Regimes, GM_Endog_Error_Regim name_q : list of strings (optional) Names of external instruments name_h : list of strings (optional) - Names of all instruments used in ouput + Names of all instruments used in ouput multi : dictionary Only available when multiple regressions are estimated, i.e. when regime_err_sep=True and no variable is fixed across regimes. 
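A minimal usage sketch of this wrapper (illustrative only; it assumes y, X, yd, q, regimes and w have been built from the columbus HOVAL/INC/CRIME/DISCBD/NSA data with Rook weights, exactly as in the _test() helpers shown elsewhere in this diff):

# estimator="hom" together with add_wy=True dispatches to GM_Combo_Hom_Regimes,
# following the __init__ branching shown below; estimator="het" (the default)
# and estimator="kp98" select the Het and original GM regime classes instead.
reg = GMM_Error_Regimes(
    y, X, regimes, w=w, yend=yd, q=q,
    estimator="hom", add_wy=True,
    name_x=["inc"], name_y="hoval", name_yend=["crime"],
    name_q=["discbd"], name_ds="columbus",
)
print(reg.output)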
- Contains all attributes of each individual regression + Contains all attributes of each individual regression Examples -------- @@ -1900,74 +1944,339 @@ class GMM_Error_Regimes(GM_Error_Regimes, GM_Combo_Regimes, GM_Endog_Error_Regim 9 1_W_UE90 -0.708492 0.167057 -4.24102 0.000022 10 _Global_W_HR90 1.033956 0.269252 3.840111 0.000123 11 lambda -0.384968 0.192256 -2.002366 0.045245 - + """ def __init__( - self, y, x, regimes, w, yend=None, q=None, estimator='het', constant_regi="many", cols2regi="all", regime_err_sep=False, - regime_lag_sep=False, add_wy=False, slx_lags=0, vm=False, name_y=None, name_x=None, name_w=None, name_regimes=None, name_yend=None, - name_q=None, name_ds=None, latex=False, **kwargs): - - if estimator == 'het': + self, + y, + x, + regimes, + w, + yend=None, + q=None, + estimator="het", + constant_regi="many", + cols2regi="all", + regime_err_sep=False, + regime_lag_sep=False, + add_wy=False, + slx_lags=0, + vm=False, + name_y=None, + name_x=None, + name_w=None, + name_regimes=None, + name_yend=None, + name_q=None, + name_ds=None, + latex=False, + **kwargs, + ): + if estimator == "het": if yend is None and not add_wy: - GM_Error_Het_Regimes.__init__(self, y=y, x=x, regimes=regimes, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, - constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, - name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex, **kwargs) + GM_Error_Het_Regimes.__init__( + self, + y=y, + x=x, + regimes=regimes, + w=w, + slx_lags=slx_lags, + vm=vm, + name_y=name_y, + name_x=name_x, + constant_regi=constant_regi, + cols2regi=cols2regi, + regime_err_sep=regime_err_sep, + name_w=name_w, + name_regimes=name_regimes, + name_ds=name_ds, + latex=latex, + **kwargs, + ) elif yend is not None and not add_wy: - GM_Endog_Error_Het_Regimes.__init__(self, y=y, x=x, regimes=regimes, yend=yend, q=q, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, - constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, - name_yend=name_yend, name_q=name_q, name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex, **kwargs) + GM_Endog_Error_Het_Regimes.__init__( + self, + y=y, + x=x, + regimes=regimes, + yend=yend, + q=q, + w=w, + slx_lags=slx_lags, + vm=vm, + name_y=name_y, + name_x=name_x, + constant_regi=constant_regi, + cols2regi=cols2regi, + regime_err_sep=regime_err_sep, + name_yend=name_yend, + name_q=name_q, + name_w=name_w, + name_regimes=name_regimes, + name_ds=name_ds, + latex=latex, + **kwargs, + ) elif add_wy: - GM_Combo_Het_Regimes.__init__(self, y=y, x=x, regimes=regimes, yend=yend, q=q, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, - constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, regime_lag_sep=regime_lag_sep, - name_yend=name_yend, name_q=name_q, name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex, **kwargs) + GM_Combo_Het_Regimes.__init__( + self, + y=y, + x=x, + regimes=regimes, + yend=yend, + q=q, + w=w, + slx_lags=slx_lags, + vm=vm, + name_y=name_y, + name_x=name_x, + constant_regi=constant_regi, + cols2regi=cols2regi, + regime_err_sep=regime_err_sep, + regime_lag_sep=regime_lag_sep, + name_yend=name_yend, + name_q=name_q, + name_w=name_w, + name_regimes=name_regimes, + name_ds=name_ds, + latex=latex, + **kwargs, + ) else: - set_warn(self, 'Combination of arguments passed to GMM_Error_Regimes not allowed. 
Using default arguments instead.') - GM_Error_Het_Regimes.__init__(self, y=y, x=x, regimes=regimes, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, - constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, - name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex) - elif estimator == 'hom': + set_warn( + self, + "Combination of arguments passed to GMM_Error_Regimes not allowed. Using default arguments instead.", + ) + GM_Error_Het_Regimes.__init__( + self, + y=y, + x=x, + regimes=regimes, + w=w, + slx_lags=slx_lags, + vm=vm, + name_y=name_y, + name_x=name_x, + constant_regi=constant_regi, + cols2regi=cols2regi, + regime_err_sep=regime_err_sep, + name_w=name_w, + name_regimes=name_regimes, + name_ds=name_ds, + latex=latex, + ) + elif estimator == "hom": if yend is None and not add_wy: - GM_Error_Hom_Regimes.__init__(self, y=y, x=x, regimes=regimes, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, - constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, - name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex, **kwargs) + GM_Error_Hom_Regimes.__init__( + self, + y=y, + x=x, + regimes=regimes, + w=w, + slx_lags=slx_lags, + vm=vm, + name_y=name_y, + name_x=name_x, + constant_regi=constant_regi, + cols2regi=cols2regi, + regime_err_sep=regime_err_sep, + name_w=name_w, + name_regimes=name_regimes, + name_ds=name_ds, + latex=latex, + **kwargs, + ) elif yend is not None and not add_wy: - GM_Endog_Error_Hom_Regimes.__init__(self, y=y, x=x, regimes=regimes, yend=yend, q=q, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, - constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, - name_yend=name_yend, name_q=name_q, name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex, **kwargs) + GM_Endog_Error_Hom_Regimes.__init__( + self, + y=y, + x=x, + regimes=regimes, + yend=yend, + q=q, + w=w, + slx_lags=slx_lags, + vm=vm, + name_y=name_y, + name_x=name_x, + constant_regi=constant_regi, + cols2regi=cols2regi, + regime_err_sep=regime_err_sep, + name_yend=name_yend, + name_q=name_q, + name_w=name_w, + name_regimes=name_regimes, + name_ds=name_ds, + latex=latex, + **kwargs, + ) elif add_wy: - GM_Combo_Hom_Regimes.__init__(self, y=y, x=x, regimes=regimes, yend=yend, q=q, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, - constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, regime_lag_sep=regime_lag_sep, - name_yend=name_yend, name_q=name_q, name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex, **kwargs) + GM_Combo_Hom_Regimes.__init__( + self, + y=y, + x=x, + regimes=regimes, + yend=yend, + q=q, + w=w, + slx_lags=slx_lags, + vm=vm, + name_y=name_y, + name_x=name_x, + constant_regi=constant_regi, + cols2regi=cols2regi, + regime_err_sep=regime_err_sep, + regime_lag_sep=regime_lag_sep, + name_yend=name_yend, + name_q=name_q, + name_w=name_w, + name_regimes=name_regimes, + name_ds=name_ds, + latex=latex, + **kwargs, + ) else: - set_warn(self, 'Combination of arguments passed to GMM_Error_Regimes not allowed. 
Using default arguments instead.') - GM_Error_Hom_Regimes.__init__(self, y=y, x=x, regimes=regimes, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, - constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, - name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex) - elif estimator == 'kp98': + set_warn( + self, + "Combination of arguments passed to GMM_Error_Regimes not allowed. Using default arguments instead.", + ) + GM_Error_Hom_Regimes.__init__( + self, + y=y, + x=x, + regimes=regimes, + w=w, + slx_lags=slx_lags, + vm=vm, + name_y=name_y, + name_x=name_x, + constant_regi=constant_regi, + cols2regi=cols2regi, + regime_err_sep=regime_err_sep, + name_w=name_w, + name_regimes=name_regimes, + name_ds=name_ds, + latex=latex, + ) + elif estimator == "kp98": if yend is None and not add_wy: - GM_Error_Regimes.__init__(self, y=y, x=x, regimes=regimes, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, - constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, - name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex, **kwargs) + GM_Error_Regimes.__init__( + self, + y=y, + x=x, + regimes=regimes, + w=w, + slx_lags=slx_lags, + vm=vm, + name_y=name_y, + name_x=name_x, + constant_regi=constant_regi, + cols2regi=cols2regi, + regime_err_sep=regime_err_sep, + name_w=name_w, + name_regimes=name_regimes, + name_ds=name_ds, + latex=latex, + **kwargs, + ) elif yend is not None and not add_wy: - GM_Endog_Error_Regimes.__init__(self, y=y, x=x, regimes=regimes, yend=yend, q=q, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, - constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, - name_yend=name_yend, name_q=name_q, name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex, **kwargs) + GM_Endog_Error_Regimes.__init__( + self, + y=y, + x=x, + regimes=regimes, + yend=yend, + q=q, + w=w, + slx_lags=slx_lags, + vm=vm, + name_y=name_y, + name_x=name_x, + constant_regi=constant_regi, + cols2regi=cols2regi, + regime_err_sep=regime_err_sep, + name_yend=name_yend, + name_q=name_q, + name_w=name_w, + name_regimes=name_regimes, + name_ds=name_ds, + latex=latex, + **kwargs, + ) elif add_wy: - GM_Combo_Regimes.__init__(self, y=y, x=x, regimes=regimes, yend=yend, q=q, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, - constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, regime_lag_sep=regime_lag_sep, - name_yend=name_yend, name_q=name_q, name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex, **kwargs) + GM_Combo_Regimes.__init__( + self, + y=y, + x=x, + regimes=regimes, + yend=yend, + q=q, + w=w, + slx_lags=slx_lags, + vm=vm, + name_y=name_y, + name_x=name_x, + constant_regi=constant_regi, + cols2regi=cols2regi, + regime_err_sep=regime_err_sep, + regime_lag_sep=regime_lag_sep, + name_yend=name_yend, + name_q=name_q, + name_w=name_w, + name_regimes=name_regimes, + name_ds=name_ds, + latex=latex, + **kwargs, + ) else: - set_warn(self, 'Combination of arguments passed to GMM_Error_Regimes not allowed. Using default arguments instead.') - GM_Error_Regimes.__init__(self, y=y, x=x, regimes=regimes, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, - constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, - name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex) + set_warn( + self, + "Combination of arguments passed to GMM_Error_Regimes not allowed. 
Using default arguments instead.", + ) + GM_Error_Regimes.__init__( + self, + y=y, + x=x, + regimes=regimes, + w=w, + slx_lags=slx_lags, + vm=vm, + name_y=name_y, + name_x=name_x, + constant_regi=constant_regi, + cols2regi=cols2regi, + regime_err_sep=regime_err_sep, + name_w=name_w, + name_regimes=name_regimes, + name_ds=name_ds, + latex=latex, + ) else: - set_warn(self, 'Combination of arguments passed to GMM_Error_Regimes not allowed. Using default arguments instead.') - GM_Error_Het_Regimes.__init__(self, y=y, x=x, regimes=regimes, w=w, slx_lags=slx_lags, vm=vm, name_y=name_y, name_x=name_x, - constant_regi=constant_regi, cols2regi=cols2regi, regime_err_sep=regime_err_sep, - name_w=name_w, name_regimes=name_regimes, name_ds=name_ds, latex=latex) + set_warn( + self, + "Combination of arguments passed to GMM_Error_Regimes not allowed. Using default arguments instead.", + ) + GM_Error_Het_Regimes.__init__( + self, + y=y, + x=x, + regimes=regimes, + w=w, + slx_lags=slx_lags, + vm=vm, + name_y=name_y, + name_x=name_x, + constant_regi=constant_regi, + cols2regi=cols2regi, + regime_err_sep=regime_err_sep, + name_w=name_w, + name_regimes=name_regimes, + name_ds=name_ds, + latex=latex, + ) + def _work_error(y, x, regi_ids, r, w, name_ds, name_y, name_x, name_w, name_regimes): w_r, warn = REGI.w_regime(w, regi_ids[r], r, transform=True) @@ -2020,26 +2329,25 @@ def _work_endog_error( if add_lag != False: model.rho = model.betas[-2] model.predy_e, model.e_pred, warn = sp_att( - w_r, model.y, model.predy, model.yend[:, - - 1].reshape(model.n, 1), model.rho + w_r, model.y, model.predy, model.yend[:, -1].reshape(model.n, 1), model.rho ) set_warn(model, warn) model.w = w_r if slx_lags == 0: if add_lag != False: - model.title = "SPATIALLY WEIGHTED 2SLS - GM-COMBO MODEL - REGIME %s" % r + model.title = "SPATIALLY WEIGHTED 2SLS - GM-COMBO MODEL - REGIME %s" % r else: model.title = "SPATIALLY WEIGHTED 2SLS (GM) - REGIME %s" % r else: if add_lag != False: - model.title = "GM SPATIAL COMBO MODEL + SLX (GNSM) - REGIME %s" % r + model.title = "GM SPATIAL COMBO MODEL + SLX (GNSM) - REGIME %s" % r else: model.title = "GM SPATIALLY WEIGHTED 2SLS + SLX (SLX-Error) - REGIME %s" % r model.name_ds = name_ds model.name_y = "%s_%s" % (str(r), name_y) model.name_x = ["%s_%s" % (str(r), i) for i in name_x] model.name_yend = ["%s_%s" % (str(r), i) for i in name_yend] - model.name_z = model.name_x + model.name_yend + [str(r)+"_lambda"] + model.name_z = model.name_x + model.name_yend + [str(r) + "_lambda"] model.name_q = ["%s_%s" % (str(r), i) for i in name_q] model.name_h = model.name_x + model.name_q model.name_w = name_w @@ -2061,7 +2369,7 @@ def _test(): import numpy as np import libpysal - db = libpysal.io.open(libpysal.examples.get_path('columbus.dbf'), 'r') + db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r") y = np.array(db.by_col("HOVAL")) y = np.reshape(y, (49, 1)) X = [] @@ -2074,17 +2382,29 @@ def _test(): q.append(db.by_col("DISCBD")) q = np.array(q).T - r_var = 'NSA' + r_var = "NSA" regimes = db.by_col(r_var) - w = libpysal.weights.Rook.from_shapefile( - libpysal.examples.get_path("columbus.shp")) - w.transform = 'r' + w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("columbus.shp")) + w.transform = "r" # reg = GM_Error_Regimes(y, X, regimes, w=w, name_x=['inc'], name_y='hoval', name_ds='columbus', # regime_err_sep=True) # reg = GM_Endog_Error_Regimes(y, X, yd, q, regimes, w=w, name_x=['inc'], name_y='hoval', name_yend=['crime'], # name_q=['discbd'], name_ds='columbus', 
regime_err_sep=True) - reg = GM_Combo_Regimes(y, X, regimes, yd, q, w=w, name_x=['inc'], name_y='hoval', name_yend=['crime'], - name_q=['discbd'], name_ds='columbus', regime_err_sep=True, regime_lag_sep=True) + reg = GM_Combo_Regimes( + y, + X, + regimes, + yd, + q, + w=w, + name_x=["inc"], + name_y="hoval", + name_yend=["crime"], + name_q=["discbd"], + name_ds="columbus", + regime_err_sep=True, + regime_lag_sep=True, + ) print(reg.output) print(reg.summary) diff --git a/spreg/ml_error.py b/spreg/ml_error.py index 363ec5d..f905d6f 100644 --- a/spreg/ml_error.py +++ b/spreg/ml_error.py @@ -31,7 +31,6 @@ class BaseML_Error(RegressionPropsY, RegressionPropsVM, REGI.Regimes_Frame): - """ ML estimation of the spatial error model (note no consistency checks, diagnostics or constants added): :cite:`Anselin1988` @@ -268,7 +267,7 @@ def __init__(self, y, x, w, method="full", epsilon=0.0000001, regimes_att=None): tr3 = waiTwai.diagonal().sum() v1 = np.vstack((tr2 + tr3, tr1 / self.sig2)) - v2 = np.vstack((tr1 / self.sig2, self.n / (2.0 * self.sig2 ** 2))) + v2 = np.vstack((tr1 / self.sig2, self.n / (2.0 * self.sig2**2))) v = np.hstack((v1, v2)) @@ -297,7 +296,6 @@ def get_x_lag(self, w, regimes_att): class ML_Error(BaseML_Error): - """ ML estimation of the spatial error model with all results and diagnostics; :cite:`Anselin1988` @@ -488,16 +486,24 @@ def __init__( y, name_y = USER.check_y(y, n, name_y) w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) x_constant, name_x, warn = USER.check_constant(x, name_x) - name_x = USER.set_name_x(name_x, x_constant) # initialize in case None includes constant + name_x = USER.set_name_x( + name_x, x_constant + ) # initialize in case None includes constant set_warn(self, warn) self.title = "ML SPATIAL ERROR" - if slx_lags >0: - # lag_x = get_lags(w, x_constant[:, 1:], slx_lags) - # x_constant = np.hstack((x_constant, lag_x)) -# name_x += USER.set_name_spatial_lags(name_x, slx_lags) - # name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant from name_x - x_constant,name_x = USER.flex_wx(w,x=x_constant,name_x=name_x,constant=True, - slx_lags=slx_lags,slx_vars=slx_vars) + if slx_lags > 0: + # lag_x = get_lags(w, x_constant[:, 1:], slx_lags) + # x_constant = np.hstack((x_constant, lag_x)) + # name_x += USER.set_name_spatial_lags(name_x, slx_lags) + # name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant from name_x + x_constant, name_x = USER.flex_wx( + w, + x=x_constant, + name_x=name_x, + constant=True, + slx_lags=slx_lags, + slx_vars=slx_vars, + ) self.title += " WITH SLX (SLX-Error)" self.title += " (METHOD = " + method + ")" @@ -512,9 +518,9 @@ def __init__( self.name_w = USER.set_name_w(name_w, w) self.aic = DIAG.akaike(reg=self) self.schwarz = DIAG.schwarz(reg=self) - self.output = pd.DataFrame(self.name_x, columns=['var_names']) - self.output['var_type'] = ['x'] * (len(self.name_x) - 1) + ['lambda'] - self.output['regime'], self.output['equation'] = (0, 0) + self.output = pd.DataFrame(self.name_x, columns=["var_names"]) + self.output["var_type"] = ["x"] * (len(self.name_x) - 1) + ["lambda"] + self.output["regime"], self.output["equation"] = (0, 0) self.other_top = _nonspat_top(self, ml=True) output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) @@ -594,6 +600,7 @@ def _test(): doctest.testmod() np.set_printoptions(suppress=start_suppress) + if __name__ == "__main__": _test() diff --git a/spreg/ml_error_regimes.py b/spreg/ml_error_regimes.py index cf3a0f0..61c561f 100644 --- 
a/spreg/ml_error_regimes.py +++ b/spreg/ml_error_regimes.py @@ -21,7 +21,6 @@ class ML_Error_Regimes(BaseML_Error, REGI.Regimes_Frame): - """ ML estimation of the spatial error model with regimes (note no consistency checks, diagnostics or constants added); :cite:`Anselin1988` @@ -300,7 +299,6 @@ def __init__( name_regimes=None, latex=False, ): - n = USER.check_arrays(y, x) y, name_y = USER.check_y(y, n, name_y) w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) @@ -317,7 +315,7 @@ def __init__( set_warn(self, warn) name_x = USER.set_name_x(name_x, x_constant, constant=True) - if slx_lags >0: + if slx_lags > 0: lag_x = get_lags(w, x_constant, slx_lags) x_constant = np.hstack((x_constant, lag_x)) name_x += USER.set_name_spatial_lags(name_x, slx_lags) @@ -325,7 +323,7 @@ def __init__( self.name_x_r = USER.set_name_x(name_x, x_constant) cols2regi = REGI.check_cols2regi(constant_regi, cols2regi, x_constant) - self.cols2regi = cols2regi + self.cols2regi = cols2regi self.regimes_set = REGI._get_regimes_set(regimes) self.regimes = regimes USER.check_regimes(self.regimes_set, self.n, x.shape[1]) @@ -365,7 +363,7 @@ def __init__( constant_regi=None, cols2regi=cols2regi, names=name_x, - rlist=True + rlist=True, ) BaseML_Error.__init__( self, @@ -378,7 +376,7 @@ def __init__( ) self.title = "ML SPATIAL ERROR" - if slx_lags >0: + if slx_lags > 0: self.title += " WITH SLX (SLX-Error)" self.title += " - REGIMES (METHOD = " + method + ")" @@ -388,17 +386,28 @@ def __init__( self.chow = REGI.Chow(self) self.aic = DIAG.akaike(reg=self) self.schwarz = DIAG.schwarz(reg=self) - self.output = pd.DataFrame(self.name_x, columns=['var_names']) - self.output['var_type'] = ['x'] * (len(self.name_x) - 1) + ['lambda'] - self.output['regime'] = x_rlist + ['_Global'] - self.output['equation'] = 0 + self.output = pd.DataFrame(self.name_x, columns=["var_names"]) + self.output["var_type"] = ["x"] * (len(self.name_x) - 1) + ["lambda"] + self.output["regime"] = x_rlist + ["_Global"] + self.output["equation"] = 0 self.other_top = _nonspat_top(self, ml=True) output(reg=self, vm=vm, robust=False, other_end=False, latex=latex) def _error_regimes_multi( - self, y, x, regimes, w, slx_lags, cores, method, epsilon, cols2regi, vm, name_x, latex + self, + y, + x, + regimes, + w, + slx_lags, + cores, + method, + epsilon, + cols2regi, + vm, + name_x, + latex, ): - regi_ids = dict( (r, list(np.where(np.array(regimes) == r)[0])) for r in self.regimes_set ) @@ -476,7 +485,9 @@ def _error_regimes_multi( results = {} counter = 0 - self.output = pd.DataFrame(columns=['var_names', 'var_type', 'regime', 'equation']) + self.output = pd.DataFrame( + columns=["var_names", "var_type", "regime", "equation"] + ) for r in self.regimes_set: """ if is_win: @@ -493,24 +504,30 @@ def _error_regimes_multi( (counter * self.kr) : ((counter + 1) * self.kr), (counter * self.kr) : ((counter + 1) * self.kr), ] = results[r].vm - self.betas[ - (counter * self.kr) : ((counter + 1) * self.kr), - ] = results[r].betas - self.u[ - regi_ids[r], - ] = results[r].u - self.predy[ - regi_ids[r], - ] = results[r].predy - self.e_filtered[ - regi_ids[r], - ] = results[r].e_filtered + self.betas[(counter * self.kr) : ((counter + 1) * self.kr),] = results[ + r + ].betas + self.u[regi_ids[r],] = results[r].u + self.predy[regi_ids[r],] = results[r].predy + self.e_filtered[regi_ids[r],] = results[r].e_filtered self.name_y += results[r].name_y self.name_x += results[r].name_x results[r].other_top = _nonspat_top(results[r], ml=True) - self.output = 
pd.concat([self.output, pd.DataFrame({'var_names': results[r].name_x, - 'var_type': ['x'] * (len(results[r].name_x) - 1) + ['lambda'], - 'regime': r, 'equation': r})], ignore_index=True) + self.output = pd.concat( + [ + self.output, + pd.DataFrame( + { + "var_names": results[r].name_x, + "var_type": ["x"] * (len(results[r].name_x) - 1) + + ["lambda"], + "regime": r, + "equation": r, + } + ), + ], + ignore_index=True, + ) counter += 1 self.chow = REGI.Chow(self) self.multi = results @@ -518,7 +535,19 @@ def _error_regimes_multi( def _work_error( - y, x, regi_ids, r, w, slx_lags, method, epsilon, name_ds, name_y, name_x, name_w, name_regimes + y, + x, + regi_ids, + r, + w, + slx_lags, + method, + epsilon, + name_ds, + name_y, + name_x, + name_w, + name_regimes, ): w_r, warn = REGI.w_regime(w, regi_ids[r], r, transform=True) y_r = y[regi_ids[r]] @@ -527,7 +556,7 @@ def _work_error( set_warn(model, warn) model.w = w_r model.title = "ML SPATIAL ERROR" - if slx_lags >0: + if slx_lags > 0: model.title += " WITH SLX (SLX-Error)" model.title += " - REGIME " + str(r) + " (METHOD = " + method + ")" model.name_ds = name_ds diff --git a/spreg/ml_lag.py b/spreg/ml_lag.py index 00a9c79..db96812 100755 --- a/spreg/ml_lag.py +++ b/spreg/ml_lag.py @@ -16,7 +16,13 @@ from . import diagnostics as DIAG from . import user_output as USER import pandas as pd -from .output import output, _nonspat_top, _spat_diag_out, _spat_pseudo_r2, _summary_impacts +from .output import ( + output, + _nonspat_top, + _spat_diag_out, + _spat_pseudo_r2, + _summary_impacts, +) from .w_utils import symmetrize from libpysal import weights @@ -31,7 +37,6 @@ class BaseML_Lag(RegressionPropsY, RegressionPropsVM): - """ ML estimation of the spatial lag model (note no consistency checks, diagnostics or constants added) :cite:`Anselin1988` @@ -195,9 +200,9 @@ def __init__(self, y, x, w, slx_lags=0, method="full", epsilon=0.0000001): ylag = weights.lag_spatial(w, y) # b0, b1, e0 and e1 -# now set in ML_Lag -# if slx_lags>0: -# self.x = np.hstack((self.x, get_lags(w, self.x[:, 1:], slx_lags))) + # now set in ML_Lag + # if slx_lags>0: + # self.x = np.hstack((self.x, get_lags(w, self.x[:, 1:], slx_lags))) self.n, self.k = self.x.shape xtx = spdot(self.x.T, self.x) @@ -219,19 +224,19 @@ def __init__(self, y, x, w, slx_lags=0, method="full", epsilon=0.0000001): bounds=(-1.0, 1.0), args=(self.n, e0, e1, W), method="bounded", - options={'xatol': epsilon}, + options={"xatol": epsilon}, ) elif methodML == "LU": I = sp.identity(w.n) Wsp = w.sparse # moved here - W = Wsp#.tocsc() + W = Wsp # .tocsc() res = minimize_scalar( lag_c_loglik_sp, 0.0, bounds=(-1.0, 1.0), args=(self.n, e0, e1, I, Wsp), method="bounded", - options={'xatol': epsilon}, + options={"xatol": epsilon}, ) elif methodML == "ORD": # check on symmetry structure @@ -249,7 +254,7 @@ def __init__(self, y, x, w, slx_lags=0, method="full", epsilon=0.0000001): bounds=(-1.0, 1.0), args=(self.n, e0, e1, evals), method="bounded", - options={'xatol': epsilon}, + options={"xatol": epsilon}, ) else: # program will crash, need to catch @@ -308,7 +313,7 @@ def __init__(self, y, x, w, slx_lags=0, method="full", epsilon=0.0000001): (xTwpy / self.sig2, tr2 + tr3 + wpyTwpy / self.sig2, tr1 / self.sig2) ) v3 = np.vstack( - (np.zeros((self.k, 1)), tr1 / self.sig2, self.n / (2.0 * self.sig2 ** 2)) + (np.zeros((self.k, 1)), tr1 / self.sig2, self.n / (2.0 * self.sig2**2)) ) v = np.hstack((v1, v2, v3)) @@ -318,7 +323,6 @@ def __init__(self, y, x, w, slx_lags=0, method="full", epsilon=0.0000001): class 
ML_Lag(BaseML_Lag): - """ ML estimation of the spatial lag model with all results and diagnostics; :cite:`Anselin1988` @@ -615,38 +619,56 @@ def __init__( y, name_y = USER.check_y(y, n, name_y) w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) x_constant, name_x, warn = USER.check_constant(x, name_x) - name_x = USER.set_name_x(name_x, x_constant) # needs to be initialized for none, now with constant + name_x = USER.set_name_x( + name_x, x_constant + ) # needs to be initialized for none, now with constant set_warn(self, warn) method = method.upper() # using flex_wx kx = len(name_x) if slx_lags > 0: - x_constant,name_x = USER.flex_wx(w,x=x_constant,name_x=name_x,constant=True, - slx_lags=slx_lags,slx_vars=slx_vars) - if isinstance(slx_vars,list): + x_constant, name_x = USER.flex_wx( + w, + x=x_constant, + name_x=name_x, + constant=True, + slx_lags=slx_lags, + slx_vars=slx_vars, + ) + if isinstance(slx_vars, list): kw = slx_vars.count(True) if kw < kx - 1: - spat_diag = False # no common factor test + spat_diag = False # no common factor test else: - kw = kx-1 - + kw = kx - 1 BaseML_Lag.__init__( - self, y=y, x=x_constant, w=w, slx_lags=slx_lags, method=method, epsilon=epsilon + self, + y=y, + x=x_constant, + w=w, + slx_lags=slx_lags, + method=method, + epsilon=epsilon, ) # increase by 1 to have correct aic and sc, include rho in count self.k += 1 - if slx_lags>0: - # kx = len(name_x) - # name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant - - self.title = "MAXIMUM LIKELIHOOD SPATIAL LAG WITH SLX - SPATIAL DURBIN MODEL" + " (METHOD = " + method + ")" -# var_types = ['x'] * kx + ['wx'] * (kx-1) * slx_lags + ['rho'] - var_types = ['x'] * kx + ['wx'] * (kw) * slx_lags + ['rho'] + if slx_lags > 0: + # kx = len(name_x) + # name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant + + self.title = ( + "MAXIMUM LIKELIHOOD SPATIAL LAG WITH SLX - SPATIAL DURBIN MODEL" + + " (METHOD = " + + method + + ")" + ) + # var_types = ['x'] * kx + ['wx'] * (kx-1) * slx_lags + ['rho'] + var_types = ["x"] * kx + ["wx"] * (kw) * slx_lags + ["rho"] else: self.title = "MAXIMUM LIKELIHOOD SPATIAL LAG" + " (METHOD = " + method + ")" - var_types = ['x'] * len(name_x) + ['rho'] + var_types = ["x"] * len(name_x) + ["rho"] self.slx_lags = slx_lags self.slx_vars = slx_vars self.name_ds = USER.set_name_ds(name_ds) @@ -657,22 +679,25 @@ def __init__( self.name_w = USER.set_name_w(name_w, w) self.aic = DIAG.akaike(reg=self) self.schwarz = DIAG.schwarz(reg=self) - self.output = pd.DataFrame(self.name_x, columns=['var_names']) - self.output['var_type'] = var_types - self.output['regime'], self.output['equation'] = (0, 0) + self.output = pd.DataFrame(self.name_x, columns=["var_names"]) + self.output["var_type"] = var_types + self.output["regime"], self.output["equation"] = (0, 0) self.other_top = _spat_pseudo_r2(self) self.other_top += _nonspat_top(self, ml=True) diag_out = None - if spat_diag and slx_lags==1: - diag_out = _spat_diag_out(self, w, 'yend', ml=True) + if spat_diag and slx_lags == 1: + diag_out = _spat_diag_out(self, w, "yend", ml=True) if spat_impacts: - self.sp_multipliers, impacts_str = _summary_impacts(self, w, spat_impacts, slx_lags,slx_vars) + self.sp_multipliers, impacts_str = _summary_impacts( + self, w, spat_impacts, slx_lags, slx_vars + ) try: diag_out += impacts_str except TypeError: diag_out = impacts_str output(reg=self, vm=vm, robust=False, other_end=diag_out, latex=latex) + def lag_c_loglik(rho, n, e0, e1, W): # concentrated log-lik for lag 
model, no constants, brute force er = e0 - rho * e1 @@ -723,6 +748,7 @@ def _test(): doctest.testmod() np.set_printoptions(suppress=start_suppress) + if __name__ == "__main__": _test() diff --git a/spreg/ml_lag_regimes.py b/spreg/ml_lag_regimes.py index e5af2d3..bfaa695 100644 --- a/spreg/ml_lag_regimes.py +++ b/spreg/ml_lag_regimes.py @@ -12,14 +12,19 @@ from .ml_lag import BaseML_Lag from .utils import set_warn, get_lags import pandas as pd -from .output import output, _nonspat_top, _spat_diag_out, _spat_pseudo_r2, _summary_impacts +from .output import ( + output, + _nonspat_top, + _spat_diag_out, + _spat_pseudo_r2, + _summary_impacts, +) __all__ = ["ML_Lag_Regimes"] class ML_Lag_Regimes(BaseML_Lag, REGI.Regimes_Frame): - """ ML estimation of the spatial lag model with regimes (note no consistency checks, diagnostics or constants added) :cite:`Anselin1988`. @@ -332,7 +337,6 @@ def __init__( name_regimes=None, latex=False, ): - n = USER.check_arrays(y, x) y, name_y = USER.check_y(y, n, name_y) w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) @@ -349,7 +353,9 @@ def __init__( name_x += USER.set_name_spatial_lags(name_x, slx_lags) kwx = lag_x.shape[1] - self.name_x_r = USER.set_name_x(name_x, x_constant) + [USER.set_name_yend_sp(name_y)] + self.name_x_r = USER.set_name_x(name_x, x_constant) + [ + USER.set_name_yend_sp(name_y) + ] self.method = method self.epsilon = epsilon regimes, name_regimes = USER.check_reg_list(regimes, name_regimes, n) @@ -421,7 +427,7 @@ def __init__( constant_regi, cols2regi=cols2regi[:-1], names=name_x, - rlist=True + rlist=True, ) self.name_x.append("_Global_" + USER.set_name_yend_sp(name_y)) BaseML_Lag.__init__(self, y=y, x=x, w=w, method=method, epsilon=epsilon) @@ -432,31 +438,46 @@ def __init__( self.aic = DIAG.akaike(reg=self) self.schwarz = DIAG.schwarz(reg=self) self.regime_lag_sep = regime_lag_sep - self.output = pd.DataFrame(self.name_x, columns=['var_names']) - self.output['regime'] = x_rlist + ['_Global'] - self.output['var_type'] = ['x'] * (len(self.name_x) - 1) + ['rho'] - self.output['equation'] = 0 + self.output = pd.DataFrame(self.name_x, columns=["var_names"]) + self.output["regime"] = x_rlist + ["_Global"] + self.output["var_type"] = ["x"] * (len(self.name_x) - 1) + ["rho"] + self.output["equation"] = 0 self.slx_lags = slx_lags diag_out = None if slx_lags > 0: - self.title = ("MAXIMUM LIKELIHOOD SPATIAL DURBIN - REGIMES"+ " (METHOD = "+ method+ ")") - fixed_wx = cols2regi[-(kwx+1):-1].count(False) + self.title = ( + "MAXIMUM LIKELIHOOD SPATIAL DURBIN - REGIMES" + + " (METHOD = " + + method + + ")" + ) + fixed_wx = cols2regi[-(kwx + 1) : -1].count(False) kwx = kwx - fixed_wx if kwx > 0: for m in self.regimes_set: - r_output = self.output[(self.output['regime'] == str(m)) & (self.output['var_type'] == 'x')] + r_output = self.output[ + (self.output["regime"] == str(m)) + & (self.output["var_type"] == "x") + ] wx_index = r_output.index[-kwx:] - self.output.loc[wx_index, 'var_type'] = 'wx' + self.output.loc[wx_index, "var_type"] = "wx" if fixed_wx > 0: - f_wx_index = self.output.index[-(fixed_wx+1):-1] - self.output.loc[f_wx_index, 'var_type'] = 'wx' + f_wx_index = self.output.index[-(fixed_wx + 1) : -1] + self.output.loc[f_wx_index, "var_type"] = "wx" if spat_diag and slx_lags == 1: - diag_out = _spat_diag_out(self, w, 'yend', ml=True) + diag_out = _spat_diag_out(self, w, "yend", ml=True) else: - self.title = ("MAXIMUM LIKELIHOOD SPATIAL LAG - REGIMES"+ " (METHOD = "+ method+ ")") - + self.title = ( + "MAXIMUM LIKELIHOOD SPATIAL LAG - 
REGIMES" + + " (METHOD = " + + method + + ")" + ) + if spat_impacts: - self.sp_multipliers, impacts_str = _summary_impacts(self, w, spat_impacts, slx_lags, regimes=True) + self.sp_multipliers, impacts_str = _summary_impacts( + self, w, spat_impacts, slx_lags, regimes=True + ) try: diag_out += impacts_str except TypeError: @@ -487,7 +508,7 @@ def ML_Lag_Regimes_Multi( name_ds, latex, ): - #pool = mp.Pool(cores) + # pool = mp.Pool(cores) results_p = {} """ for r in self.regimes_set: @@ -564,7 +585,9 @@ def ML_Lag_Regimes_Multi( results = {} self.name_y, self.name_x = [], [] counter = 0 - self.output = pd.DataFrame(columns=['var_names', 'var_type', 'regime', 'equation']) + self.output = pd.DataFrame( + columns=["var_names", "var_type", "regime", "equation"] + ) for r in self.regimes_set: """ if is_win: @@ -580,21 +603,13 @@ def ML_Lag_Regimes_Multi( (counter * self.kr) : ((counter + 1) * self.kr), (counter * self.kr) : ((counter + 1) * self.kr), ] = results[r].vm - self.betas[ - (counter * self.kr) : ((counter + 1) * self.kr), - ] = results[r].betas - self.u[ - regi_ids[r], - ] = results[r].u - self.predy[ - regi_ids[r], - ] = results[r].predy - self.predy_e[ - regi_ids[r], - ] = results[r].predy_e - self.e_pred[ - regi_ids[r], - ] = results[r].e_pred + self.betas[(counter * self.kr) : ((counter + 1) * self.kr),] = results[ + r + ].betas + self.u[regi_ids[r],] = results[r].u + self.predy[regi_ids[r],] = results[r].predy + self.predy_e[regi_ids[r],] = results[r].predy_e + self.e_pred[regi_ids[r],] = results[r].e_pred self.name_y += results[r].name_y self.name_x += results[r].name_x results[r].other_top = _spat_pseudo_r2(results[r]) @@ -602,17 +617,26 @@ def ML_Lag_Regimes_Multi( results[r].other_mid = "" if slx_lags > 0: kx = (len(results[r].name_x) - 1) // (slx_lags + 1) - var_types = ['x'] * (kx + 1) + ['wx'] * kx * slx_lags + ['rho'] + var_types = ["x"] * (kx + 1) + ["wx"] * kx * slx_lags + ["rho"] else: - var_types = ['x'] * (len(results[r].name_x) - 1) + ['rho'] - results[r].output = pd.DataFrame({'var_names': results[r].name_x, - 'var_type': var_types, - 'regime': r, 'equation': r}) + var_types = ["x"] * (len(results[r].name_x) - 1) + ["rho"] + results[r].output = pd.DataFrame( + { + "var_names": results[r].name_x, + "var_type": var_types, + "regime": r, + "equation": r, + } + ) self.output = pd.concat([self.output, results[r].output], ignore_index=True) if spat_diag and slx_lags == 1: - results[r].other_mid += _spat_diag_out(results[r], None, 'yend', ml=True) + results[r].other_mid += _spat_diag_out( + results[r], None, "yend", ml=True + ) if spat_impacts: - results[r].sp_multipliers, impacts_str = _summary_impacts(results[r], results[r].w, spat_impacts, slx_lags) + results[r].sp_multipliers, impacts_str = _summary_impacts( + results[r], results[r].w, spat_impacts, slx_lags + ) results[r].other_mid += impacts_str counter += 1 self.multi = results @@ -639,9 +663,21 @@ def _work( x_r = x[regi_ids[r]] model = BaseML_Lag(y_r, x_r, w_r, method=method, epsilon=epsilon) if slx_lags == 0: - model.title = ("MAXIMUM LIKELIHOOD SPATIAL LAG - REGIME "+ str(r)+ " (METHOD = "+ method+ ")") + model.title = ( + "MAXIMUM LIKELIHOOD SPATIAL LAG - REGIME " + + str(r) + + " (METHOD = " + + method + + ")" + ) else: - model.title = ("MAXIMUM LIKELIHOOD SPATIAL DURBIN - REGIME "+ str(r)+ " (METHOD = "+ method+ ")") + model.title = ( + "MAXIMUM LIKELIHOOD SPATIAL DURBIN - REGIME " + + str(r) + + " (METHOD = " + + method + + ")" + ) model.name_ds = name_ds model.name_y = "%s_%s" % (str(r), name_y) model.name_x 
= ["%s_%s" % (str(r), i) for i in name_x] diff --git a/spreg/ols.py b/spreg/ols.py index a88a3ea..850d1ec 100644 --- a/spreg/ols.py +++ b/spreg/ols.py @@ -8,13 +8,12 @@ from . import robust as ROBUST from .utils import spdot, RegressionPropsY, RegressionPropsVM, set_warn, get_lags import pandas as pd -from libpysal import weights # needed for check on kernel weights in slx +from libpysal import weights # needed for check on kernel weights in slx __all__ = ["OLS"] class BaseOLS(RegressionPropsY, RegressionPropsVM): - """ Ordinary least squares (OLS) (note: no consistency checks, diagnostics or constant added) @@ -438,8 +437,8 @@ def __init__( w=None, robust=None, gwk=None, - slx_lags = 0, - slx_vars = "All", + slx_lags=0, + slx_vars="All", sig2n_k=True, nonspat_diag=True, spat_diag=False, @@ -454,37 +453,42 @@ def __init__( name_ds=None, latex=False, ): - n = USER.check_arrays(y, x) y, name_y = USER.check_y(y, n, name_y) USER.check_robust(robust, gwk) if robust == "hac" and spat_diag: - set_warn( - self, - "Spatial diagnostics are not available for HAC estimation. Hence, spatial diagnostics have been disabled for this model.", - ) - spat_diag = False + set_warn( + self, + "Spatial diagnostics are not available for HAC estimation. Hence, spatial diagnostics have been disabled for this model.", + ) + spat_diag = False if robust in ["hac", "white"] and white_test: - set_warn( - self, - "White test not available when standard errors are estimated by HAC or White correction.", - ) - white_test = False + set_warn( + self, + "White test not available when standard errors are estimated by HAC or White correction.", + ) + white_test = False x_constant, name_x, warn = USER.check_constant(x, name_x) set_warn(self, warn) self.name_x = USER.set_name_x(name_x, x_constant) - + if spat_diag or moran: w = USER.check_weights(w, y, slx_lags=slx_lags, w_required=True) else: w = USER.check_weights(w, y, slx_lags=slx_lags) - if slx_lags >0: -# lag_x = get_lags(w, x_constant[:, 1:], slx_lags) -# x_constant = np.hstack((x_constant, lag_x)) -# self.name_x += USER.set_name_spatial_lags(self.name_x[1:], slx_lags) - x_constant,self.name_x = USER.flex_wx(w,x=x_constant,name_x=self.name_x,constant=True, - slx_lags=slx_lags,slx_vars=slx_vars) + if slx_lags > 0: + # lag_x = get_lags(w, x_constant[:, 1:], slx_lags) + # x_constant = np.hstack((x_constant, lag_x)) + # self.name_x += USER.set_name_spatial_lags(self.name_x[1:], slx_lags) + x_constant, self.name_x = USER.flex_wx( + w, + x=x_constant, + name_x=self.name_x, + constant=True, + slx_lags=slx_lags, + slx_vars=slx_vars, + ) BaseOLS.__init__( self, y=y, x=x_constant, robust=robust, gwk=gwk, sig2n_k=sig2n_k @@ -498,21 +502,24 @@ def __init__( self.robust = USER.set_robust(robust) self.name_w = USER.set_name_w(name_w, w) self.name_gwk = USER.set_name_w(name_gwk, gwk) - self.output = pd.DataFrame(self.name_x, columns=['var_names']) - self.output['var_type'] = ['x'] * len(self.name_x) - self.output['regime'], self.output['equation'] = (0, 0) - self.other_top, self.other_mid, other_end = ("", "", "") # strings where function-specific diag. are stored + self.output = pd.DataFrame(self.name_x, columns=["var_names"]) + self.output["var_type"] = ["x"] * len(self.name_x) + self.output["regime"], self.output["equation"] = (0, 0) + self.other_top, self.other_mid, other_end = ( + "", + "", + "", + ) # strings where function-specific diag. 
are stored if nonspat_diag: self.other_mid += _nonspat_mid(self, white_test=white_test) self.other_top += _nonspat_top(self) if vif: self.other_mid += _summary_vif(self) if spat_diag: - other_end += _spat_diag_out(self, w, 'ols', moran=moran) + other_end += _spat_diag_out(self, w, "ols", moran=moran) output(reg=self, vm=vm, robust=robust, other_end=other_end, latex=latex) - def _test(): import doctest diff --git a/spreg/ols_regimes.py b/spreg/ols_regimes.py index dd6df85..4376ef4 100755 --- a/spreg/ols_regimes.py +++ b/spreg/ols_regimes.py @@ -9,12 +9,20 @@ import pandas as pd from . import regimes as REGI from . import user_output as USER -from .utils import set_warn, RegressionProps_basic, spdot, RegressionPropsY, get_lags, optim_k +from .utils import ( + set_warn, + RegressionProps_basic, + spdot, + RegressionPropsY, + get_lags, + optim_k, +) from .ols import BaseOLS from .robust import hac_multi from .output import output, _spat_diag_out, _nonspat_mid, _nonspat_top from .skater_reg import Skater_reg + class OLS_Regimes(BaseOLS, REGI.Regimes_Frame, RegressionPropsY): """ Ordinary least squares with results and diagnostics. @@ -387,33 +395,32 @@ class OLS_Regimes(BaseOLS, REGI.Regimes_Frame, RegressionPropsY): """ def __init__( - self, - y, - x, - regimes, - w=None, - robust=None, - gwk=None, - slx_lags=0, - sig2n_k=True, - nonspat_diag=True, - spat_diag=False, - moran=False, - white_test=False, - vm=False, - constant_regi="many", - cols2regi="all", - regime_err_sep=True, - cores=False, - name_y=None, - name_x=None, - name_regimes=None, - name_w=None, - name_gwk=None, - name_ds=None, - latex=False + self, + y, + x, + regimes, + w=None, + robust=None, + gwk=None, + slx_lags=0, + sig2n_k=True, + nonspat_diag=True, + spat_diag=False, + moran=False, + white_test=False, + vm=False, + constant_regi="many", + cols2regi="all", + regime_err_sep=True, + cores=False, + name_y=None, + name_x=None, + name_regimes=None, + name_w=None, + name_gwk=None, + name_ds=None, + latex=False, ): - n = USER.check_arrays(y, x) y, name_y = USER.check_y(y, n, name_y) USER.check_robust(robust, gwk) @@ -469,9 +476,9 @@ def __init__( USER.check_regimes(self.regimes_set, self.n, x_constant.shape[1]) self.regime_err_sep = regime_err_sep if ( - regime_err_sep == True - and set(cols2regi) == set([True]) - and constant_regi == "many" + regime_err_sep == True + and set(cols2regi) == set([True]) + and constant_regi == "many" ): self.y = y regi_ids = dict( @@ -492,18 +499,17 @@ def __init__( name_x, moran, white_test, - latex + latex, ) else: x, self.name_x, x_rlist = REGI.Regimes_Frame.__init__( self, x_constant, regimes, constant_regi, cols2regi, name_x, rlist=True ) - self.output = pd.DataFrame(self.name_x, - columns=['var_names']) - self.output['var_type'] = ['x'] * len(self.name_x) - self.output['regime'] = x_rlist - self.output['equation'] = 0 + self.output = pd.DataFrame(self.name_x, columns=["var_names"]) + self.output["var_type"] = ["x"] * len(self.name_x) + self.output["regime"] = x_rlist + self.output["equation"] = 0 BaseOLS.__init__(self, y=y, x=x, robust=robust, gwk=gwk, sig2n_k=sig2n_k) if regime_err_sep == True and robust == None: @@ -529,31 +535,37 @@ def __init__( self.title = "ORDINARY LEAST SQUARES WITH SLX - REGIMES" self.robust = USER.set_robust(robust) self.chow = REGI.Chow(self) - self.other_top, self.other_mid, other_end = ("", "", "") # strings where function-specific diag. are stored + self.other_top, self.other_mid, other_end = ( + "", + "", + "", + ) # strings where function-specific diag. 
are stored if nonspat_diag: self.other_mid += _nonspat_mid(self, white_test=white_test) self.other_top += _nonspat_top(self) if spat_diag: - other_end += _spat_diag_out(self, w, 'ols', moran=moran) #Must decide what to do with W. + other_end += _spat_diag_out( + self, w, "ols", moran=moran + ) # Must decide what to do with W. output(reg=self, vm=vm, robust=robust, other_end=other_end, latex=latex) def _ols_regimes_multi( - self, - x, - w, - regi_ids, - cores, - gwk, - slx_lags, - sig2n_k, - robust, - nonspat_diag, - spat_diag, - vm, - name_x, - moran, - white_test, - latex + self, + x, + w, + regi_ids, + cores, + gwk, + slx_lags, + sig2n_k, + robust, + nonspat_diag, + spat_diag, + vm, + name_x, + moran, + white_test, + latex, ): results_p = {} """ @@ -586,7 +598,7 @@ def _ols_regimes_multi( name_x, self.name_w, self.name_regimes, - slx_lags + slx_lags, ), ) else: @@ -604,7 +616,7 @@ def _ols_regimes_multi( name_x, self.name_w, self.name_regimes, - slx_lags + slx_lags, ) ) self.kryd = 0 @@ -627,7 +639,9 @@ def _ols_regimes_multi( results = {} self.name_y, self.name_x = [], [] counter = 0 - self.output = pd.DataFrame(columns=['var_names', 'var_type', 'regime', 'equation']) + self.output = pd.DataFrame( + columns=["var_names", "var_type", "regime", "equation"] + ) for r in self.regimes_set: """ if is_win: @@ -641,23 +655,30 @@ def _ols_regimes_multi( results[r] = results_p[r].get() self.vm[ - (counter * self.kr): ((counter + 1) * self.kr), - (counter * self.kr): ((counter + 1) * self.kr), + (counter * self.kr) : ((counter + 1) * self.kr), + (counter * self.kr) : ((counter + 1) * self.kr), ] = results[r].vm - self.betas[ - (counter * self.kr): ((counter + 1) * self.kr), - ] = results[r].betas - self.u[ - regi_ids[r], - ] = results[r].u - self.predy[ - regi_ids[r], - ] = results[r].predy + self.betas[(counter * self.kr) : ((counter + 1) * self.kr),] = results[ + r + ].betas + self.u[regi_ids[r],] = results[r].u + self.predy[regi_ids[r],] = results[r].predy self.name_y += results[r].name_y self.name_x += results[r].name_x - self.output = pd.concat([self.output, pd.DataFrame({'var_names': results[r].name_x, - 'var_type': ['x']*len(results[r].name_x), - 'regime': r, 'equation': r})], ignore_index=True) + self.output = pd.concat( + [ + self.output, + pd.DataFrame( + { + "var_names": results[r].name_x, + "var_type": ["x"] * len(results[r].name_x), + "regime": r, + "equation": r, + } + ), + ], + ignore_index=True, + ) results[r].other_top, results[r].other_mid = ("", "") if nonspat_diag: results[r].other_mid += _nonspat_mid(results[r], white_test=white_test) @@ -671,7 +692,7 @@ def _ols_regimes_multi( other_end = "" if spat_diag: self._get_spat_diag_props(x_constant, sig2n_k) - #other_end += _spat_diag_out(self, w, 'ols', moran=moran) Need to consider W before implementing + # other_end += _spat_diag_out(self, w, 'ols', moran=moran) Need to consider W before implementing output(reg=self, vm=vm, robust=robust, other_end=other_end, latex=latex) def _get_spat_diag_props(self, x, sig2n_k): @@ -685,7 +706,19 @@ def _get_spat_diag_props(self, x, sig2n_k): def _work( - y, x, w, regi_ids, r, robust, sig2n_k, name_ds, name_y, name_x, name_w, name_regimes, slx_lags + y, + x, + w, + regi_ids, + r, + robust, + sig2n_k, + name_ds, + name_y, + name_x, + name_w, + name_regimes, + slx_lags, ): y_r = y[regi_ids[r]] x_r = x[regi_ids[r]] @@ -712,9 +745,7 @@ def _work( class OLS_Endog_Regimes(OLS_Regimes): - def __init__( - self, y, x, w, n_clusters=None, quorum=-np.inf, trace=True, **kwargs): - + def __init__(self, y, 
x, w, n_clusters=None, quorum=-np.inf, trace=True, **kwargs): n = USER.check_arrays(y, x) y, name_y = USER.check_y(y, n, name_y) w = USER.check_weights(w, y, w_required=True) @@ -724,29 +755,54 @@ def __init__( if not n_clusters: if quorum < 0: - quorum = np.max([(x.shape[1]+1)*10, 30]) - n_clusters_opt = x.shape[0]*0.70//quorum + quorum = np.max([(x.shape[1] + 1) * 10, 30]) + n_clusters_opt = x.shape[0] * 0.70 // quorum if n_clusters_opt < 2: raise ValueError( - "The combination of the values of `N` and `quorum` is not compatible with regimes estimation.") - sk_reg_results = Skater_reg().fit(n_clusters_opt, w, x_std, {'reg':BaseOLS,'y':y,'x':x}, quorum=quorum, trace=True) - n_clusters = optim_k([sk_reg_results._trace[i][1][2] for i in range(1, len(sk_reg_results._trace))]) - self.clusters = sk_reg_results._trace[n_clusters-1][0] + "The combination of the values of `N` and `quorum` is not compatible with regimes estimation." + ) + sk_reg_results = Skater_reg().fit( + n_clusters_opt, + w, + x_std, + {"reg": BaseOLS, "y": y, "x": x}, + quorum=quorum, + trace=True, + ) + n_clusters = optim_k( + [ + sk_reg_results._trace[i][1][2] + for i in range(1, len(sk_reg_results._trace)) + ] + ) + self.clusters = sk_reg_results._trace[n_clusters - 1][0] else: try: # Call the Skater_reg method based on OLS - sk_reg_results = Skater_reg().fit(n_clusters, w, x_std, {'reg':BaseOLS,'y':y,'x':x}, quorum=quorum, trace=trace) + sk_reg_results = Skater_reg().fit( + n_clusters, + w, + x_std, + {"reg": BaseOLS, "y": y, "x": x}, + quorum=quorum, + trace=trace, + ) self.clusters = sk_reg_results.current_labels_ except Exception as e: if str(e) == "one or more input arrays have more columns than rows": - raise ValueError("One or more input ended up with more variables than observations. Please check your setting for `quorum`.") + raise ValueError( + "One or more input ended up with more variables than observations. Please check your setting for `quorum`." 
+ ) else: print("An error occurred:", e) self._trace = sk_reg_results._trace self.SSR = [self._trace[i][1][2] for i in range(1, len(self._trace))] - OLS_Regimes.__init__(self, y, x, regimes=self.clusters, w=w, name_regimes='Skater_reg', **kwargs) + OLS_Regimes.__init__( + self, y, x, regimes=self.clusters, w=w, name_regimes="Skater_reg", **kwargs + ) + def _test(): import doctest @@ -765,15 +821,15 @@ def _test(): db = libpysal.io.open(libpysal.examples.get_path("NAT.dbf"), "r") y_var = "HR90" - y = np.array(db.by_col(y_var)).reshape(-1,1) - x_var = ['PS90','UE90'] + y = np.array(db.by_col(y_var)).reshape(-1, 1) + x_var = ["PS90", "UE90"] x = np.array([db.by_col(name) for name in x_var]).T r_var = "SOUTH" regimes = db.by_col(r_var) w = libpysal.weights.Rook.from_shapefile(libpysal.examples.get_path("NAT.shp")) w.transform = "r" olsr = OLS_Regimes( - y, + y, x, regimes, w=w, @@ -788,7 +844,7 @@ def _test(): cols2regi=[True, True], sig2n_k=False, white_test=True, - #robust="white" + # robust="white" ) print(olsr.output) print(olsr.summary) diff --git a/spreg/output.py b/spreg/output.py index fbebe65..8c4e4b4 100755 --- a/spreg/output.py +++ b/spreg/output.py @@ -16,9 +16,10 @@ ############### Primary functions for running summary diagnostics ############# ############################################################################### + def output(reg, vm, other_end=False, robust=False, latex=False): strSummary = output_start(reg) - for eq in reg.output['equation'].unique(): + for eq in reg.output["equation"].unique(): try: reg.multi[eq].__summary = {} strSummary, reg.multi[eq] = out_part_top(strSummary, reg, eq) @@ -28,24 +29,32 @@ def output(reg, vm, other_end=False, robust=False, latex=False): strSummary, reg = out_part_middle(strSummary, reg, robust, m=eq, latex=latex) strSummary, reg = out_part_end(strSummary, reg, vm, other_end, m=eq) reg.summary = strSummary - reg.output.sort_values(by=['equation', 'regime'], inplace=True) - reg.output.drop(['var_type', 'regime', 'equation'], axis=1, inplace=True) + reg.output.sort_values(by=["equation", "regime"], inplace=True) + reg.output.drop(["var_type", "regime", "equation"], axis=1, inplace=True) + def output_start(reg): reg.__summary = {} strSummary = "REGRESSION RESULTS\n" strSummary += "------------------\n" - reg.output = reg.output.assign(coefficients=[None] * len(reg.output), std_err=[None] * len(reg.output), - zt_stat=[None] * len(reg.output), prob=[None] * len(reg.output)) + reg.output = reg.output.assign( + coefficients=[None] * len(reg.output), + std_err=[None] * len(reg.output), + zt_stat=[None] * len(reg.output), + prob=[None] * len(reg.output), + ) return strSummary + def out_part_top(strSummary, reg, m): # Top part of summary output. # m = None for single models, m = 1,2,3... 
for multiple equation models if m == None: _reg = reg # _reg = local object with regression results else: - _reg = reg.multi[m] # _reg = local object with equation specific regression results + _reg = reg.multi[ + m + ] # _reg = local object with equation specific regression results title = "\nSUMMARY OF OUTPUT: " + _reg.title + "\n" strSummary += title strSummary += "-" * (len(title) - 2) + "\n" @@ -76,7 +85,7 @@ def out_part_top(strSummary, reg, m): ) _reg.std_err = diagnostics.se_betas(_reg) - if 'OLS' in reg.__class__.__name__: + if "OLS" in reg.__class__.__name__: _reg.t_stat = diagnostics.t_stat(_reg) _reg.r2 = diagnostics.r2(_reg) _reg.ar2 = diagnostics.ar2(_reg) @@ -101,21 +110,28 @@ def out_part_top(strSummary, reg, m): return (strSummary, _reg) + def out_part_middle(strSummary, reg, robust, m=None, latex=False): # Middle part of summary output. # m = None for single models, m = 1,2,3... for multiple equation models - if m==None: - _reg = reg #_reg = local object with regression results - m = reg.output['equation'].unique()[0] + if m == None: + _reg = reg # _reg = local object with regression results + m = reg.output["equation"].unique()[0] else: - _reg = reg.multi[m] #_reg = local object with equation specific regression results - coefs = pd.DataFrame(_reg.betas, columns=['coefficients']) - coefs['std_err'] = pd.DataFrame(_reg.std_err) + _reg = reg.multi[ + m + ] # _reg = local object with equation specific regression results + coefs = pd.DataFrame(_reg.betas, columns=["coefficients"]) + coefs["std_err"] = pd.DataFrame(_reg.std_err) try: - coefs = pd.concat([coefs, pd.DataFrame(_reg.z_stat, columns=['zt_stat', 'prob'])], axis=1) + coefs = pd.concat( + [coefs, pd.DataFrame(_reg.z_stat, columns=["zt_stat", "prob"])], axis=1 + ) except AttributeError: - coefs = pd.concat([coefs, pd.DataFrame(_reg.t_stat, columns=['zt_stat', 'prob'])], axis=1) - coefs.index = reg.output[reg.output['equation'] == m].index + coefs = pd.concat( + [coefs, pd.DataFrame(_reg.t_stat, columns=["zt_stat", "prob"])], axis=1 + ) + coefs.index = reg.output[reg.output["equation"] == m].index reg.output.update(coefs) strSummary += "\n" if robust: @@ -124,41 +140,54 @@ def out_part_middle(strSummary, reg, robust, m=None, latex=False): elif robust == "hac": strSummary += "HAC Standard Errors; Kernel Weights: " + _reg.name_gwk + "\n" elif robust == "ogmm": - strSummary += "Optimal GMM used to estimate the coefficients and the variance-covariance matrix\n" + strSummary += "Optimal GMM used to estimate the coefficients and the variance-covariance matrix\n" strSummary += "------------------------------------------------------------------------------------\n" - - m_output = reg.output[reg.output['equation'] == m] + + m_output = reg.output[reg.output["equation"] == m] if latex: - df_1 = m_output.iloc[np.lexsort((m_output.index, m_output['regime']))] - df_2 = df_1.loc[:, ['var_names', 'coefficients', 'std_err', 'zt_stat', 'prob']] - df_2 = df_2.set_axis(['Variable', 'Coefficient', 'Std.Error', _reg.__summary['summary_zt']+'-Statistic', 'Prob.'], axis='columns', copy=False) - cols = df_2.columns.difference(['Variable']) + df_1 = m_output.iloc[np.lexsort((m_output.index, m_output["regime"]))] + df_2 = df_1.loc[:, ["var_names", "coefficients", "std_err", "zt_stat", "prob"]] + df_2 = df_2.set_axis( + [ + "Variable", + "Coefficient", + "Std.Error", + _reg.__summary["summary_zt"] + "-Statistic", + "Prob.", + ], + axis="columns", + copy=False, + ) + cols = df_2.columns.difference(["Variable"]) df_2[cols] = 
df_2[cols].astype(float).map(lambda x: "%12.5f" % x) - df_2['Variable'] = df_2['Variable'].str.replace("_", "\_").str.replace("%", "\%") - df_inlatex = df_2.style.hide(axis='index').to_latex(hrules=True) + df_2["Variable"] = ( + df_2["Variable"].str.replace("_", "\_").str.replace("%", "\%") + ) + df_inlatex = df_2.style.hide(axis="index").to_latex(hrules=True) strSummary += df_inlatex strSummary += "------------------------------------------------------------------------------------\n" - else: + else: strSummary += ( - " Variable Coefficient Std.Error %1s-Statistic Probability\n" - % (_reg.__summary["summary_zt"]) + " Variable Coefficient Std.Error %1s-Statistic Probability\n" + % (_reg.__summary["summary_zt"]) ) strSummary += "------------------------------------------------------------------------------------\n" - for row in m_output.iloc[np.lexsort((m_output.index, m_output['regime']))].itertuples(): + for row in m_output.iloc[ + np.lexsort((m_output.index, m_output["regime"])) + ].itertuples(): try: strSummary += "%20s %12.5f %12.5f %12.5f %12.5f\n" % ( row.var_names, row.coefficients, row.std_err, row.zt_stat, - row.prob - ) - except TypeError: # special case for models that do not have inference on the lambda term - strSummary += "%20s %12.5f \n" % ( - row.var_names, - row.coefficients + row.prob, ) + except ( + TypeError + ): # special case for models that do not have inference on the lambda term + strSummary += "%20s %12.5f \n" % (row.var_names, row.coefficients) strSummary += "------------------------------------------------------------------------------------\n" try: # Adding info on instruments if they are present @@ -182,8 +211,10 @@ def out_part_middle(strSummary, reg, robust, m=None, latex=False): pass try: # Adding info on regimes if they are present - strSummary += ("Regimes variable: %s\n" % _reg.name_regimes) - strSummary += _summary_chow(_reg) # If local regimes present, compute Chow test. + strSummary += "Regimes variable: %s\n" % _reg.name_regimes + strSummary += _summary_chow( + _reg + ) # If local regimes present, compute Chow test. 
except: pass @@ -200,6 +231,7 @@ def out_part_middle(strSummary, reg, robust, m=None, latex=False): return (strSummary, reg) + def out_part_end(strSummary, reg, vm, other_end, m=None): if m is not None: strSummary += "------------------------------------------------------------------------------------\n" @@ -219,6 +251,7 @@ def out_part_end(strSummary, reg, vm, other_end, m=None): strSummary += "================================ END OF REPORT =====================================" return (strSummary, reg) + def _summary_chow(reg): sum_text = "\nREGIMES DIAGNOSTICS - CHOW TEST" name_x_r = reg.name_x_r @@ -231,23 +264,38 @@ def _summary_chow(reg): if reg.constant_regi == "many": names_chow = ["CONSTANT"] + names_chow - if 'lambda' in reg.output.var_type.values: - if reg.output.var_type.value_counts()['lambda'] > 1: + if "lambda" in reg.output.var_type.values: + if reg.output.var_type.value_counts()["lambda"] > 1: names_chow += ["lambda"] reg.output_chow = pd.DataFrame() - reg.output_chow['var_names'] = names_chow - reg.output_chow['df'] = reg.nr - 1 - reg.output_chow = pd.concat([reg.output_chow, pd.DataFrame(regi, columns=['value', 'prob'])], axis=1) - reg.output_chow = pd.concat([reg.output_chow, pd.DataFrame([{'var_names': 'Global test', - 'df': reg.kr * (reg.nr - 1), - 'value': joint[0], 'prob': joint[1]}])], ignore_index=True) + reg.output_chow["var_names"] = names_chow + reg.output_chow["df"] = reg.nr - 1 + reg.output_chow = pd.concat( + [reg.output_chow, pd.DataFrame(regi, columns=["value", "prob"])], axis=1 + ) + reg.output_chow = pd.concat( + [ + reg.output_chow, + pd.DataFrame( + [ + { + "var_names": "Global test", + "df": reg.kr * (reg.nr - 1), + "value": joint[0], + "prob": joint[1], + } + ] + ), + ], + ignore_index=True, + ) for row in reg.output_chow.itertuples(): sum_text += "%20s %2d %12.3f %9.4f\n" % ( row.var_names, row.df, row.value, - row.prob + row.prob, ) return sum_text @@ -258,7 +306,8 @@ def _spat_diag_out(reg, w, type, moran=False, ml=False): cache = diagnostics_sp.spDcache(reg, w) if type == "yend": strSummary += ( - "TEST DF VALUE PROB\n") + "TEST DF VALUE PROB\n" + ) if not ml: mi, ak, ak_p = diagnostics_sp.akTest(reg, w, cache) reg.ak_test = ak, ak_p @@ -268,19 +317,33 @@ def _spat_diag_out(reg, w, type, moran=False, ml=False): reg.ak_test[0], reg.ak_test[1], ) - if any(reg.output['var_type'] == 'rho'): + if any(reg.output["var_type"] == "rho"): # no common factor test if slx_vars is not "All" - if reg.slx_lags == 1 and not any(reg.output['var_type'] == 'yend'): - if not hasattr(reg, 'slx_vars') or not isinstance(reg.slx_vars, list): - wx_indices = reg.output[(reg.output['var_type'] == 'wx') & (reg.output['regime'] != '_Global')].index + if reg.slx_lags == 1 and not any(reg.output["var_type"] == "yend"): + if not hasattr(reg, "slx_vars") or not isinstance(reg.slx_vars, list): + wx_indices = reg.output[ + (reg.output["var_type"] == "wx") + & (reg.output["regime"] != "_Global") + ].index x_indices = [] - for m in reg.output['regime'].unique(): - x_indices.extend(reg.output[(reg.output['regime'] == m) & (reg.output['var_type'] == 'x')].index[1:]) - vm_indices = x_indices + wx_indices.tolist() + reg.output[reg.output['var_type'] == 'rho'].index.tolist() - cft, cft_p = diagnostics_sp.comfac_test(reg.rho, - reg.betas[x_indices], - reg.betas[wx_indices], - reg.vm[vm_indices, :][:, vm_indices]) + for m in reg.output["regime"].unique(): + x_indices.extend( + reg.output[ + (reg.output["regime"] == m) + & (reg.output["var_type"] == "x") + ].index[1:] + ) + vm_indices = ( + 
x_indices + + wx_indices.tolist() + + reg.output[reg.output["var_type"] == "rho"].index.tolist() + ) + cft, cft_p = diagnostics_sp.comfac_test( + reg.rho, + reg.betas[x_indices], + reg.betas[wx_indices], + reg.vm[vm_indices, :][:, vm_indices], + ) reg.cfh_test = cft, cft_p strSummary += "%-27s %2d %12.3f %9.4f\n" % ( "Common Factor Hypothesis Test", @@ -299,15 +362,21 @@ def _spat_diag_out(reg, w, type, moran=False, ml=False): strSummary += ( "TEST MI/DF VALUE PROB\n" ) - lm_tests = diagnostics_sp.LMtests(reg, w, tests=["lme", "lml", "rlme", "rlml", "sarma"]) + lm_tests = diagnostics_sp.LMtests( + reg, w, tests=["lme", "lml", "rlme", "rlml", "sarma"] + ) if reg.slx_lags == 0: try: - lm_tests2 = diagnostics_sp.LMtests(reg, w, tests=["lmwx", "lmspdurbin", "rlmdurlag", "rlmwx","lmslxerr"]) + lm_tests2 = diagnostics_sp.LMtests( + reg, + w, + tests=["lmwx", "lmspdurbin", "rlmdurlag", "rlmwx", "lmslxerr"], + ) reg.lm_wx = lm_tests2.lmwx reg.lm_spdurbin = lm_tests2.lmspdurbin reg.rlm_wx = lm_tests2.rlmwx reg.rlm_durlag = lm_tests2.rlmdurlag - reg.lm_slxerr = lm_tests2.lmslxerr #currently removed. - LA reinstated + reg.lm_slxerr = lm_tests2.lmslxerr # currently removed. - LA reinstated koley_bera = True except: koley_bera = False @@ -316,7 +385,6 @@ def _spat_diag_out(reg, w, type, moran=False, ml=False): reg.rlm_error = lm_tests.rlme reg.rlm_lag = lm_tests.rlml reg.lm_sarma = lm_tests.sarma - if moran: moran_res = diagnostics_sp.MoranRes(reg, w, z=True) @@ -358,18 +426,16 @@ def _spat_diag_out(reg, w, type, moran=False, ml=False): reg.lm_sarma[1], ) if reg.slx_lags == 0 and koley_bera: - strSummary += ( - "\n- Spatial Durbin -\nTEST DF VALUE PROB\n" - ) + strSummary += "\n- Spatial Durbin -\nTEST DF VALUE PROB\n" strSummary += "%-27s %2d %12.3f %9.4f\n" % ( "LM test for WX", - reg.k-1, + reg.k - 1, reg.lm_wx[0], reg.lm_wx[1], ) strSummary += "%-27s %2d %12.3f %9.4f\n" % ( "Robust LM WX test", - reg.k-1, + reg.k - 1, reg.rlm_wx[0], reg.rlm_wx[1], ) @@ -391,18 +457,19 @@ def _spat_diag_out(reg, w, type, moran=False, ml=False): reg.lm_spdurbin[0], reg.lm_spdurbin[1], ) - #strSummary += ( + # strSummary += ( # "\n- Spatial Error and WX -\nTEST DF VALUE PROB\n" - #) - #strSummary += "%-27s %2d %12.3f %9.4f\n\n" % ( + # ) + # strSummary += "%-27s %2d %12.3f %9.4f\n\n" % ( # "Joint test for Error and WX", # reg.k, # reg.lm_slxerr[0], # reg.lm_slxerr[1], - #) + # ) return strSummary + def _nonspat_top(reg, ml=False): if not ml: reg.sig2ML = reg.sig2n @@ -412,7 +479,11 @@ def _nonspat_top(reg, ml=False): reg.schwarz = diagnostics.schwarz(reg) strSummary = "%-20s:%12.6g %-22s:%12.4f\n" % ( - "Sum squared residual", reg.utu, "F-statistic", reg.f_stat[0],) + "Sum squared residual", + reg.utu, + "F-statistic", + reg.f_stat[0], + ) strSummary += "%-20s:%12.3f %-22s:%12.4g\n" % ( "Sigma-square", reg.sig2, @@ -457,6 +528,7 @@ def _nonspat_top(reg, ml=False): return strSummary + def _nonspat_mid(reg, white_test=False): # compute diagnostics reg.mulColli = diagnostics.condition_index(reg) @@ -512,14 +584,16 @@ def _nonspat_mid(reg, white_test=False): pass return strSummary + def _spat_pseudo_r2(reg): if np.abs(reg.rho) < 1: reg.pr2_e = diagnostics_tsls.pr2_spatial(reg) strSummary = "%-20s: %5.4f\n" % ("Spatial Pseudo R-squared", reg.pr2_e) else: - strSummary = "Spatial Pseudo R-squared: omitted due to rho outside the boundary (-1, 1).\n" + strSummary = "Spatial Pseudo R-squared: omitted due to rho outside the boundary (-1, 1).\n" return strSummary + def _summary_vm(reg): strVM = "\n" strVM += "COEFFICIENTS 
VARIANCE MATRIX\n" @@ -539,6 +613,7 @@ def _summary_vm(reg): strVM += "\n" return strVM + def _summary_iteration(reg): """Reports the number of iterations computed and the type of estimator used for hom and het models.""" try: @@ -558,7 +633,7 @@ def _summary_iteration(reg): step1c, ) except: - pass + pass try: txt = txt[:-1] + " %-22s:%12s" % ( @@ -570,7 +645,8 @@ def _summary_iteration(reg): return txt -def _summary_impacts(reg, w, spat_impacts, slx_lags=0, slx_vars="All",regimes=False): + +def _summary_impacts(reg, w, spat_impacts, slx_lags=0, slx_vars="All", regimes=False): """ Spatial direct, indirect and total effects in spatial lag model. Uses multipliers computed by sputils._spmultipliers. @@ -596,14 +672,16 @@ def _summary_impacts(reg, w, spat_impacts, slx_lags=0, slx_vars="All",regimes=Fa except AttributeError: spat_impacts = [x.lower() for x in spat_impacts] - #variables = reg.output.query("var_type in ['x', 'yend'] and index != 0") # excludes constant - variables = reg.output.query("var_type == 'x' and index != 0") # excludes constant and endogenous variables + # variables = reg.output.query("var_type in ['x', 'yend'] and index != 0") # excludes constant + variables = reg.output.query( + "var_type == 'x' and index != 0" + ) # excludes constant and endogenous variables if regimes: - variables = variables[~variables['var_names'].str.endswith('_CONSTANT')] + variables = variables[~variables["var_names"].str.endswith("_CONSTANT")] variables_index = variables.index - if slx_lags==0: + if slx_lags == 0: strSummary = "\nSPATIAL LAG MODEL IMPACTS\n" else: strSummary = "\nSPATIAL DURBIN MODEL IMPACTS\n" @@ -617,20 +695,35 @@ def _summary_impacts(reg, w, spat_impacts, slx_lags=0, slx_vars="All",regimes=Fa sp_multipliers = {} for i in spat_impacts: - spmult = _spmultiplier(w, reg.rho, method=i) # computes the multipliers, slx_lags not needed - + spmult = _spmultiplier( + w, reg.rho, method=i + ) # computes the multipliers, slx_lags not needed + strSummary += spmult["warn"] - btot, bdir, bind = _sp_effects(reg, variables, spmult, slx_lags,slx_vars) # computes the impacts, needs slx_lags - sp_multipliers[spmult["method"]] = spmult['adi'], spmult['aii'].item(), spmult['ati'].item() + btot, bdir, bind = _sp_effects( + reg, variables, spmult, slx_lags, slx_vars + ) # computes the impacts, needs slx_lags + sp_multipliers[spmult["method"]] = ( + spmult["adi"], + spmult["aii"].item(), + spmult["ati"].item(), + ) strSummary += "Impacts computed using the '" + spmult["method"] + "' method.\n" - strSummary += " Variable Direct Indirect Total\n" + strSummary += ( + " Variable Direct Indirect Total\n" + ) for i in range(len(variables)): strSummary += "%20s %12.4f %12.4f %12.4f\n" % ( - variables['var_names'][variables_index[i]], bdir[i][0], bind[i][0], btot[i][0]) + variables["var_names"][variables_index[i]], + bdir[i][0], + bind[i][0], + btot[i][0], + ) return sp_multipliers, strSummary + def _summary_vif(reg): """ Summary of variance inflation factors for the model. @@ -647,12 +740,16 @@ def _summary_vif(reg): vif = diagnostics.vif(reg) strSummary = "\nVARIANCE INFLATION FACTOR\n" strSummary += " Variable VIF Tolerance\n" - for i in range(len(reg.name_x)-1): + for i in range(len(reg.name_x) - 1): i += 1 strSummary += "%20s %12.4f %12.4f\n" % ( - reg.name_x[i], vif[i][0], vif[i][1]) + reg.name_x[i], + vif[i][0], + vif[i][1], + ) return strSummary + def _summary_dwh(reg): """ Summary of Durbin-Wu-Hausman test on endogeneity of variables. 
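The `_summary_impacts` hunk above is formatting-only; the direct, indirect and total effects it prints come from the multipliers returned by `sputils._spmultiplier` and `_sp_effects`, which are reformatted further down in this diff. A minimal sketch of that arithmetic for the 'simple' method, using hypothetical values of rho and a coefficient that are not taken from any model in this patch:

    # hypothetical estimates, for illustration only
    rho, beta_k = 0.5, 0.75
    ati = 1.0 / (1.0 - rho)   # average total impact multiplier = 2.0
    adi = 1.0                 # the 'simple' method (Kim, Phipps and Anselin 2003) keeps the direct multiplier at 1
    aii = ati - adi           # average indirect impact multiplier = 1.0
    direct, indirect, total = adi * beta_k, aii * beta_k, ati * beta_k
    print(direct, indirect, total)  # 0.75 0.75 1.5 -> one row of the impacts table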
@@ -667,8 +764,11 @@ def _summary_dwh(reg): """ strSummary = "\nREGRESSION DIAGNOSTICS\n" - strSummary += ( - "TEST DF VALUE PROB\n") + strSummary += "TEST DF VALUE PROB\n" strSummary += "%-27s %2d %12.3f %9.4f\n" % ( - "Durbin-Wu-Hausman test",reg.yend.shape[1],reg.dwh[0],reg.dwh[1]) - return strSummary \ No newline at end of file + "Durbin-Wu-Hausman test", + reg.yend.shape[1], + reg.dwh[0], + reg.dwh[1], + ) + return strSummary diff --git a/spreg/panel_fe.py b/spreg/panel_fe.py index 786e4d2..99d6605 100644 --- a/spreg/panel_fe.py +++ b/spreg/panel_fe.py @@ -29,7 +29,6 @@ class BasePanel_FE_Lag(RegressionPropsY, RegressionPropsVM): - """ Base ML method for a fixed effects spatial lag model (note no consistency checks, diagnostics or constants added) :cite:`Elhorst2003`. @@ -181,7 +180,7 @@ def __init__(self, y, x, w, epsilon=0.0000001): ( np.zeros((self.k, 1)), self.t * tr1 / self.sig2, - self.n * self.t / (2.0 * self.sig2 ** 2), + self.n * self.t / (2.0 * self.sig2**2), ) ) @@ -194,7 +193,6 @@ def __init__(self, y, x, w, epsilon=0.0000001): class Panel_FE_Lag(BasePanel_FE_Lag): - """ ML estimation of the fixed effects spatial lag model with all results and diagnostics :cite:`Elhorst2003`. @@ -346,7 +344,6 @@ def __init__( class BasePanel_FE_Error(RegressionPropsY, RegressionPropsVM): - """ Base ML method for a fixed effects spatial error model (note no consistency checks, diagnostics or constants added) :cite:`Elhorst2003`. @@ -471,7 +468,7 @@ def __init__(self, y, x, w, epsilon=0.0000001): v1 = np.vstack((self.t * (tr2 + tr3), self.t * tr1 / self.sig2)) v2 = np.vstack( - (self.t * tr1 / self.sig2, self.t * self.n / (2.0 * self.sig2 ** 2)) + (self.t * tr1 / self.sig2, self.t * self.n / (2.0 * self.sig2**2)) ) v = np.hstack((v1, v2)) @@ -488,7 +485,6 @@ def __init__(self, y, x, w, epsilon=0.0000001): class Panel_FE_Error(BasePanel_FE_Error): - """ ML estimation of the fixed effects spatial error model with all results and diagnostics :cite:`Elhorst2003`. diff --git a/spreg/panel_re.py b/spreg/panel_re.py index 6752eff..7f50641 100644 --- a/spreg/panel_re.py +++ b/spreg/panel_re.py @@ -36,7 +36,6 @@ class BasePanel_RE_Lag(RegressionPropsY, RegressionPropsVM): - """ Base ML method for a random effects spatial lag model (note no consistency checks, diagnostics or constants added) :cite:`Elhorst2003`. @@ -215,7 +214,7 @@ def __init__(self, bigy, bigx, w, epsilon=0.0000001): ( np.zeros((self.k, 1)), -tr1 / self.sig2, - self.n * (1 + 1 / self.phi ** 2), + self.n * (1 + 1 / self.phi**2), -self.n / self.sig2, ) ) @@ -223,8 +222,8 @@ def __init__(self, bigy, bigx, w, epsilon=0.0000001): ( np.zeros((self.k, 1)), self.t * tr1 / self.sig2, - -self.n / self.sig2 ** 2, - self.n * self.t / (2.0 * self.sig2 ** 2), + -self.n / self.sig2**2, + self.n * self.t / (2.0 * self.sig2**2), ) ) @@ -237,7 +236,6 @@ def __init__(self, bigy, bigx, w, epsilon=0.0000001): class Panel_RE_Lag(BasePanel_RE_Lag): - """ ML estimation of the random effects spatial lag model with all results and diagnostics :cite:`Elhorst2003`. @@ -401,7 +399,6 @@ def __init__( class BasePanel_RE_Error(RegressionPropsY, RegressionPropsVM): - """ Base ML method for a random effects spatial error model (note no consistency checks, diagnostics or constants added) :cite:`Elhorst2003`. 
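The panel_fe.py and panel_re.py hunks above only normalize exponent spacing (`x ** 2` -> `x**2`) and drop blank lines. As a reminder of what these estimators are called on, here is a hedged usage sketch of the fixed-effects lag model; the panel layout (n*t rows stacked by time period) and the default `epsilon` follow `BasePanel_FE_Lag` above, while the toy data and the top-level import are assumptions, not part of this patch:

    import numpy as np
    import libpysal
    from spreg import Panel_FE_Lag  # assumed top-level export

    # toy panel: n = 25 lattice cells observed over t = 3 periods, one regressor
    n, t = 25, 3
    rng = np.random.default_rng(0)
    w = libpysal.weights.lat2W(5, 5)
    w.transform = "r"
    x = rng.normal(size=(n * t, 1))           # assumed stacked so each period's n rows are contiguous
    y = 0.5 * x + rng.normal(size=(n * t, 1))

    fe_lag = Panel_FE_Lag(y, x, w)            # epsilon defaults to 1e-7, as in the base class
    print(fe_lag.betas)                       # beta followed by rho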
@@ -518,7 +515,7 @@ def __init__(self, y, x, w, epsilon=0.0000001): ) # b, residuals and predicted values - cvals = self.t * self.phi ** 2 + 1 / (1 - self.lam * evals) ** 2 + cvals = self.t * self.phi**2 + 1 / (1 - self.lam * evals) ** 2 P = spdot(np.diag(cvals ** (-0.5)), evecs.T) pr = P - (I - self.lam * W) pr_nt = sp.kron(sp.identity(self.t), pr, format="csr") @@ -541,7 +538,7 @@ def __init__(self, y, x, w, epsilon=0.0000001): # variance-covariance matrix betas varb = self.sig2 * xsxsi # variance of random effects - self.sig2_u = self.phi ** 2 * self.sig2 + self.sig2_u = self.phi**2 * self.sig2 self.betas = np.vstack((b, self.lam, self.sig2_u)) @@ -572,7 +569,7 @@ def __init__(self, y, x, w, epsilon=0.0000001): v1 = np.vstack( ( - (self.t - 1) / 2 * tr1 ** 2 + 1 / 2 * tr4 ** 2, + (self.t - 1) / 2 * tr1**2 + 1 / 2 * tr4**2, self.t / (2 * self.sig2) * tr6, (self.t - 1) / (2 * self.sig2) * tr1 + 1 / (2 * self.sig2) * tr7, ) @@ -580,15 +577,15 @@ def __init__(self, y, x, w, epsilon=0.0000001): v2 = np.vstack( ( self.t / (2 * self.sig2) * tr6, - self.t ** 2 / (2.0 * self.sig2 ** 2) * tr2 ** 2, - self.t / (2.0 * self.sig2 ** 2) * tr5, + self.t**2 / (2.0 * self.sig2**2) * tr2**2, + self.t / (2.0 * self.sig2**2) * tr5, ) ) v3 = np.vstack( ( (self.t - 1) / (2 * self.sig2) * tr1 + 1 / (2 * self.sig2) * tr7, - self.t / (2.0 * self.sig2 ** 2) * tr5, - 1 / (2.0 * self.sig2 ** 2) * ((self.t - 1) * self.n + tr3 ** 2), + self.t / (2.0 * self.sig2**2) * tr5, + 1 / (2.0 * self.sig2**2) * ((self.t - 1) * self.n + tr3**2), ) ) @@ -606,7 +603,6 @@ def __init__(self, y, x, w, epsilon=0.0000001): class Panel_RE_Error(BasePanel_RE_Error): - """ ML estimation of the random effects spatial error model with all results and diagnostics :cite:`Elhorst2003`. @@ -788,7 +784,7 @@ def phi_c_loglik(phi, rho, beta, bigy, bigx, n, t, W_nt): er = y - rho * ylag - spdot(x, beta) sig2 = spdot(er.T, er) nlsig2 = (n * t / 2.0) * np.log(sig2) - nphi2 = (n / 2.0) * np.log(phi ** 2) + nphi2 = (n / 2.0) * np.log(phi**2) clike = nlsig2 - nphi2 return clike @@ -798,7 +794,7 @@ def err_c_loglik_ord( ): # concentrated log-lik for error model, no constants, eigenvalues lam, phi = lam_phi - cvals = t * phi ** 2 + 1 / (1 - lam * evals) ** 2 + cvals = t * phi**2 + 1 / (1 - lam * evals) ** 2 P = spdot(np.diag(cvals ** (-0.5)), evecs.T) pr = P - (I - lam * Wsp) pr_nt = sp.kron(sp.identity(t), pr, format="csr") @@ -817,7 +813,7 @@ def err_c_loglik_ord( sig2 = ee[0][0] nlsig2 = (n * t / 2.0) * np.log(sig2) # Term 2 - revals = t * phi ** 2 * (1 - lam * evals) ** 2 + revals = t * phi**2 * (1 - lam * evals) ** 2 phi_jacob = 1 / 2 * np.log(1 + revals).sum() # Term 3 jacob = t * np.log(1 - lam * evals).sum() diff --git a/spreg/panel_utils.py b/spreg/panel_utils.py index d4f96af..f3b22e0 100644 --- a/spreg/panel_utils.py +++ b/spreg/panel_utils.py @@ -39,7 +39,7 @@ def check_panel(y, x, w, name_y, name_x): except AttributeError: name_y = y.name y = y.to_numpy() - + if isinstance(x, (pd.Series, pd.DataFrame)): if name_x is None: try: diff --git a/spreg/probit.py b/spreg/probit.py index e644787..14acb95 100644 --- a/spreg/probit.py +++ b/spreg/probit.py @@ -7,6 +7,7 @@ import scipy.optimize as op from scipy.stats import norm, chi2 from libpysal import weights + chisqprob = chi2.sf import scipy.sparse as SP from . 
import user_output as USER @@ -17,7 +18,6 @@ class BaseProbit(object): - """ Probit class to do all the computations @@ -166,24 +166,18 @@ def z_stat(self): except AttributeError: self._cache = {} variance = self.vm.diagonal() - zStat = ( - self.betas.reshape( - len(self.betas), - ) - / np.sqrt(variance) - ) + zStat = self.betas.reshape( + len(self.betas), + ) / np.sqrt(variance) rs = {} for i in range(len(self.betas)): rs[i] = (zStat[i], norm.sf(abs(zStat[i])) * 2) self._cache["z_stat"] = rs.values() except KeyError: variance = self.vm.diagonal() - zStat = ( - self.betas.reshape( - len(self.betas), - ) - / np.sqrt(variance) - ) + zStat = self.betas.reshape( + len(self.betas), + ) / np.sqrt(variance) rs = {} for i in range(len(self.betas)): rs[i] = (zStat[i], norm.sf(abs(zStat[i])) * 2) @@ -415,13 +409,13 @@ def slopes_vm(self): x = self.xmean b = self.betas dfdb = np.eye(self.k) - spdot(b.T, x) * spdot(b, x.T) - slopes_vm = (self.scale ** 2) * np.dot(np.dot(dfdb, self.vm), dfdb.T) + slopes_vm = (self.scale**2) * np.dot(np.dot(dfdb, self.vm), dfdb.T) self._cache["slopes_vm"] = slopes_vm[1:, 1:] except KeyError: x = self.xmean b = self.betas dfdb = np.eye(self.k) - spdot(b.T, x) * spdot(b, x.T) - slopes_vm = (self.scale ** 2) * np.dot(np.dot(dfdb, self.vm), dfdb.T) + slopes_vm = (self.scale**2) * np.dot(np.dot(dfdb, self.vm), dfdb.T) self._cache["slopes_vm"] = slopes_vm[1:, 1:] return self._cache["slopes_vm"] @@ -635,7 +629,6 @@ def hessian(self, par): class Probit(BaseProbit): - """ Classic non-spatial Probit and spatial diagnostics. The class includes a printout that formats all the results and tests in a nice format. @@ -851,7 +844,6 @@ def __init__( name_ds=None, spat_diag=False, ): - n = USER.check_arrays(y, x) y, name_y = USER.check_y(y, n, name_y) x_constant, name_x, warn = USER.check_constant(x, name_x) @@ -865,7 +857,7 @@ def __init__( if slx_lags > 0: lag_x = get_lags(w, x_constant[:, 1:], slx_lags) x_constant = np.hstack((x_constant, lag_x)) - self.name_x += USER.set_name_spatial_lags(self.name_x[1:], slx_lags) + self.name_x += USER.set_name_spatial_lags(self.name_x[1:], slx_lags) else: ws = None @@ -874,7 +866,7 @@ def __init__( ) self.title = "CLASSIC PROBIT ESTIMATOR" if slx_lags > 0: - self.title += " WITH SPATIALLY LAGGED X (SLX)" + self.title += " WITH SPATIALLY LAGGED X (SLX)" self.slx_lags = slx_lags self.name_ds = USER.set_name_ds(name_ds) self.name_y = USER.set_name_y(name_y) @@ -941,7 +933,7 @@ def sp_tests(reg): LM_err_num = np.dot(u_gen.T, (w * u_gen)) ** 2 trWW = np.sum((w * w).diagonal()) trWWWWp = trWW + np.sum((w * w.T).diagonal()) - LM_err = float(1.0 * LM_err_num / (sig2 ** 2 * trWWWWp)) + LM_err = float(1.0 * LM_err_num / (sig2**2 * trWWWWp)) LM_err = np.array([LM_err, chisqprob(LM_err, 1)]) # KP_error: moran = moran_KP(reg.w, u_naive, Phi_prod) diff --git a/spreg/regimes.py b/spreg/regimes.py index d0c4db0..824586a 100755 --- a/spreg/regimes.py +++ b/spreg/regimes.py @@ -214,7 +214,9 @@ class Regimes_Frame: """ - def __init__(self, x, regimes, constant_regi, cols2regi, names=None, yend=False, rlist=False): + def __init__( + self, x, regimes, constant_regi, cols2regi, names=None, yend=False, rlist=False + ): if cols2regi == "all": cols2regi = [True] * x.shape[1] else: @@ -433,7 +435,9 @@ def regimeX_setup(x, regimes, cols2regi, regimes_set, constant=False): return xsp -def set_name_x_regimes(name_x, regimes, constant_regi, cols2regi, regimes_set, rlist=False): +def set_name_x_regimes( + name_x, regimes, constant_regi, cols2regi, regimes_set, rlist=False +): 
""" Generate the set of variable names in a regimes setup, according to the order of the betas diff --git a/spreg/skater_reg.py b/spreg/skater_reg.py index 2b203fb..03ea6f3 100755 --- a/spreg/skater_reg.py +++ b/spreg/skater_reg.py @@ -308,7 +308,8 @@ def fit( print("cut made {}...".format(best_deletion)) if best_deletion.score > prev_score: raise ValueError( - ("The score increased with the number of clusters. " + ( + "The score increased with the number of clusters. " "Please check your data.\nquorum: {}; n_clusters: {}" ).format(quorum, n_clusters) ) @@ -377,7 +378,10 @@ def score_spreg( } trees_scores = {} - if data_reg["reg"].__name__ == "GM_Lag" or data_reg["reg"].__name__ == "BaseGM_Lag": + if ( + data_reg["reg"].__name__ == "GM_Lag" + or data_reg["reg"].__name__ == "BaseGM_Lag" + ): try: x = np.hstack((np.ones((data_reg["x"].shape[0], 1)), data_reg["x"])) reg = TSLS_Regimes( @@ -385,7 +389,8 @@ def score_spreg( x=x, yend=data_reg["yend"], q=data_reg["q"], - regimes=all_labels,) + regimes=all_labels, + ) except: x = _const_x(data_reg["x"]) reg = TSLS_Regimes( @@ -393,10 +398,10 @@ def score_spreg( x=x, yend=data_reg["yend"], q=data_reg["q"], - regimes=all_labels,) + regimes=all_labels, + ) score = np.dot(reg.u.T, reg.u)[0][0] else: - for l in set_labels: x = data_reg["x"][all_labels == l] if np.linalg.matrix_rank(x) < x.shape[1]: @@ -423,12 +428,18 @@ def score_spreg( try: x = np.hstack((np.ones((x.shape[0], 1)), x)) reg = data_reg["reg"]( - y=data_reg["y"][all_labels == l], x=x, w=w_regi_i, **kargs + y=data_reg["y"][all_labels == l], + x=x, + w=w_regi_i, + **kargs, ) except np.linalg.LinAlgError: x = _const_x(x) reg = data_reg["reg"]( - y=data_reg["y"][all_labels == l], x=x, w=w_regi_i, **kargs + y=data_reg["y"][all_labels == l], + x=x, + w=w_regi_i, + **kargs, ) trees_scores[l] = np.dot(reg.u.T, reg.u)[0][0] score = sum(trees_scores.values()) @@ -502,7 +513,7 @@ def score_stats( data_reg["y"][all_labels == l], x, **kargs ).fit() - trees_scores[l] = np.sum(reg.resid ** 2) + trees_scores[l] = np.sum(reg.resid**2) score = sum(trees_scores.values()) else: part_scores, score, trees_scores = self._data_reg_none( @@ -524,10 +535,10 @@ def _prep_score(self, all_labels, current_tree, current_labels): return labels, subtree_quorums def _data_reg_none(self, data, all_labels, l, set_labels): - assert data.shape[0] == len( - all_labels - ), "Length of label array ({}) does not match " "length of data ({})! ".format( - all_labels.shape[0], data.shape[0] + assert data.shape[0] == len(all_labels), ( + "Length of label array ({}) does not match " "length of data ({})! 
".format( + all_labels.shape[0], data.shape[0] + ) ) part_scores = [ self.reduction( @@ -545,8 +556,15 @@ def _data_reg_none(self, data, all_labels, l, set_labels): def _prep_lag(self, data_reg): # if the model is a spatial lag, add the lagged dependent variable to the model - data_reg['yend'], data_reg['q'] = set_endog(data_reg["y"], data_reg["x"][:, 1:], data_reg["w"], yend=None, - q=None, w_lags=1, lag_q=True) + data_reg["yend"], data_reg["q"] = set_endog( + data_reg["y"], + data_reg["x"][:, 1:], + data_reg["w"], + yend=None, + q=None, + w_lags=1, + lag_q=True, + ) return data_reg def find_cut( @@ -618,7 +636,10 @@ def tqdm(noop, desc=""): best_d_score = -np.inf try: - if data_reg["reg"].__name__ == "GM_Lag" or data_reg["reg"].__name__ == "BaseGM_Lag": + if ( + data_reg["reg"].__name__ == "GM_Lag" + or data_reg["reg"].__name__ == "BaseGM_Lag" + ): data_reg = self._prep_lag(data_reg) except: pass @@ -675,9 +696,9 @@ def tqdm(noop, desc=""): best_d_score = d_score try: for i in set(current_labels): - best_scores[ - local_labels[current_list.index(i)] - ] = trees_scores[i] + best_scores[local_labels[current_list.index(i)]] = ( + trees_scores[i] + ) for i in new_trees_scores: best_scores[i] = new_trees_scores[i] except: @@ -693,4 +714,4 @@ def tqdm(noop, desc=""): def _const_x(x): x = x[:, np.ptp(x, axis=0) != 0] x = np.hstack((np.ones((x.shape[0], 1)), x)) - return x \ No newline at end of file + return x diff --git a/spreg/sp_panels.py b/spreg/sp_panels.py index e14f3b0..79e3773 100644 --- a/spreg/sp_panels.py +++ b/spreg/sp_panels.py @@ -21,7 +21,6 @@ class BaseGM_KKP(RegressionPropsY): - ''' Base GMM method for a spatial random effects panel model based on Kapoor, Kelejian and Prucha (2007) :cite:`KKP2007`. @@ -69,7 +68,6 @@ class BaseGM_KKP(RegressionPropsY): ''' def __init__(self, y, x, w, full_weights=False): - # 1a. OLS --> \tilde{\delta} ols = OLS.BaseOLS(y=y, x=x) self.x, self.y, self.n, self.k, self.xtx = ols.x, ols.y, ols.n, ols.k, ols.xtx @@ -117,7 +115,6 @@ def __init__(self, y, x, w, full_weights=False): class GM_KKP(BaseGM_KKP, REGI.Regimes_Frame): - ''' GMM method for a spatial random effects panel model based on Kapoor, Kelejian and Prucha (2007) :cite:`KKP2007`. 
diff --git a/spreg/sputils.py b/spreg/sputils.py index 80f7a2d..124bbb9 100755 --- a/spreg/sputils.py +++ b/spreg/sputils.py @@ -274,7 +274,7 @@ def spisfinite(a): def _spmultiplier(w, rho, method="simple", mtol=0.00000001): - """" + """ " Spatial Lag Multiplier Calculation Follows Kim, Phipps and Anselin (2003) (simple), and LeSage and Pace (2009) (full, power) @@ -294,7 +294,7 @@ def _spmultiplier(w, rho, method="simple", mtol=0.00000001): pow = powers used in power approximation (otherwise 0) """ - multipliers = {"ati": 1.0, "adi": 1.0, "aii": 1.0, "method": method, "warn": ''} + multipliers = {"ati": 1.0, "adi": 1.0, "aii": 1.0, "method": method, "warn": ""} multipliers["pow"] = 0 multipliers["ati"] = 1.0 / (1.0 - rho) n = w.n @@ -303,12 +303,12 @@ def _spmultiplier(w, rho, method="simple", mtol=0.00000001): elif method == "full": wf = w.full()[0] id0 = np.identity(n) - irw0 = (id0 - rho * wf) + irw0 = id0 - rho * wf invirw0 = np.linalg.inv(irw0) adii0 = np.sum(np.diag(invirw0)) multipliers["adi"] = adii0 / n elif method == "power": - ws3 = w.to_sparse(fmt='csr') + ws3 = w.to_sparse(fmt="csr") rhop = rho ww = ws3 pow = 1 @@ -325,15 +325,18 @@ def _spmultiplier(w, rho, method="simple", mtol=0.00000001): multipliers["adi"] = adi.item() multipliers["pow"] = pow else: - multipliers["warn"] = "Method '"+method+"' not supported for spatial impacts.\n" - multipliers["method"] ='simple' + multipliers["warn"] = ( + "Method '" + method + "' not supported for spatial impacts.\n" + ) + multipliers["method"] = "simple" multipliers["aii"] = multipliers["ati"] - multipliers["adi"] - return (multipliers) + return multipliers + -def _sp_effects(reg, variables, spmult, slx_lags=0,slx_vars="All"): +def _sp_effects(reg, variables, spmult, slx_lags=0, slx_vars="All"): """ Calculate spatial lag, direct and indirect effects - + Attributes ---------- reg : regression object @@ -349,21 +352,23 @@ def _sp_effects(reg, variables, spmult, slx_lags=0,slx_vars="All"): bdir : direct effects bind : indirect effects """ - + variables_x_index = variables.index - m1 = spmult['ati'] + m1 = spmult["ati"] btot = m1 * reg.betas[variables_x_index] - m2 = spmult['adi'] + m2 = spmult["adi"] bdir = m2 * reg.betas[variables_x_index] - # Assumes all SLX effects are indirect effects. + # Assumes all SLX effects are indirect effects. 
if slx_lags > 0: if reg.output.regime.nunique() > 1: - btot_idx = pd.Series(btot.flatten(), index=variables_x_index) - wchunk_size = len(variables.query("regime == @reg.output.regime.iloc[0]")) #Number of exogenous variables in each regime + btot_idx = pd.Series(btot.flatten(), index=variables_x_index) + wchunk_size = len( + variables.query("regime == @reg.output.regime.iloc[0]") + ) # Number of exogenous variables in each regime for i in range(slx_lags): - chunk_indices = variables_x_index + (i+1) * wchunk_size + chunk_indices = variables_x_index + (i + 1) * wchunk_size bmult = m1 * reg.betas[chunk_indices] btot_idx[variables_x_index] += bmult.flatten() btot = btot_idx.to_numpy().reshape(btot.shape) @@ -371,12 +376,14 @@ def _sp_effects(reg, variables, spmult, slx_lags=0,slx_vars="All"): else: variables_wx = reg.output.query("var_type == 'wx'") variables_wx_index = variables_wx.index - if hasattr(reg, 'slx_vars') and isinstance(slx_vars,list): - flexwx_indices = list(compress(variables_x_index,slx_vars)) # indices of x variables in wx + if hasattr(reg, "slx_vars") and isinstance(slx_vars, list): + flexwx_indices = list( + compress(variables_x_index, slx_vars) + ) # indices of x variables in wx else: - flexwx_indices = variables_x_index # all x variables + flexwx_indices = variables_x_index # all x variables xind = [h - 1 for h in flexwx_indices] - wchunk_size = len(variables_wx_index)//slx_lags + wchunk_size = len(variables_wx_index) // slx_lags for i in range(slx_lags): start_idx = i * wchunk_size end_idx = start_idx + wchunk_size @@ -386,11 +393,12 @@ def _sp_effects(reg, variables, spmult, slx_lags=0,slx_vars="All"): bind = btot - bdir else: - m3 = spmult['aii'] + m3 = spmult["aii"] bind = m3 * reg.betas[variables_x_index] return btot, bdir, bind + def _test(): import doctest diff --git a/spreg/summary_output.py b/spreg/summary_output.py index 627b3f7..fc3b3d3 100755 --- a/spreg/summary_output.py +++ b/spreg/summary_output.py @@ -237,11 +237,12 @@ def ML_Lag(reg, w, vm, spat_diag, regimes=False): # extra space d "Akaike info criterion", reg.aic, ) - reg.__summary[ - "summary_r2" - ] += " %-22s:%12.3f\n" % ( - "Schwarz criterion", - reg.schwarz, + reg.__summary["summary_r2"] += ( + " %-22s:%12.3f\n" + % ( + "Schwarz criterion", + reg.schwarz, + ) ) # build coefficients table body summary_coefs_allx(reg, reg.z_stat) @@ -270,11 +271,12 @@ def ML_Lag_multi(reg, multireg, vm, spat_diag, regimes=False, sur=False, w=False "Akaike info criterion", mreg.aic, ) - mreg.__summary[ - "summary_r2" - ] += " %-22s:%12.3f\n" % ( - "Schwarz criterion", - mreg.schwarz, + mreg.__summary["summary_r2"] += ( + " %-22s:%12.3f\n" + % ( + "Schwarz criterion", + mreg.schwarz, + ) ) # build coefficients table body summary_coefs_allx(mreg, mreg.z_stat) @@ -312,11 +314,12 @@ def ML_Error(reg, w, vm, spat_diag, regimes=False): # extra space d "Akaike info criterion", reg.aic, ) - reg.__summary[ - "summary_r2" - ] += " %-22s:%12.3f\n" % ( - "Schwarz criterion", - reg.schwarz, + reg.__summary["summary_r2"] += ( + " %-22s:%12.3f\n" + % ( + "Schwarz criterion", + reg.schwarz, + ) ) # build coefficients table body summary_coefs_allx(reg, reg.z_stat) @@ -345,11 +348,12 @@ def ML_Error_multi(reg, multireg, vm, spat_diag, regimes=False, sur=False, w=Fal "Akaike info criterion", mreg.aic, ) - mreg.__summary[ - "summary_r2" - ] += " %-22s:%12.3f\n" % ( - "Schwarz criterion", - mreg.schwarz, + mreg.__summary["summary_r2"] += ( + " %-22s:%12.3f\n" + % ( + "Schwarz criterion", + mreg.schwarz, + ) ) # build coefficients table 
body summary_coefs_allx(mreg, mreg.z_stat) @@ -459,7 +463,7 @@ def GM_Error_Hom(reg, vm, w, regimes=False): # build coefficients table body beta_position = summary_coefs_allx(reg, reg.z_stat, lambd=True) if regimes: - #summary_coefs_lambda(reg, reg.z_stat) + # summary_coefs_lambda(reg, reg.z_stat) summary_regimes(reg) summary_warning(reg) summary(reg=reg, vm=vm, instruments=False, nonspat_diag=False, spat_diag=False) @@ -501,7 +505,7 @@ def GM_Endog_Error_Hom(reg, vm, w, regimes=False): summary_coefs_allx(reg, reg.z_stat, lambd=True) summary_coefs_instruments(reg) if regimes: - #summary_coefs_lambda(reg, reg.z_stat) + # summary_coefs_lambda(reg, reg.z_stat) summary_regimes(reg) summary_warning(reg) summary(reg=reg, vm=vm, instruments=True, nonspat_diag=False, spat_diag=False) @@ -543,7 +547,7 @@ def GM_Error_Het(reg, vm, w, regimes=False): # build coefficients table body beta_position = summary_coefs_allx(reg, reg.z_stat, lambd=True) if regimes: - #summary_coefs_lambda(reg, reg.z_stat) + # summary_coefs_lambda(reg, reg.z_stat) summary_regimes(reg) summary_warning(reg) summary(reg=reg, vm=vm, instruments=False, nonspat_diag=False, spat_diag=False) @@ -585,7 +589,7 @@ def GM_Endog_Error_Het(reg, vm, w, regimes=False): summary_coefs_allx(reg, reg.z_stat, lambd=True) summary_coefs_instruments(reg) if regimes: - #summary_coefs_lambda(reg, reg.z_stat) + # summary_coefs_lambda(reg, reg.z_stat) summary_regimes(reg) summary_warning(reg) summary(reg=reg, vm=vm, instruments=True, nonspat_diag=False, spat_diag=False) @@ -671,7 +675,7 @@ def GM_Combo_Hom(reg, vm, w, regimes=False): summary_coefs_allx(reg, reg.z_stat, lambd=True) summary_coefs_instruments(reg) if regimes: - #summary_coefs_lambda(reg, reg.z_stat) + # summary_coefs_lambda(reg, reg.z_stat) summary_regimes(reg) summary_warning(reg) summary(reg=reg, vm=vm, instruments=True, nonspat_diag=False, spat_diag=False) @@ -715,7 +719,7 @@ def GM_Combo_Het(reg, vm, w, regimes=False): summary_coefs_allx(reg, reg.z_stat, lambd=True) summary_coefs_instruments(reg) if regimes: - #summary_coefs_lambda(reg, reg.z_stat) + # summary_coefs_lambda(reg, reg.z_stat) summary_regimes(reg) summary_warning(reg) summary(reg=reg, vm=vm, instruments=True, nonspat_diag=False, spat_diag=False) @@ -765,9 +769,9 @@ def Probit(reg, vm, w, spat_diag): reg.__summary["summary_r2"] += "%-21s: %3.4f\n" % ("LR test", reg.LR[0]) reg.__summary["summary_r2"] += "%-21s: %3.4f\n" % ("LR test (p-value)", reg.LR[1]) if reg.warning: - reg.__summary[ - "summary_r2" - ] += "\nMaximum number of iterations exceeded or gradient and/or function calls not changing\n" + reg.__summary["summary_r2"] += ( + "\nMaximum number of iterations exceeded or gradient and/or function calls not changing\n" + ) # build coefficients table body beta_position = summary_coefs_allx(reg, reg.z_stat) reg.__summary["summary_other_mid"] = summary_coefs_slopes(reg) @@ -879,11 +883,12 @@ def Panel_FE_Lag(reg, w, vm): "Akaike info criterion", reg.aic, ) - reg.__summary[ - "summary_r2" - ] += " %-22s:%12.3f\n" % ( - "Schwarz criterion", - reg.schwarz, + reg.__summary["summary_r2"] += ( + " %-22s:%12.3f\n" + % ( + "Schwarz criterion", + reg.schwarz, + ) ) # build coefficients table body summary_coefs_allx(reg, reg.z_stat) @@ -907,11 +912,12 @@ def Panel_FE_Error(reg, w, vm, regimes=False): "Akaike info criterion", reg.aic, ) - reg.__summary[ - "summary_r2" - ] += " %-22s:%12.3f\n" % ( - "Schwarz criterion", - reg.schwarz, + reg.__summary["summary_r2"] += ( + " %-22s:%12.3f\n" + % ( + "Schwarz criterion", + reg.schwarz, + ) 
) # build coefficients table body summary_coefs_allx(reg, reg.z_stat) @@ -975,9 +981,7 @@ def beta_diag_lag(reg, robust, error=True): reg.pr2_e, ) else: - reg.__summary[ - "summary_r2" - ] += ( + reg.__summary["summary_r2"] += ( "Spatial Pseudo R-squared: omitted due to rho outside the boundary (-1, 1)." ) @@ -1195,7 +1199,7 @@ def _get_var_indices(reg, zt_stat, lambd=False): j = i * krex jyd = krex * reg.nr + i * reg.kryd + kf - kfyd if len(zt_stat) == len(reg.betas) and lambd: - jyd += -1 #discount lambda as fixed coefficient in this counter + jyd += -1 # discount lambda as fixed coefficient in this counter name_reg = var_names[j + j_con : j + krex] + var_names[jyd : jyd + reg.kryd] # name_reg.sort() if reg.constant_regi == "many": @@ -1205,12 +1209,13 @@ def _get_var_indices(reg, zt_stat, lambd=False): if reg.constant_regi == "one": indices += [krex * reg.nr] if len(indices) < last_v: - if len(indices) - last_v == -1 and reg.kryd>0 and lambd: - name_reg = ['lambda'] + if len(indices) - last_v == -1 and reg.kryd > 0 and lambd: + name_reg = ["lambda"] else: name_reg = ( - var_names[krex * reg.nr + 1 - j_con : krex * reg.nr + kf - kfyd] - + var_names[reg.kr * reg.nr + kf - kfyd : reg.kr * reg.nr + kf]) + var_names[krex * reg.nr + 1 - j_con : krex * reg.nr + kf - kfyd] + + var_names[reg.kr * reg.nr + kf - kfyd : reg.kr * reg.nr + kf] + ) # name_reg.sort() indices += [var_names.index(ind) for ind in name_reg] @@ -1427,14 +1432,15 @@ def summary_coefs_lambda(reg, zt_stat): except: name_var = reg.name_x if len(reg.betas) == len(zt_stat): - reg.__summary[ - "summary_coefs" - ] += "%20s %12.7f %12.7f %12.7f %12.7f\n" % ( - name_var[-1], - reg.betas[-1][0], - reg.std_err[-1], - zt_stat[-1][0], - zt_stat[-1][1], + reg.__summary["summary_coefs"] += ( + "%20s %12.7f %12.7f %12.7f %12.7f\n" + % ( + name_var[-1], + reg.betas[-1][0], + reg.std_err[-1], + zt_stat[-1][0], + zt_stat[-1][1], + ) ) else: n_coef = len(reg.betas) - len(zt_stat) diff --git a/spreg/sur.py b/spreg/sur.py index b8c33f0..def0798 100644 --- a/spreg/sur.py +++ b/spreg/sur.py @@ -425,7 +425,6 @@ def __init__( name_w=None, name_regimes=None, ): - self.name_ds = USER.set_name_ds(name_ds) self.name_w = USER.set_name_w(name_w, w) self.n_eq = len(bigy.keys()) @@ -817,7 +816,6 @@ def __init__( name_ds=None, name_regimes=None, ): - self.name_ds = USER.set_name_ds(name_ds) self.n_eq = len(bigy.keys()) diff --git a/spreg/sur_error.py b/spreg/sur_error.py index 37143f5..ccc791a 100644 --- a/spreg/sur_error.py +++ b/spreg/sur_error.py @@ -328,7 +328,6 @@ def __init__( name_w=None, name_regimes=None, ): - # check on variable names for listing results self.name_ds = USER.set_name_ds(name_ds) self.name_w = USER.set_name_w(name_w, w) @@ -777,7 +776,6 @@ def __init__( name_w=None, name_regimes=None, ): - # need checks on match between bigy, bigX dimensions # check on variable names for listing results self.name_ds = USER.set_name_ds(name_ds) @@ -1119,7 +1117,7 @@ def _test(): bigy0, bigX0, w, - #regimes=regimes, + # regimes=regimes, name_bigy=bigyvars0, name_bigX=bigXvars0, name_w="natqueen", diff --git a/spreg/sur_utils.py b/spreg/sur_utils.py index bf2912f..ef8f753 100644 --- a/spreg/sur_utils.py +++ b/spreg/sur_utils.py @@ -63,7 +63,7 @@ def sur_dictxy(db, y_vars, x_vars, space_id=None, time_id=None): bigy = {} bigy_vars = dict((r, y_vars[r]) for r in range(n_eq)) bigy = dict((r, np.resize(y[:, r], (n, 1))) for r in range(n_eq)) - if not (len(x_vars) == n_eq): + if not (len(x_vars) == n_eq): raise Exception("Error: mismatch variable lists") bigX 
= {} bigX_vars = {} @@ -80,7 +80,7 @@ def sur_dictxy(db, y_vars, x_vars, space_id=None, time_id=None): k = litxc.shape[1] return (bigy, bigX, bigy_vars, bigX_vars) elif len(y_vars) == 1: # splm format - if not (time_id): + if not (time_id): raise Exception("Error: time id must be specified") try: y = np.array([db[name] for name in y_vars]).T @@ -291,7 +291,7 @@ def sur_dict2mat(dicts): """ n_dicts = len(dicts.keys()) - #mat = np.vstack((dicts[t] for t in range(n_dicts))) + # mat = np.vstack((dicts[t] for t in range(n_dicts))) mat = np.vstack([dicts[t] for t in range(n_dicts)]) return mat @@ -395,11 +395,11 @@ def sur_est(bigXX, bigXy, bigE, bigK): for t in range(n_eq): sxy = sxy + sigi[r, t] * bigXy[(r, t)] sigiXy[r] = sxy - #xsigy = np.vstack((sigiXy[t] for t in range(n_eq))) + # xsigy = np.vstack((sigiXy[t] for t in range(n_eq))) xsigy = np.vstack(tuple(sigiXy[t] for t in range(n_eq))) - #xsigx = np.vstack(((np.hstack(sigiXX[(r, t)] for t in range(n_eq))) for r in range(n_eq))) + # xsigx = np.vstack(((np.hstack(sigiXX[(r, t)] for t in range(n_eq))) for r in range(n_eq))) array_lists = [[sigiXX[(r, t)] for t in range(n_eq)] for r in range(n_eq)] - xsigx = np.vstack([np.hstack(arr_list) for arr_list in array_lists]) + xsigx = np.vstack([np.hstack(arr_list) for arr_list in array_lists]) varb = la.inv(xsigx) beta = np.dot(varb, xsigy) bSUR = sur_mat2dict(beta, bigK) @@ -427,7 +427,7 @@ def sur_resids(bigy, bigX, beta): """ n_eq = len(bigy.keys()) - #bigE = np.hstack((bigy[r] - spdot(bigX[r], beta[r])) for r in range(n_eq)) + # bigE = np.hstack((bigy[r] - spdot(bigX[r], beta[r])) for r in range(n_eq)) bigE = np.hstack(tuple(bigy[r] - spdot(bigX[r], beta[r]) for r in range(n_eq))) return bigE @@ -455,7 +455,7 @@ def sur_predict(bigy, bigX, beta): """ n_eq = len(bigy.keys()) - #bigYP = np.hstack(spdot(bigX[r], beta[r]) for r in range(n_eq)) + # bigYP = np.hstack(spdot(bigX[r], beta[r]) for r in range(n_eq)) bigYP = np.hstack([spdot(bigX[r], beta[r]) for r in range(n_eq)]) return bigYP diff --git a/spreg/twosls.py b/spreg/twosls.py index 5bae2e1..e2868fd 100644 --- a/spreg/twosls.py +++ b/spreg/twosls.py @@ -4,7 +4,14 @@ from . import user_output as USER from . import diagnostics as DIAG from .output import output, _spat_diag_out, _summary_dwh -from .utils import spdot, sphstack, RegressionPropsY, RegressionPropsVM, set_warn, get_lags +from .utils import ( + spdot, + sphstack, + RegressionPropsY, + RegressionPropsVM, + set_warn, + get_lags, +) import pandas as pd __author__ = "Luc Anselin lanselin@gmail.com, Pedro Amaral pedrovma@gmail.com, David C. Folch david.folch@asu.edu, Jing Yao jingyao@asu.edu" @@ -12,7 +19,6 @@ class BaseTSLS(RegressionPropsY, RegressionPropsVM): - """ Two stage least squares (2SLS) (note: no consistency checks, diagnostics or constant added) @@ -134,7 +140,6 @@ class BaseTSLS(RegressionPropsY, RegressionPropsVM): def __init__( self, y, x, yend, q=None, h=None, robust=None, gwk=None, sig2n_k=False ): - if issubclass(type(q), np.ndarray) and issubclass(type(h), np.ndarray): raise Exception("Please do not provide 'q' and 'h' together") if q is None and h is None: @@ -256,7 +261,7 @@ class TSLS(BaseTSLS): If True, then use n-k to estimate sigma^2. If False, use n. 
spat_diag : boolean If True, then compute Anselin-Kelejian test (requires w) - nonspat_diag : boolean + nonspat_diag : boolean If True, then compute non-spatial diagnostics vm : boolean If True, include variance-covariance matrix in summary @@ -463,27 +468,32 @@ def __init__( name_ds=None, latex=False, ): - n = USER.check_arrays(y, x, yend, q) y, name_y = USER.check_y(y, n, name_y) yend, q, name_yend, name_q = USER.check_endog([yend, q], [name_yend, name_q]) USER.check_robust(robust, gwk) if robust == "hac" and spat_diag: - set_warn( - self, - "Spatial diagnostics are not available for HAC estimation. The spatial diagnostics have been disabled for this model.", - ) - spat_diag = False + set_warn( + self, + "Spatial diagnostics are not available for HAC estimation. The spatial diagnostics have been disabled for this model.", + ) + spat_diag = False x_constant, name_x, warn = USER.check_constant(x, name_x) self.name_x = USER.set_name_x(name_x, x_constant) - w = USER.check_weights(w, y, slx_lags=slx_lags, w_required=spat_diag) - if slx_lags>0: -# lag_x = get_lags(w, x_constant[:, 1:], slx_lags) -# x_constant = np.hstack((x_constant, lag_x)) -# self.name_x += USER.set_name_spatial_lags(self.name_x[1:], slx_lags) - x_constant,self.name_x = USER.flex_wx(w,x=x_constant,name_x=self.name_x,constant=True, - slx_lags=slx_lags,slx_vars=slx_vars) + w = USER.check_weights(w, y, slx_lags=slx_lags, w_required=spat_diag) + if slx_lags > 0: + # lag_x = get_lags(w, x_constant[:, 1:], slx_lags) + # x_constant = np.hstack((x_constant, lag_x)) + # self.name_x += USER.set_name_spatial_lags(self.name_x[1:], slx_lags) + x_constant, self.name_x = USER.flex_wx( + w, + x=x_constant, + name_x=self.name_x, + constant=True, + slx_lags=slx_lags, + slx_vars=slx_vars, + ) set_warn(self, warn) BaseTSLS.__init__( @@ -508,21 +518,22 @@ def __init__( self.robust = USER.set_robust(robust) self.name_w = USER.set_name_w(name_w, w) self.name_gwk = USER.set_name_w(name_gwk, gwk) - self.output = pd.DataFrame(self.name_x + self.name_yend, - columns=['var_names']) - self.output['var_type'] = ['x'] * len(self.name_x) + ['yend'] * len(self.name_yend) - self.output['regime'], self.output['equation'] = (0, 0) + self.output = pd.DataFrame(self.name_x + self.name_yend, columns=["var_names"]) + self.output["var_type"] = ["x"] * len(self.name_x) + ["yend"] * len( + self.name_yend + ) + self.output["regime"], self.output["equation"] = (0, 0) diag_out = "" if nonspat_diag: self.dwh = DIAG.dwh(self) sum_dwh = _summary_dwh(self) diag_out += sum_dwh if spat_diag: - diag_out += _spat_diag_out(self, w, 'yend') - + diag_out += _spat_diag_out(self, w, "yend") output(reg=self, vm=vm, robust=robust, other_end=diag_out, latex=latex) + def _test(): import doctest diff --git a/spreg/twosls_regimes.py b/spreg/twosls_regimes.py index 5163fbc..820df45 100644 --- a/spreg/twosls_regimes.py +++ b/spreg/twosls_regimes.py @@ -16,7 +16,6 @@ class TSLS_Regimes(BaseTSLS, REGI.Regimes_Frame): - """ Two stage least squares (2SLS) with regimes. 
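For reference, the options touched in the twosls.py hunks above (robust="white"/"hac", spat_diag, nonspat_diag, the name_* labels, and the new self.output DataFrame) are all driven from the user-facing TSLS class. A minimal usage sketch, assuming the Columbus sample data shipped with libpysal; the variable choices and labels are illustrative only and are not part of this diff:

import numpy as np
import libpysal
from spreg import TSLS

db = libpysal.io.open(libpysal.examples.get_path("columbus.dbf"), "r")
y = np.array(db.by_col("CRIME")).reshape(-1, 1)    # dependent variable
x = np.array([db.by_col("INC")]).T                 # exogenous regressor
yend = np.array([db.by_col("HOVAL")]).T            # endogenous regressor
q = np.array([db.by_col("DISCBD")]).T              # instrument
w = libpysal.weights.Queen.from_shapefile(libpysal.examples.get_path("columbus.shp"))
w.transform = "r"

model = TSLS(y, x, yend, q=q, w=w, robust="white", spat_diag=True,
             name_y="crime", name_x=["income"], name_yend=["home value"],
             name_q=["dist. to CBD"], name_w="columbus queen", name_ds="columbus")
print(model.output)    # tidy coefficient table assembled in the hunks above
print(model.summary)
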
@@ -366,7 +365,6 @@ def __init__( summ=True, latex=False, ): - n = USER.check_arrays(y, x) y, name_y = USER.check_y(y, n, name_y) USER.check_robust(robust, gwk) @@ -438,7 +436,7 @@ def __init__( name_yend, name_q, summ, - latex + latex, ) else: q, self.name_q = REGI.Regimes_Frame.__init__( @@ -451,7 +449,7 @@ def __init__( constant_regi, cols2regi=cols2regi, names=name_x, - rlist=True + rlist=True, ) yend, self.name_yend, yend_rlist = REGI.Regimes_Frame.__init__( self, @@ -461,13 +459,16 @@ def __init__( cols2regi=cols2regi, yend=True, names=name_yend, - rlist=True + rlist=True, + ) + self.output = pd.DataFrame( + self.name_x + self.name_yend, columns=["var_names"] + ) + self.output["var_type"] = ["x"] * len(self.name_x) + ["yend"] * len( + self.name_yend ) - self.output = pd.DataFrame(self.name_x+self.name_yend, - columns=['var_names']) - self.output['var_type'] = ['x']*len(self.name_x)+['yend']*len(self.name_yend) - self.output['regime'] = x_rlist+yend_rlist - self.output['equation'] = 0 + self.output["regime"] = x_rlist + yend_rlist + self.output["equation"] = 0 BaseTSLS.__init__( self, y=y, x=x, yend=yend, q=q, robust=robust, gwk=gwk, sig2n_k=sig2n_k @@ -486,12 +487,11 @@ def __init__( self.robust = USER.set_robust(robust) if summ: if spat_diag: - diag_out = _spat_diag_out(self, w, 'yend') + diag_out = _spat_diag_out(self, w, "yend") else: diag_out = None output(reg=self, vm=vm, robust=robust, other_end=diag_out, latex=latex) - def _tsls_regimes_multi( self, x, @@ -510,7 +510,7 @@ def _tsls_regimes_multi( name_yend, name_q, summ, - latex + latex, ): results_p = {} """ @@ -547,7 +547,7 @@ def _tsls_regimes_multi( name_q, self.name_w, self.name_regimes, - slx_lags + slx_lags, ), ) else: @@ -569,7 +569,7 @@ def _tsls_regimes_multi( name_q, self.name_w, self.name_regimes, - slx_lags + slx_lags, ) ) @@ -600,7 +600,9 @@ def _tsls_regimes_multi( self.name_h, ) = ([], [], [], [], [], []) counter = 0 - self.output = pd.DataFrame(columns=['var_names', 'var_type', 'regime', 'equation']) + self.output = pd.DataFrame( + columns=["var_names", "var_type", "regime", "equation"] + ) for r in self.regimes_set: """ if is_win: @@ -617,24 +619,32 @@ def _tsls_regimes_multi( (counter * self.kr) : ((counter + 1) * self.kr), (counter * self.kr) : ((counter + 1) * self.kr), ] = results[r].vm - self.betas[ - (counter * self.kr) : ((counter + 1) * self.kr), - ] = results[r].betas - self.u[ - regi_ids[r], - ] = results[r].u - self.predy[ - regi_ids[r], - ] = results[r].predy + self.betas[(counter * self.kr) : ((counter + 1) * self.kr),] = results[ + r + ].betas + self.u[regi_ids[r],] = results[r].u + self.predy[regi_ids[r],] = results[r].predy self.name_y += results[r].name_y self.name_x += results[r].name_x self.name_yend += results[r].name_yend self.name_q += results[r].name_q self.name_z += results[r].name_z self.name_h += results[r].name_h - self.output = pd.concat([self.output, pd.DataFrame({'var_names': results[r].name_x+results[r].name_yend, - 'var_type': ['x']*len(results[r].name_x)+['yend']*len(results[r].name_yend), - 'regime': r, 'equation': r})], ignore_index=True) + self.output = pd.concat( + [ + self.output, + pd.DataFrame( + { + "var_names": results[r].name_x + results[r].name_yend, + "var_type": ["x"] * len(results[r].name_x) + + ["yend"] * len(results[r].name_yend), + "regime": r, + "equation": r, + } + ), + ], + ignore_index=True, + ) counter += 1 self.multi = results @@ -650,11 +660,11 @@ def _tsls_regimes_multi( self.chow = REGI.Chow(self) if spat_diag: self._get_spat_diag_props(results, 
regi_ids, x_constant, yend, q) - diag_out = _spat_diag_out(self, w, 'yend') + diag_out = _spat_diag_out(self, w, "yend") else: diag_out = None if summ: - self.output.sort_values(by='regime', inplace=True) + self.output.sort_values(by="regime", inplace=True) output(reg=self, vm=vm, robust=robust, other_end=diag_out, latex=latex) def _get_spat_diag_props(self, results, regi_ids, x, yend, q): @@ -728,9 +738,9 @@ def _work( def _optimal_weight(reg, sig2n_k, warn=True): try: - Hu = reg.h.toarray() * reg.u ** 2 + Hu = reg.h.toarray() * reg.u**2 except: - Hu = reg.h * reg.u ** 2 + Hu = reg.h * reg.u**2 if sig2n_k: S = spdot(reg.h.T, Hu, array_out=True) / (reg.n - reg.k) else: @@ -746,7 +756,7 @@ def _optimal_weight(reg, sig2n_k, warn=True): else: vm = fac2 * reg.n RegressionProps_basic(reg, betas=betas, vm=vm, sig2=False) - #reg.title += " (Optimal-Weighted GMM)" + # reg.title += " (Optimal-Weighted GMM)" if warn: set_warn( reg, "Residuals treated as homoskedastic for the purpose of diagnostics." @@ -789,7 +799,7 @@ def _test(): yd, q, regimes, - w = w, + w=w, constant_regi="many", spat_diag=True, name_y=y_var, @@ -797,15 +807,11 @@ def _test(): name_yend=yd_var, name_q=q_var, name_regimes=r_var, - #cols2regi=[False, True, True, False], + # cols2regi=[False, True, True, False], sig2n_k=False, - regime_err_sep = True, - #robust = 'hac', - vm = False + regime_err_sep=True, + # robust = 'hac', + vm=False, ) print(tslsr.output) print(tslsr.summary) - - - - diff --git a/spreg/twosls_sp.py b/spreg/twosls_sp.py index 59c247a..6c12342 100755 --- a/spreg/twosls_sp.py +++ b/spreg/twosls_sp.py @@ -171,31 +171,28 @@ class BaseGM_Lag(TSLS.BaseTSLS): """ def __init__( - self, - y, - x, - yend=None, - q=None, - w=None, - w_lags=1, - slx_lags=0, - slx_vars="All", - lag_q=True, - robust=None, - gwk=None, - sig2n_k=False, + self, + y, + x, + yend=None, + q=None, + w=None, + w_lags=1, + slx_lags=0, + slx_vars="All", + lag_q=True, + robust=None, + gwk=None, + sig2n_k=False, ): - - - if slx_lags > 0: - yend2, q2, wx = set_endog(y, x[:, 1:], w, yend, q, w_lags, lag_q, slx_lags,slx_vars) + yend2, q2, wx = set_endog( + y, x[:, 1:], w, yend, q, w_lags, lag_q, slx_lags, slx_vars + ) x = np.hstack((x, wx)) else: yend2, q2 = set_endog(y, x[:, 1:], w, yend, q, w_lags, lag_q) - - TSLS.BaseTSLS.__init__( self, y=y, x=x, yend=yend2, q=q2, robust=robust, gwk=gwk, sig2n_k=sig2n_k ) @@ -504,33 +501,32 @@ class GM_Lag(BaseGM_Lag): """ def __init__( - self, - y, - x, - yend=None, - q=None, - w=None, - w_lags=1, - lag_q=True, - slx_lags=0, - slx_vars="All", - robust=None, - gwk=None, - sig2n_k=False, - spat_diag=True, - spat_impacts="simple", - vm=False, - name_y=None, - name_x=None, - name_yend=None, - name_q=None, - name_w=None, - name_gwk=None, - name_ds=None, - latex=False, - hard_bound=False, + self, + y, + x, + yend=None, + q=None, + w=None, + w_lags=1, + lag_q=True, + slx_lags=0, + slx_vars="All", + robust=None, + gwk=None, + sig2n_k=False, + spat_diag=True, + spat_impacts="simple", + vm=False, + name_y=None, + name_x=None, + name_yend=None, + name_q=None, + name_w=None, + name_gwk=None, + name_ds=None, + latex=False, + hard_bound=False, ): - n = USER.check_arrays(x, yend, q) y, name_y = USER.check_y(y, n, name_y) w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) @@ -543,24 +539,27 @@ def __init__( ) spat_diag = False x_constant, name_x, warn = USER.check_constant(x, name_x) - name_x = USER.set_name_x(name_x, x_constant) # need to check for None and set defaults + name_x = USER.set_name_x( + name_x, x_constant + 
) # need to check for None and set defaults # kx and wkx are used to replace complex calculation for output if slx_lags > 0: # adjust for flexwx - if (isinstance(slx_vars,list)): # slx_vars has True,False - if len(slx_vars) != x.shape[1] : + if isinstance(slx_vars, list): # slx_vars has True,False + if len(slx_vars) != x.shape[1]: raise Exception("slx_vars incompatible with x column dimensions") else: # use slx_vars to extract proper columns workname = name_x[1:] kx = len(workname) - vv = list(compress(workname,slx_vars)) + vv = list(compress(workname, slx_vars)) name_x += USER.set_name_spatial_lags(vv, slx_lags) wkx = slx_vars.count(True) else: kx = len(name_x) - 1 wkx = kx - name_x += USER.set_name_spatial_lags(name_x[1:], slx_lags) # exclude constant - + name_x += USER.set_name_spatial_lags( + name_x[1:], slx_lags + ) # exclude constant set_warn(self, warn) BaseGM_Lag.__init__( @@ -581,7 +580,12 @@ def __init__( self.rho = self.betas[-1] self.predy_e, self.e_pred, warn = sp_att( - w, self.y, self.predy, self.yend[:, -1].reshape(self.n, 1), self.rho, hard_bound=hard_bound + w, + self.y, + self.predy, + self.yend[:, -1].reshape(self.n, 1), + self.rho, + hard_bound=hard_bound, ) set_warn(self, warn) self.title = "SPATIAL TWO STAGE LEAST SQUARES" @@ -599,25 +603,41 @@ def __init__( if slx_lags > 0: # need to remove all but last SLX variables from name_x self.name_x0 = [] self.name_x0.append(self.name_x[0]) # constant - if (isinstance(slx_vars,list)): # boolean list passed + if isinstance(slx_vars, list): # boolean list passed # x variables that were not lagged - self.name_x0.extend(list(compress(self.name_x[1:],[not i for i in slx_vars]))) + self.name_x0.extend( + list(compress(self.name_x[1:], [not i for i in slx_vars])) + ) # last wkx variables self.name_x0.extend(self.name_x[-wkx:]) - else: - okx = int((self.k - self.kstar - 1) / (slx_lags + 1)) # number of original exogenous vars + okx = int( + (self.k - self.kstar - 1) / (slx_lags + 1) + ) # number of original exogenous vars self.name_x0.extend(self.name_x[-okx:]) - self.name_q.extend(USER.set_name_q_sp(self.name_x0, w_lags, self.name_q, lag_q)) + self.name_q.extend( + USER.set_name_q_sp(self.name_x0, w_lags, self.name_q, lag_q) + ) - #var_types = ['x'] * (kx + 1) + ['wx'] * kx * slx_lags + ['yend'] * (len(self.name_yend) - 1) + ['rho'] - var_types = ['x'] * (kx + 1) + ['wx'] * wkx * slx_lags + ['yend'] * (len(self.name_yend) - 1) + ['rho'] + # var_types = ['x'] * (kx + 1) + ['wx'] * kx * slx_lags + ['yend'] * (len(self.name_yend) - 1) + ['rho'] + var_types = ( + ["x"] * (kx + 1) + + ["wx"] * wkx * slx_lags + + ["yend"] * (len(self.name_yend) - 1) + + ["rho"] + ) else: - self.name_q.extend(USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q)) - var_types = ['x'] * len(self.name_x) + ['yend'] * (len(self.name_yend) - 1) + ['rho'] + self.name_q.extend( + USER.set_name_q_sp(self.name_x, w_lags, self.name_q, lag_q) + ) + var_types = ( + ["x"] * len(self.name_x) + + ["yend"] * (len(self.name_yend) - 1) + + ["rho"] + ) self.name_h = USER.set_name_h(self.name_x, self.name_q) self.robust = USER.set_robust(robust) @@ -626,16 +646,18 @@ def __init__( self.slx_lags = slx_lags self.slx_vars = slx_vars - self.output = pd.DataFrame(self.name_x + self.name_yend, columns=['var_names']) - self.output['var_type'] = var_types - self.output['regime'], self.output['equation'] = (0, 0) + self.output = pd.DataFrame(self.name_x + self.name_yend, columns=["var_names"]) + self.output["var_type"] = var_types + self.output["regime"], 
self.output["equation"] = (0, 0) self.other_top = _spat_pseudo_r2(self) diag_out = None if spat_diag: - diag_out = _spat_diag_out(self, w, 'yend') + diag_out = _spat_diag_out(self, w, "yend") if spat_impacts: - self.sp_multipliers, impacts_str = _summary_impacts(self, w, spat_impacts, slx_lags,slx_vars) + self.sp_multipliers, impacts_str = _summary_impacts( + self, w, spat_impacts, slx_lags, slx_vars + ) try: diag_out += impacts_str except TypeError: diff --git a/spreg/twosls_sp_regimes.py b/spreg/twosls_sp_regimes.py index a427a70..ec7545b 100644 --- a/spreg/twosls_sp_regimes.py +++ b/spreg/twosls_sp_regimes.py @@ -11,14 +11,22 @@ from . import user_output as USER from .twosls_regimes import TSLS_Regimes, _optimal_weight from .twosls import BaseTSLS -from .utils import set_endog, set_endog_sparse, sp_att, set_warn, sphstack, spdot, optim_k +from .utils import ( + set_endog, + set_endog_sparse, + sp_att, + set_warn, + sphstack, + spdot, + optim_k, +) from .robust import hac_multi from .output import output, _spat_diag_out, _spat_pseudo_r2, _summary_impacts from .skater_reg import Skater_reg from .twosls_sp import BaseGM_Lag -class GM_Lag_Regimes(TSLS_Regimes, REGI.Regimes_Frame): +class GM_Lag_Regimes(TSLS_Regimes, REGI.Regimes_Frame): """ Spatial two stage least squares (S2SLS) with regimes; :cite:`Anselin1988` @@ -456,7 +464,7 @@ def __init__( w_lags=1, slx_lags=0, lag_q=True, - robust='white', + robust="white", gwk=None, sig2n_k=False, spat_diag=True, @@ -478,17 +486,19 @@ def __init__( latex=False, hard_bound=False, ): - n = USER.check_arrays(y, x) y, name_y = USER.check_y(y, n, name_y) yend, q, name_yend, name_q = USER.check_endog([yend, q], [name_yend, name_q]) w = USER.check_weights(w, y, w_required=True, slx_lags=slx_lags) USER.check_robust(robust, gwk) if regime_lag_sep and not regime_err_sep: - set_warn(self, "regime_err_sep set to True when regime_lag_sep=True.") + set_warn(self, "regime_err_sep set to True when regime_lag_sep=True.") regime_err_sep = True if regime_err_sep and not regime_lag_sep: - set_warn(self, "Groupwise heteroskedasticity is not currently available for this method,\n so regime_err_sep has been set to False.") + set_warn( + self, + "Groupwise heteroskedasticity is not currently available for this method,\n so regime_err_sep has been set to False.", + ) regime_err_sep = False if robust == "hac": if regime_err_sep: @@ -514,22 +524,33 @@ def __init__( self.name_regimes = USER.set_name_ds(name_regimes) self.constant_regi = constant_regi if slx_lags > 0: - yend2, q2, wx = set_endog(y, x_constant, w, yend, q, w_lags, lag_q, slx_lags) + yend2, q2, wx = set_endog( + y, x_constant, w, yend, q, w_lags, lag_q, slx_lags + ) x_constant = np.hstack((x_constant, wx)) name_slx = USER.set_name_spatial_lags(name_x, slx_lags) - name_q.extend(USER.set_name_q_sp(name_slx[-len(name_x):], w_lags, name_q, lag_q, force_all=True)) + name_q.extend( + USER.set_name_q_sp( + name_slx[-len(name_x) :], w_lags, name_q, lag_q, force_all=True + ) + ) name_x += name_slx - if cols2regi == 'all': + if cols2regi == "all": cols2regi = REGI.check_cols2regi( - constant_regi, cols2regi, x_constant, yend=yend2, add_cons=False)[0:-1] + constant_regi, cols2regi, x_constant, yend=yend2, add_cons=False + )[0:-1] else: cols2regi = REGI.check_cols2regi( - constant_regi, cols2regi, x_constant, yend=yend2, add_cons=False) + constant_regi, cols2regi, x_constant, yend=yend2, add_cons=False + ) else: - name_q.extend(USER.set_name_q_sp(name_x, w_lags, name_q, lag_q, force_all=True)) + name_q.extend( + 
USER.set_name_q_sp(name_x, w_lags, name_q, lag_q, force_all=True) + ) yend2, q2 = yend, q cols2regi = REGI.check_cols2regi( - constant_regi, cols2regi, x_constant, yend=yend2, add_cons=False) + constant_regi, cols2regi, x_constant, yend=yend2, add_cons=False + ) self.n = x_constant.shape[0] self.cols2regi = cols2regi self.regimes_set = REGI._get_regimes_set(regimes) @@ -632,25 +653,38 @@ def __init__( self.sp_att_reg(w_i, regi_ids, yend2[:, -1].reshape(self.n, 1)) else: self.rho = self.betas[-1] - self.output.iat[-1, self.output.columns.get_loc('var_type')] = 'rho' + self.output.iat[-1, self.output.columns.get_loc("var_type")] = "rho" self.predy_e, self.e_pred, warn = sp_att( - w, self.y, self.predy, yend2[:, -1].reshape(self.n, 1), self.rho, hard_bound=hard_bound) + w, + self.y, + self.predy, + yend2[:, -1].reshape(self.n, 1), + self.rho, + hard_bound=hard_bound, + ) set_warn(self, warn) self.regime_lag_sep = regime_lag_sep self.title = "SPATIAL " + self.title if slx_lags > 0: for m in self.regimes_set: - r_output = self.output[(self.output['regime'] == str(m)) & (self.output['var_type'] == 'x')] - wx_index = r_output.index[-((len(r_output)-1)//(slx_lags+1)) * slx_lags:] - self.output.loc[wx_index, 'var_type'] = 'wx' + r_output = self.output[ + (self.output["regime"] == str(m)) + & (self.output["var_type"] == "x") + ] + wx_index = r_output.index[ + -((len(r_output) - 1) // (slx_lags + 1)) * slx_lags : + ] + self.output.loc[wx_index, "var_type"] = "wx" self.title = " SPATIAL 2SLS WITH SLX (SPATIAL DURBIN MODEL) - REGIMES" self.other_top = _spat_pseudo_r2(self) self.slx_lags = slx_lags diag_out = None if spat_diag: - diag_out = _spat_diag_out(self, w, 'yend') + diag_out = _spat_diag_out(self, w, "yend") if spat_impacts: - self.sp_multipliers, impacts_str = _summary_impacts(self, w, spat_impacts, slx_lags, regimes=True) + self.sp_multipliers, impacts_str = _summary_impacts( + self, w, spat_impacts, slx_lags, regimes=True + ) try: diag_out += impacts_str except TypeError: @@ -789,7 +823,9 @@ def GM_Lag_Regimes_Multi( self.name_h, ) = ([], [], [], [], [], []) counter = 0 - self.output = pd.DataFrame(columns=['var_names', 'var_type', 'regime', 'equation']) + self.output = pd.DataFrame( + columns=["var_names", "var_type", "regime", "equation"] + ) for r in self.regimes_set: """ if is_win: @@ -806,7 +842,8 @@ def GM_Lag_Regimes_Multi( results[r].y, results[r].predy, results[r].yend[:, -1].reshape(results[r].n, 1), - results[r].rho, hard_bound=hard_bound + results[r].rho, + hard_bound=hard_bound, ) set_warn(results[r], warn) results[r].w = w_i[r] @@ -814,21 +851,13 @@ def GM_Lag_Regimes_Multi( (counter * self.kr) : ((counter + 1) * self.kr), (counter * self.kr) : ((counter + 1) * self.kr), ] = results[r].vm - self.betas[ - (counter * self.kr) : ((counter + 1) * self.kr), - ] = results[r].betas - self.u[ - regi_ids[r], - ] = results[r].u - self.predy[ - regi_ids[r], - ] = results[r].predy - self.predy_e[ - regi_ids[r], - ] = results[r].predy_e - self.e_pred[ - regi_ids[r], - ] = results[r].e_pred + self.betas[(counter * self.kr) : ((counter + 1) * self.kr),] = results[ + r + ].betas + self.u[regi_ids[r],] = results[r].u + self.predy[regi_ids[r],] = results[r].predy + self.predy_e[regi_ids[r],] = results[r].predy_e + self.e_pred[regi_ids[r],] = results[r].e_pred self.name_y += results[r].name_y self.name_x += results[r].name_x self.name_yend += results[r].name_yend @@ -837,24 +866,38 @@ def GM_Lag_Regimes_Multi( self.name_h += results[r].name_h if r == self.regimes_set[0]: self.hac_var = 
np.zeros((self.n, results[r].h.shape[1]), float) - self.hac_var[ - regi_ids[r], - ] = results[r].h + self.hac_var[regi_ids[r],] = results[r].h results[r].other_top = _spat_pseudo_r2(results[r]) results[r].other_mid = "" if slx_lags > 0: kx = (results[r].k - results[r].kstar - 1) // (slx_lags + 1) - var_types = ['x'] * (kx + 1) + ['wx'] * kx * slx_lags + ['yend'] * (len(results[r].name_yend) - 1) + ['rho'] + var_types = ( + ["x"] * (kx + 1) + + ["wx"] * kx * slx_lags + + ["yend"] * (len(results[r].name_yend) - 1) + + ["rho"] + ) else: - var_types = ['x'] * len(results[r].name_x) + ['yend'] * (len(results[r].name_yend)-1) + ['rho'] - results[r].output = pd.DataFrame({'var_names': results[r].name_x + results[r].name_yend, - 'var_type': var_types, - 'regime': r, 'equation': r}) + var_types = ( + ["x"] * len(results[r].name_x) + + ["yend"] * (len(results[r].name_yend) - 1) + + ["rho"] + ) + results[r].output = pd.DataFrame( + { + "var_names": results[r].name_x + results[r].name_yend, + "var_type": var_types, + "regime": r, + "equation": r, + } + ) self.output = pd.concat([self.output, results[r].output], ignore_index=True) if spat_diag: - results[r].other_mid += _spat_diag_out(results[r], results[r].w, 'yend') + results[r].other_mid += _spat_diag_out(results[r], results[r].w, "yend") if spat_impacts: - results[r].sp_multipliers, impacts_str = _summary_impacts(results[r], results[r].w, spat_impacts, slx_lags) + results[r].sp_multipliers, impacts_str = _summary_impacts( + results[r], results[r].w, spat_impacts, slx_lags + ) results[r].other_mid += impacts_str counter += 1 self.multi = results @@ -866,7 +909,7 @@ def GM_Lag_Regimes_Multi( "Residuals treated as homoskedastic for the purpose of diagnostics.", ) self.chow = REGI.Chow(self) - #if spat_diag: + # if spat_diag: # self._get_spat_diag_props(y, x, w, yend, q, w_lags, lag_q) output(reg=self, vm=vm, robust=robust, other_end=False, latex=latex) @@ -976,9 +1019,7 @@ def _work( class GM_Lag_Endog_Regimes(GM_Lag_Regimes): - def __init__( - self, y, x, w, n_clusters=None, quorum=-np.inf, trace=True, **kwargs): - + def __init__(self, y, x, w, n_clusters=None, quorum=-np.inf, trace=True, **kwargs): n = USER.check_arrays(y, x) y, name_y = USER.check_y(y, n, name_y) w = USER.check_weights(w, y, w_required=True) @@ -988,29 +1029,53 @@ def __init__( if not n_clusters: if quorum < 0: - quorum = np.max([(x.shape[1]+1)*10, 30]) - n_clusters_opt = x.shape[0]*0.70//quorum + quorum = np.max([(x.shape[1] + 1) * 10, 30]) + n_clusters_opt = x.shape[0] * 0.70 // quorum if n_clusters_opt < 2: raise ValueError( - "The combination of the values of `N` and `quorum` is not compatible with regimes estimation.") - sk_reg_results = Skater_reg().fit(n_clusters_opt, w, x_std, {'reg':BaseGM_Lag,'y':y,'x':x,'w':w}, quorum=quorum, trace=True) - n_clusters = optim_k([sk_reg_results._trace[i][1][2] for i in range(1, len(sk_reg_results._trace))]) - self.clusters = sk_reg_results._trace[n_clusters-1][0] + "The combination of the values of `N` and `quorum` is not compatible with regimes estimation." 
+ ) + sk_reg_results = Skater_reg().fit( + n_clusters_opt, + w, + x_std, + {"reg": BaseGM_Lag, "y": y, "x": x, "w": w}, + quorum=quorum, + trace=True, + ) + n_clusters = optim_k( + [ + sk_reg_results._trace[i][1][2] + for i in range(1, len(sk_reg_results._trace)) + ] + ) + self.clusters = sk_reg_results._trace[n_clusters - 1][0] else: try: # Call the Skater_reg method based on GM_Lag - sk_reg_results = Skater_reg().fit(n_clusters, w, x_std, {'reg':BaseGM_Lag,'y':y,'x':x,'w':w}, quorum=quorum, trace=trace) + sk_reg_results = Skater_reg().fit( + n_clusters, + w, + x_std, + {"reg": BaseGM_Lag, "y": y, "x": x, "w": w}, + quorum=quorum, + trace=trace, + ) self.clusters = sk_reg_results.current_labels_ except Exception as e: if str(e) == "one or more input arrays have more columns than rows": - raise ValueError("One or more input ended up with more variables than observations. Please check your setting for `quorum`.") + raise ValueError( + "One or more input ended up with more variables than observations. Please check your setting for `quorum`." + ) else: print("An error occurred:", e) self._trace = sk_reg_results._trace self.SSR = [self._trace[i][1][2] for i in range(1, len(self._trace))] - GM_Lag_Regimes.__init__(self, y, x, regimes=self.clusters, w=w, name_regimes='Skater_reg', **kwargs) + GM_Lag_Regimes.__init__( + self, y, x, regimes=self.clusters, w=w, name_regimes="Skater_reg", **kwargs + ) def _test(): @@ -1062,7 +1127,7 @@ def _test(): name_ds="columbus", name_w="columbus.gal", regime_err_sep=True, - regime_lag_sep = True, + regime_lag_sep=True, robust="white", ) print(model.output) diff --git a/spreg/user_output.py b/spreg/user_output.py index 993058e..f5a236e 100755 --- a/spreg/user_output.py +++ b/spreg/user_output.py @@ -15,8 +15,8 @@ from libpysal import weights from libpysal import graph from scipy.sparse.csr import csr_matrix -from .utils import get_lags # new for flex_wx -from itertools import compress # new for lfex_wx +from .utils import get_lags # new for flex_wx +from itertools import compress # new for lfex_wx def set_name_ds(name_ds): @@ -152,6 +152,7 @@ def set_name_yend_sp(name_y): """ return "W_" + name_y + def set_name_spatial_lags(names, w_lags): """Set the spatial lag names for multiple variables and lag orders" @@ -166,10 +167,11 @@ def set_name_spatial_lags(names, w_lags): """ lag_names = ["W_" + s for s in names] - for i in range(w_lags-1): - lag_names += ["W" + str(i+2) + "_" + s for s in names] + for i in range(w_lags - 1): + lag_names += ["W" + str(i + 2) + "_" + s for s in names] return lag_names + def set_name_q_sp(name_x, w_lags, name_q, lag_q, force_all=False): """Set the spatial instrument names in regression; return generic name if user provides no explicit name." 
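The reformatted set_name_spatial_lags above encodes the lag-naming convention the rest of these hunks rely on: first-order spatial lags are prefixed "W_", higher orders "W2_", "W3_", and so on. A small sketch of the convention; it calls the internal helper directly (an implementation detail rather than documented public API), and the variable names are made up:

from spreg.user_output import set_name_spatial_lags

print(set_name_spatial_lags(["inc", "hoval"], 1))
# ['W_inc', 'W_hoval']
print(set_name_spatial_lags(["inc", "hoval"], 2))
# ['W_inc', 'W_hoval', 'W2_inc', 'W2_hoval']
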
@@ -194,7 +196,7 @@ def set_name_q_sp(name_x, w_lags, name_q, lag_q, force_all=False): names = names + name_q sp_inst_names = [] existing_names = set(names) - name_count = {} # Dictionary to store the count of each name + name_count = {} # Dictionary to store the count of each name for name in names: if not name.startswith("W_"): @@ -415,7 +417,7 @@ def check_y(y, n, name_y=None): n : int number of observations - + name_y : string Name of the y variable @@ -423,7 +425,7 @@ def check_y(y, n, name_y=None): ------- y : anything Object passed by the user to a regression class - + name_y : string Name of the y variable @@ -452,7 +454,7 @@ def check_y(y, n, name_y=None): name_y = y.columns.to_list() if len(name_y) == 1: name_y = name_y[0] - + y = y.to_numpy() if not isinstance(y, np.ndarray): print(y.__class__.__name__) @@ -473,6 +475,7 @@ def check_y(y, n, name_y=None): ) return y, name_y + def check_endog(arrays, names): """Check if each of the endogenous arrays passed by a user to a regression class are pandas objects. In this case, the function converts them to numpy arrays and collects their names. @@ -499,6 +502,7 @@ def check_endog(arrays, names): arrays[i].shape = (arrays[i].shape[0], 1) return (*arrays, *names) + def check_weights(w, y, w_required=False, time=False, slx_lags=0): """Check if the w parameter passed by the user is a libpysal.W object and check that its dimensionality matches the y parameter. Note that this @@ -547,17 +551,18 @@ def check_weights(w, y, w_required=False, time=False, slx_lags=0): if w_required == True or (w is not None) or slx_lags > 0: if isinstance(w, graph.Graph): w = w.to_W() - + if w == None: raise Exception("A weights matrix w must be provided to run this method.") - + if not isinstance(w, weights.W): from warnings import warn + warn("w must be API-compatible pysal weights object") # check for kernel weights, if so insert zeros on diagonal if slx_lags == 1 and isinstance(w, weights.Kernel): - w = weights.fill_diagonal(w,val=0.0) + w = weights.fill_diagonal(w, val=0.0) if w.n != y.shape[0] and time == False: raise Exception("y must have n rows, and w must be an nxn PySAL W object") @@ -565,7 +570,7 @@ def check_weights(w, y, w_required=False, time=False, slx_lags=0): # check to make sure all entries equal 0 if diag.min() != 0 or diag.max() != 0: raise Exception("All entries on diagonal must equal 0.") - + return w @@ -636,14 +641,15 @@ def check_robust(robust, wk): # NOTE: we are not checking for the case of exactly 1.0 ### raise Exception("Off-diagonal entries must be less than 1.") elif robust.lower() == "white" or robust.lower() == "ogmm": - # if wk: - # raise Exception("White requires that wk be set to None") - pass # these options are not affected by wk + # if wk: + # raise Exception("White requires that wk be set to None") + pass # these options are not affected by wk else: raise Exception( "invalid value passed to robust, see docs for valid options" ) + ''' Deprecated in 1.6.1 def check_spat_diag(spat_diag, w): """Check if there is a w parameter passed by the user if the user also @@ -686,6 +692,7 @@ def check_spat_diag(spat_diag, w): raise Exception("w must be a libpysal.W object to run spatial diagnostics") ''' + def check_reg_list(regimes, name_regimes, n): """Check if the regimes parameter passed by the user is a valid list of regimes. 
Note: this does not check if the regimes are valid for the @@ -696,7 +703,7 @@ def check_reg_list(regimes, name_regimes, n): regimes : list or np.array or pd.Series Object passed by the user to a regression class name_regimes : string - Name of the regimes variable + Name of the regimes variable n : int number of observations @@ -724,11 +731,6 @@ def check_reg_list(regimes, name_regimes, n): return regimes, name_regimes - - - - - def check_regimes(reg_set, N=None, K=None): """Check if there are at least two regimes @@ -824,7 +826,7 @@ def check_constant(x, name_x=None, just_rem=False): return x_constant, keep_x, warn -def flex_wx(w,x,name_x,constant=True,slx_lags=1,slx_vars="All"): +def flex_wx(w, x, name_x, constant=True, slx_lags=1, slx_vars="All"): """ Adds spatially lagged variables to an existing x matrix with or without a constant term Adds variable names prefaced by W_ for the lagged variables @@ -849,24 +851,24 @@ def flex_wx(w,x,name_x,constant=True,slx_lags=1,slx_vars="All"): """ if constant == True: - xwork = x[:,1:] - xnamework = name_x[1:] + xwork = x[:, 1:] + xnamework = name_x[1:] else: xwork = x xnamework = name_x - - if isinstance(slx_vars,list): + + if isinstance(slx_vars, list): if len(slx_vars) == len(xnamework): - xwork = xwork[:,slx_vars] - xnamework = list(compress(xnamework,slx_vars)) + xwork = xwork[:, slx_vars] + xnamework = list(compress(xnamework, slx_vars)) else: raise Exception("Mismatch number of columns and length slx_vars") - - lagx = get_lags(w,xwork,slx_lags) - xlagname = set_name_spatial_lags(xnamework,slx_lags) - bigx = np.hstack((x,lagx)) + + lagx = get_lags(w, xwork, slx_lags) + xlagname = set_name_spatial_lags(xnamework, slx_lags) + bigx = np.hstack((x, lagx)) bignamex = name_x + xlagname - return(bigx,bignamex) + return (bigx, bignamex) def _test(): diff --git a/spreg/utils.py b/spreg/utils.py index 639954b..54bdb5f 100755 --- a/spreg/utils.py +++ b/spreg/utils.py @@ -17,9 +17,7 @@ import copy - class RegressionPropsY(object): - """ Helper class that adds common regression properties to any regression class that inherits it. It takes no parameters. See BaseOLS for example @@ -81,7 +79,6 @@ def std_y(self, val): class RegressionPropsVM(object): - """ Helper class that adds common regression properties to any regression class that inherits it. It takes no parameters. See BaseOLS for example @@ -109,9 +106,9 @@ def utu(self): return self._cache["utu"] except AttributeError: self._cache = {} - self._cache["utu"] = np.sum(self.u ** 2) + self._cache["utu"] = np.sum(self.u**2) except KeyError: - self._cache["utu"] = np.sum(self.u ** 2) + self._cache["utu"] = np.sum(self.u**2) return self._cache["utu"] @utu.setter @@ -323,7 +320,9 @@ def _moments2eqs(A1, s, u): return [G, g] -def optim_moments(moments_in, vcX=np.array([0]), all_par=False, start=None, hard_bound=False): +def optim_moments( + moments_in, vcX=np.array([0]), all_par=False, start=None, hard_bound=False +): """ Optimization of moments ... @@ -345,7 +344,7 @@ def optim_moments(moments_in, vcX=np.array([0]), all_par=False, start=None, hard hard_bound : boolean If true, raises an exception if the estimated spatial autoregressive parameter is outside the maximum/minimum bounds. 
- + Returns ------- x, f, d : tuple @@ -392,7 +391,9 @@ def optim_moments(moments_in, vcX=np.array([0]), all_par=False, start=None, hard if hard_bound: if abs(lambdaX[0][0]) >= 0.99: - raise Exception("Spatial parameter was outside the bounds of -0.99 and 0.99") + raise Exception( + "Spatial parameter was outside the bounds of -0.99 and 0.99" + ) if all_par: return lambdaX[0] @@ -421,7 +422,7 @@ def foptim_par(par, moments): """ vv = np.dot(moments[0], par) vv2 = moments[1] - vv - return sum(vv2 ** 2) + return sum(vv2**2) def get_spFilter(w, lamb, sf): @@ -498,6 +499,7 @@ def get_lags(w, x, w_lags): spat_lags = sphstack(spat_lags, lag) return spat_lags + def get_lags_split(w, x, max_lags, split_at): """ Calculates a given order of spatial lags and all the smaller orders, @@ -524,7 +526,7 @@ def get_lags_split(w, x, max_lags, split_at): rs_l = lag = lag_spatial(w, x) rs_h = None if 0 < split_at < max_lags: - for _ in range(split_at-1): + for _ in range(split_at - 1): lag = lag_spatial(w, lag) rs_l = sphstack(rs_l, lag) @@ -532,10 +534,13 @@ def get_lags_split(w, x, max_lags, split_at): lag = lag_spatial(w, lag) rs_h = sphstack(rs_h, lag) if i > 0 else lag else: - raise ValueError("max_lags must be greater than split_at and split_at must be greater than 0") + raise ValueError( + "max_lags must be greater than split_at and split_at must be greater than 0" + ) return rs_l, rs_h + def inverse_prod( w, data, @@ -617,11 +622,13 @@ def inverse_prod( except: matrix = la.inv(np.eye(w.shape[0]) - (scalar * w)) if post_multiply: -# inv_prod = spdot(data.T, matrix) - inv_prod = np.matmul(data.T,matrix) # inverse matrix is dense, wrong type in spdot + # inv_prod = spdot(data.T, matrix) + inv_prod = np.matmul( + data.T, matrix + ) # inverse matrix is dense, wrong type in spdot else: -# inv_prod = spdot(matrix, data) - inv_prod = np.matmul(matrix,data) + # inv_prod = spdot(matrix, data) + inv_prod = np.matmul(matrix, data) else: raise Exception("Invalid method selected for inversion.") return inv_prod @@ -671,13 +678,13 @@ def power_expansion( return running_total -def set_endog(y, x, w, yend, q, w_lags, lag_q, slx_lags=0,slx_vars="All"): +def set_endog(y, x, w, yend, q, w_lags, lag_q, slx_lags=0, slx_vars="All"): # Create spatial lag of y yl = lag_spatial(w, y) # spatial and non-spatial instruments if issubclass(type(yend), np.ndarray): if slx_lags > 0: - lag_x, lag_xq = get_lags_split(w, x, slx_lags+1, slx_lags) + lag_x, lag_xq = get_lags_split(w, x, slx_lags + 1, slx_lags) else: lag_xq = x if lag_q: @@ -689,7 +696,7 @@ def set_endog(y, x, w, yend, q, w_lags, lag_q, slx_lags=0,slx_vars="All"): yend = sphstack(yend, yl) elif yend == None: # spatial instruments only if slx_lags > 0: - lag_x, lag_xq = get_lags_split(w, x, slx_lags+w_lags, slx_lags) + lag_x, lag_xq = get_lags_split(w, x, slx_lags + w_lags, slx_lags) else: lag_xq = get_lags(w, x, w_lags) q = lag_xq @@ -699,18 +706,17 @@ def set_endog(y, x, w, yend, q, w_lags, lag_q, slx_lags=0,slx_vars="All"): if slx_lags == 0: return yend, q else: # ajdust returned lag_x here using slx_vars - if (isinstance(slx_vars,list)): # slx_vars has True,False - if len(slx_vars) != x.shape[1] : + if isinstance(slx_vars, list): # slx_vars has True,False + if len(slx_vars) != x.shape[1]: raise Exception("slx_vars incompatible with x column dimensions") else: # use slx_vars to extract proper columns vv = slx_vars * slx_lags - lag_x = lag_x[:,vv] + lag_x = lag_x[:, vv] return yend, q, lag_x else: # slx_vars is "All" return yend, q, lag_x - def set_endog_sparse(y, x, w, 
yend, q, w_lags, lag_q): """ Same as set_endog, but with a sparse object passed as weights instead of W object. @@ -799,12 +805,13 @@ def RegressionProps_basic( if sig2 is not None: reg.sig2 = sig2 elif sig2n_k: - reg.sig2 = np.sum(reg.u ** 2) / (reg.n - reg.k) + reg.sig2 = np.sum(reg.u**2) / (reg.n - reg.k) else: - reg.sig2 = np.sum(reg.u ** 2) / reg.n + reg.sig2 = np.sum(reg.u**2) / reg.n if vm is not None: reg.vm = vm + def optim_k(trace, window_size=None): """ Finds optimal number of regimes for the endogenous spatial regimes model @@ -842,19 +849,21 @@ def optim_k(trace, window_size=None): N = len(trace) if not window_size: - window_size = N//4 # Mojena suggests from 70% to 90% - std_dev = [np.std(trace[i:i+window_size]) for i in range(N - window_size + 1)] - ma = np.convolve(trace, np.ones(window_size)/window_size, mode='valid') + window_size = N // 4 # Mojena suggests from 70% to 90% + std_dev = [np.std(trace[i : i + window_size]) for i in range(N - window_size + 1)] + ma = np.convolve(trace, np.ones(window_size) / window_size, mode="valid") treshold = [True] i = 0 while treshold[-1] and i < (N - window_size): - b = (6/(window_size*(window_size*window_size-1)) - )*((2*np.sum(np.arange(1, i+2)*trace[window_size-1:i+window_size]) - )-((window_size+1)*np.sum(trace[window_size-1:i+window_size]))) - l = (window_size-1)*b/2 - treshold.append(trace[i+window_size] < ma[i] - b - l - 2.75*std_dev[i]) + b = (6 / (window_size * (window_size * window_size - 1))) * ( + (2 * np.sum(np.arange(1, i + 2) * trace[window_size - 1 : i + window_size])) + - ((window_size + 1) * np.sum(trace[window_size - 1 : i + window_size])) + ) + l = (window_size - 1) * b / 2 + treshold.append(trace[i + window_size] < ma[i] - b - l - 2.75 * std_dev[i]) i += 1 - return i+window_size + return i + window_size + def _test(): import doctest
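The optim_k hunk above reformats the Mojena-style stopping rule that GM_Lag_Endog_Regimes (earlier in this diff) applies to the SSR trace collected from Skater_reg in order to pick the number of regimes. A minimal sketch of how it is called in isolation; the SSR values below are synthetic and purely illustrative:

from spreg.utils import optim_k

# Synthetic SSR trace: large gains from the first few splits, then a plateau.
ssr = [100.0, 60.0, 35.0, 20.0, 12.0, 11.5, 11.2, 11.0,
       10.9, 10.8, 10.7, 10.6, 10.5, 10.4, 10.3, 10.2]
k = optim_k(ssr)
print(k)  # interpreted as the number of regimes, per the docstring above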