diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 00000000..e2eff3df --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,4 @@ +# .git-blame-ignore-revs +# ignore initial move to black formatting +09acdfbf8dd3979d1d18b302b9d4277104e7b579 +23c567a5a901d606511c3b60e3c780011137bf5c \ No newline at end of file diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 7288417c..59d0674c 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -1,4 +1,4 @@ -name: Build Package and Test Source Code [Python 3.9, 3.10] +name: Build Package and Test Source Code [Python 3.9, 3.10, 3.11] on: [push, pull_request] @@ -8,7 +8,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python-version: ["3.9", "3.10"] + python-version: ["3.9", "3.10", "3.11"] steps: - name: Checkout @@ -34,7 +34,7 @@ jobs: shell: bash -l {0} working-directory: ./ run: | - pytest -m 'not needs_puf' --pycodestyle --cov=./ --cov-report=xml + pytest -m 'not needs_puf' --cov=./ --cov-report=xml - name: Upload coverage to Codecov uses: codecov/codecov-action@v1 with: diff --git a/.github/workflows/check_formatting.yml b/.github/workflows/check_formatting.yml new file mode 100644 index 00000000..5c42d997 --- /dev/null +++ b/.github/workflows/check_formatting.yml @@ -0,0 +1,14 @@ +name: Check Black formatting + +on: [push, pull_request] + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + - uses: psf/black@stable + with: + options: "-l 79 --check" + src: "." \ No newline at end of file diff --git a/ccc/__init__.py b/ccc/__init__.py index 0054aebf..1749abbb 100644 --- a/ccc/__init__.py +++ b/ccc/__init__.py @@ -1,8 +1,9 @@ """ Specify what is available to import from the ccc package. """ + from ccc.parameters import * from ccc.data import * from ccc.calculator import * -__version__ = '0.0.0' +__version__ = "0.0.0" diff --git a/ccc/calcfunctions.py b/ccc/calcfunctions.py index a05e1aec..7d552d68 100644 --- a/ccc/calcfunctions.py +++ b/ccc/calcfunctions.py @@ -3,11 +3,13 @@ from ccc.constants import TAX_METHODS from ccc.utils import str_modified +pd.set_option("future.no_silent_downcasting", True) + ENFORCE_CHECKS = True def update_depr_methods(df, p, dp): - ''' + """ Updates depreciation methods per changes from defaults that are specified by user. 
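A note on the new `pd.set_option("future.no_silent_downcasting", True)` line above: it opts `calcfunctions.py` into pandas 2.2's future behavior, in which `replace()` and `fillna()` no longer silently downcast object-dtype results; this is also why the hunks below pair each `replace` with an explicit `astype(float)`. (Relatedly, the new `.git-blame-ignore-revs` file is honored automatically by GitHub's blame view; locally, `git config blame.ignoreRevsFile .git-blame-ignore-revs` has the same effect.) A minimal sketch of the pattern, with an invented frame and mapping rather than ccc's real data:

```python
# Hedged sketch, not ccc code: the column and mapping are invented.
import pandas as pd

pd.set_option("future.no_silent_downcasting", True)

df = pd.DataFrame({"life": ["5", "7"]})  # object dtype
# With the option set, replace() keeps the object dtype instead of
# silently downcasting, so the explicit cast below states the intent.
df = df.replace({"life": {"5": 5.0, "7": 7.0}})
df["life"] = df["life"].astype(float)
print(df.dtypes)  # life    float64
```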
@@ -21,38 +23,44 @@ def update_depr_methods(df, p, dp): df (Pandas DataFrame): assets by type and tax treatment with updated tax depreciation methods - ''' + """ # update tax_deprec_rates based on user defined parameters # create dataframe with depreciation policy parameters deprec_df = pd.DataFrame(dp.asset) - print("deprec_df", deprec_df.head()) # split out value into two columns deprec_df = deprec_df.join( - pd.DataFrame(deprec_df.pop('value').values.tolist())) - print("deprec_df 2", deprec_df.head()) + pd.DataFrame(deprec_df.pop("value").values.tolist()) + ) # drop information duplicated in asset dataframe - deprec_df.drop(columns=['asset_name', 'minor_asset_group', - 'major_asset_group'], inplace=True) + deprec_df.drop( + columns=["asset_name", "minor_asset_group", "major_asset_group"], + inplace=True, + ) # merge depreciation policy parameters to asset dataframe - df.drop(columns=deprec_df.keys(), inplace=True, errors='ignore') - df = df.merge(deprec_df, how='left', left_on='bea_asset_code', - right_on='BEA_code') + df.drop(columns=deprec_df.keys(), inplace=True, errors="ignore") + df = df.merge( + deprec_df, how="left", left_on="bea_asset_code", right_on="BEA_code" + ) # add bonus depreciation to tax deprec parameters dataframe # ** UPDATE THIS - maybe including bonus in new asset deprec JSON** - df['bonus'] = df['GDS_life'].apply(str_modified) - df['bonus'].replace(p.bonus_deprec, inplace=True) + df["bonus"] = df["GDS_life"].apply(str_modified) + df.replace({"bonus": p.bonus_deprec}, inplace=True) + # make bonus float format + df["bonus"] = df["bonus"].astype(float) # Compute b - df['b'] = df['method'] - df['b'].replace(TAX_METHODS, inplace=True) - df.loc[df['system'] == 'ADS', 'Y'] = df.loc[df['system'] == 'ADS', - 'ADS_life'] - df.loc[df['system'] == 'GDS', 'Y'] = df.loc[df['system'] == 'GDS', - 'GDS_life'] + df["b"] = df["method"] + df.replace({"b": TAX_METHODS}, regex=True, inplace=True) + df.loc[df["system"] == "ADS", "Y"] = df.loc[ + df["system"] == "ADS", "ADS_life" + ] + df.loc[df["system"] == "GDS", "Y"] = df.loc[ + df["system"] == "GDS", "GDS_life" + ] return df def dbsl(Y, b, bonus, r): - r''' + r""" Makes the calculation for the declining balance with a switch to straight line (DBSL) method of depreciation. @@ -72,21 +80,24 @@ def dbsl(Y, b, bonus, r): z (array_like): net present value of depreciation deductions for $1 of investment - ''' + """ beta = b / Y Y_star = Y * (1 - (1 / b)) - z = ( - bonus + ((1 - bonus) * (((beta / (beta + r)) * - (1 - np.exp(-1 * (beta + r) * Y_star))) + - ((np.exp(-1 * beta * Y_star) / - ((Y - Y_star) * r)) * - (np.exp(-1 * r * Y_star) - - np.exp(-1 * r * Y)))))) + z = bonus + ( + (1 - bonus) + * ( + ((beta / (beta + r)) * (1 - np.exp(-1 * (beta + r) * Y_star))) + + ( + (np.exp(-1 * beta * Y_star) / ((Y - Y_star) * r)) + * (np.exp(-1 * r * Y_star) - np.exp(-1 * r * Y)) + ) + ) + ) return z def sl(Y, bonus, r): - r''' + r""" Makes the calculation for straight line (SL) method of depreciation. .. math:: @@ -101,13 +112,13 @@ def sl(Y, bonus, r): z (array_like): net present value of depreciation deductions for $1 of investment - ''' + """ z = bonus + ((1 - bonus) * ((1 - np.exp(-1 * r * Y)) / (r * Y))) return z def econ(delta, bonus, r, pi): - r''' + r""" Makes the calculation for the NPV of depreciation deductions using economic depreciation rates. 
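The hunks below reformat the equation functions (`econ`, `eq_coc`, `eq_ucc`, `eq_metr`, `eq_mettr`, ...). A worked numeric sketch of how they chain from the cost of capital to the METTR, with every parameter value invented for illustration:

```python
# Invented inputs; formulas copied from eq_coc, eq_metr, eq_mettr below.
delta = 0.05                 # economic depreciation rate
z = 0.8                      # NPV of depreciation deductions per $1
w, u, itc = 0.01, 0.21, 0.0  # property tax, entity tax rate, ITC
pi, r = 0.02, 0.06           # inflation rate, nominal discount rate
r_prime, s = 0.05, 0.03      # overall return to firm, return to saver

rho = ((r - pi + delta) / (1 - u)) * (1 - itc - u * z) + w - delta
metr = (rho - (r_prime - pi)) / rho
mettr = (rho - s) / rho
print(f"rho={rho:.4f}, metr={metr:.4f}, mettr={mettr:.4f}")
```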
@@ -124,13 +135,13 @@ def econ(delta, bonus, r, pi): z (array_like): net present value of depreciation deductions for $1 of investment - ''' + """ z = bonus + ((1 - bonus) * (delta / (delta + r - pi))) return z def npv_tax_depr(df, r, pi, land_expensing): - ''' + """ Depending on the method of depreciation, makes calls to either the straight line or declining balance calculations. @@ -144,28 +155,28 @@ def npv_tax_depr(df, r, pi, land_expensing): z (Pandas series): NPV of depreciation deductions for all asset types and tax treatments - ''' - idx = (df['method'] == 'DB 200%') | (df['method'] == 'DB 150%') - df.loc[idx, 'z'] = dbsl(df.loc[idx, 'Y'], df.loc[idx, 'b'], - df.loc[idx, 'bonus'], r) - idx = df['method'] == 'SL' - df.loc[idx, 'z'] = sl(df.loc[idx, 'Y'], df.loc[idx, 'bonus'], r) - idx = df['method'] == 'Economic' - df.loc[idx, 'z'] = econ(df.loc[idx, 'delta'], df.loc[idx, 'bonus'], - r, pi) - idx = df['method'] == 'Expensing' - df.loc[idx, 'z'] = 1.0 - idx = df['asset_name'] == 'Land' - df.loc[idx, 'z'] = np.squeeze(land_expensing) - idx = df['asset_name'] == 'Inventories' - df.loc[idx, 'z'] = 0.0 # not sure why I have to do this with changes - z = df['z'] + """ + idx = (df["method"] == "DB 200%") | (df["method"] == "DB 150%") + df.loc[idx, "z"] = dbsl( + df.loc[idx, "Y"], df.loc[idx, "b"], df.loc[idx, "bonus"], r + ) + idx = df["method"] == "SL" + df.loc[idx, "z"] = sl(df.loc[idx, "Y"], df.loc[idx, "bonus"], r) + idx = df["method"] == "Economic" + df.loc[idx, "z"] = econ(df.loc[idx, "delta"], df.loc[idx, "bonus"], r, pi) + idx = df["method"] == "Expensing" + df.loc[idx, "z"] = 1.0 + idx = df["asset_name"] == "Land" + df.loc[idx, "z"] = np.squeeze(land_expensing) + idx = df["asset_name"] == "Inventories" + df.loc[idx, "z"] = 0.0 # not sure why I have to do this with changes + z = df["z"] return z def eq_coc(delta, z, w, u, inv_tax_credit, pi, r): - r''' + r""" Compute the cost of capital .. math:: @@ -185,15 +196,16 @@ def eq_coc(delta, z, w, u, inv_tax_credit, pi, r): Returns: rho (array_like): the cost of capital - ''' - rho = (((r - pi + delta) / (1 - u)) * - (1 - inv_tax_credit - u * z) + w - delta) + """ + rho = ( + ((r - pi + delta) / (1 - u)) * (1 - inv_tax_credit - u * z) + w - delta + ) return rho def eq_coc_inventory(u, phi, Y_v, pi, r): - r''' + r""" Compute the cost of capital for inventories .. math:: @@ -210,18 +222,16 @@ def eq_coc_inventory(u, phi, Y_v, pi, r): Returns: rho (scalar): cost of capital for inventories - ''' - rho_FIFO = (((1 / Y_v) * np.log((np.exp(r * Y_v) - u) / - (1 - u))) - pi) - rho_LIFO = ((1 / Y_v) * np.log((np.exp((r - pi) * Y_v) - u) / - (1 - u))) + """ + rho_FIFO = ((1 / Y_v) * np.log((np.exp(r * Y_v) - u) / (1 - u))) - pi + rho_LIFO = (1 / Y_v) * np.log((np.exp((r - pi) * Y_v) - u) / (1 - u)) rho = phi * rho_FIFO + (1 - phi) * rho_LIFO return rho def eq_ucc(rho, delta): - r''' + r""" Compute the user cost of capital .. math:: @@ -234,13 +244,13 @@ def eq_ucc(rho, delta): Returns: ucc (array_like): the user cost of capital - ''' + """ ucc = rho + delta return ucc def eq_metr(rho, r_prime, pi): - r''' + r""" Compute the marginal effective tax rate (METR) .. math:: @@ -254,13 +264,13 @@ def eq_metr(rho, r_prime, pi): Returns: metr (array_like): METR - ''' + """ metr = (rho - (r_prime - pi)) / rho return metr def eq_mettr(rho, s): - r''' + r""" Compute the marginal effective total tax rate (METTR) .. 
math:: @@ -273,13 +283,13 @@ def eq_mettr(rho, s): Returns: mettr (array_like): METTR - ''' + """ mettr = (rho - s) / rho return mettr def eq_tax_wedge(rho, s): - r''' + r""" Compute the tax wedge .. math:: @@ -292,13 +302,13 @@ def eq_tax_wedge(rho, s): Returns: wedge (array_like): tax wedge - ''' + """ wedge = rho - s return wedge def eq_eatr(rho, metr, p, u): - r''' + r""" Compute the effective average tax rate (EATR). .. math:: @@ -315,6 +325,6 @@ def eq_eatr(rho, metr, p, u): Returns: eatr (array_like): EATR - ''' + """ eatr = ((p - rho) / p) * u + (rho / p) * metr return eatr diff --git a/ccc/calculator.py b/ccc/calculator.py index 61316fff..9050a471 100644 --- a/ccc/calculator.py +++ b/ccc/calculator.py @@ -1,6 +1,7 @@ -''' +""" Cost-of-Capital-Calculator Calculator class. -''' +""" + # CODING-STYLE CHECKS: # pycodestyle calculator.py # pylint --disable=locally-disabled calculator.py @@ -10,33 +11,56 @@ import copy import pandas as pd import numpy as np -from ccc.calcfunctions import (update_depr_methods, npv_tax_depr, - eq_coc, eq_coc_inventory, eq_ucc, - eq_metr, eq_mettr, eq_tax_wedge, eq_eatr) +from ccc.calcfunctions import ( + update_depr_methods, + npv_tax_depr, + eq_coc, + eq_coc_inventory, + eq_ucc, + eq_metr, + eq_mettr, + eq_tax_wedge, + eq_eatr, +) from ccc.parameters import Specification, DepreciationParams from ccc.data import Assets from ccc.utils import wavg, diff_two_tables, save_return_table -from ccc.constants import (VAR_DICT, MAJOR_IND_ORDERED, OUTPUT_VAR_LIST, - OUTPUT_DATA_FORMATS) +from ccc.constants import ( + VAR_DICT, + MAJOR_IND_ORDERED, + OUTPUT_VAR_LIST, + OUTPUT_DATA_FORMATS, +) + # import pdb # importing Bokeh libraries from bokeh.plotting import figure from bokeh.transform import dodge -from bokeh.models import (ColumnDataSource, CustomJS, LabelSet, Title, - CustomJSTickFormatter, BoxAnnotation, HoverTool, - NumeralTickFormatter, Span, TabPanel, Tabs) +from bokeh.models import ( + ColumnDataSource, + CustomJS, + LabelSet, + Title, + CustomJSTickFormatter, + BoxAnnotation, + HoverTool, + NumeralTickFormatter, + Span, + TabPanel, + Tabs, +) from bokeh.models.widgets import RadioButtonGroup from bokeh.models.tickers import FixedTicker from bokeh.layouts import gridplot, column # import styles and callback -from ccc.styles import (PLOT_FORMATS, TITLE_FORMATS, RED, BLUE) +from ccc.styles import PLOT_FORMATS, TITLE_FORMATS, RED, BLUE from ccc.controls_callback_script import CONTROLS_CALLBACK_SCRIPT -class Calculator(): - ''' +class Calculator: + """ Constructor for the Calculator class. 
Args: @@ -73,7 +97,8 @@ class Calculator(): >>> `params2 = Specification(...reform parameters...)` >>> `calc2 = Calculator(p=params2, assets=rec) # reform` - ''' + """ + # pylint: disable=too-many-public-methods def __init__(self, p=None, dp=None, assets=None, verbose=True): @@ -81,98 +106,125 @@ def __init__(self, p=None, dp=None, assets=None, verbose=True): if isinstance(p, Specification): self.__p = copy.deepcopy(p) else: - raise ValueError('must specify p as a Specification object') + raise ValueError("must specify p as a Specification object") if isinstance(dp, DepreciationParams): self.__dp = copy.deepcopy(dp) else: - raise ValueError('must specify p as an DepreciationParams object') + raise ValueError("must specify dp as a DepreciationParams object") if isinstance(assets, Assets): self.__assets = copy.deepcopy(assets) else: - raise ValueError('must specify assets as a Assets object') + raise ValueError("must specify assets as an Assets object") self.__stored_assets = None def calc_other(self, df): - ''' + """ Calculates variables that depend on z and rho such as metr, ucc Args: - df (Pandas DataFrame): assets by indusry and tax_treatment + df (Pandas DataFrame): assets by industry and tax_treatment with depreciation rates, cost of capital, etc. Returns: df (Pandas DataFrame): input dataframe, but with additional columns (ucc, metr, mettr, tax_wedge, eatr) - ''' - dfs = {'c': df[df['tax_treat'] == 'corporate'].copy(), - 'pt': df[df['tax_treat'] == 'non-corporate'].copy()} + """ + dfs = { + "c": df[df["tax_treat"] == "corporate"].copy(), + "pt": df[df["tax_treat"] == "non-corporate"].copy(), + } # separate into corp and non-corp dataframe here for t in self.__p.entity_list: for f in self.__p.financing_list: - dfs[t]['ucc_' + str(f)] = eq_ucc( - dfs[t]['rho_' + str(f)], dfs[t]['delta']) - dfs[t]['metr_' + str(f)] = eq_metr( - dfs[t]['rho_' + str(f)], self.__p.r_prime[t][f], - self.__p.inflation_rate) - dfs[t]['mettr_' + str(f)] = eq_mettr( - dfs[t]['rho_' + str(f)], self.__p.s[t][f]) - dfs[t]['tax_wedge_' + str(f)] = eq_tax_wedge( - dfs[t]['rho_' + str(f)], self.__p.s[t][f]) - dfs[t]['eatr_' + str(f)] = eq_eatr( - dfs[t]['rho_' + str(f)], dfs[t]['metr_' + str(f)], - self.__p.profit_rate, self.__p.u[t]) + dfs[t]["ucc_" + str(f)] = eq_ucc( + dfs[t]["rho_" + str(f)], dfs[t]["delta"] + ) + dfs[t]["metr_" + str(f)] = eq_metr( + dfs[t]["rho_" + str(f)], + self.__p.r_prime[t][f], + self.__p.inflation_rate, + ) + dfs[t]["mettr_" + str(f)] = eq_mettr( + dfs[t]["rho_" + str(f)], self.__p.s[t][f] + ) + dfs[t]["tax_wedge_" + str(f)] = eq_tax_wedge( + dfs[t]["rho_" + str(f)], self.__p.s[t][f] + ) + dfs[t]["eatr_" + str(f)] = eq_eatr( + dfs[t]["rho_" + str(f)], + dfs[t]["metr_" + str(f)], + self.__p.profit_rate, + self.__p.u[t], + ) df = pd.concat(dfs, ignore_index=True, copy=True) return df def calc_base(self): - ''' + """ Call functions for the current_year. This involves updating depreciation methods, computing the npv of depreciation (z), and computing the cost of capital (rho) and then calling the calc_all() function to do computations that depend on rho and z.
- ''' + """ # conducts static analysis of Calculator object for current_year self.__assets.df = update_depr_methods( - self.__assets.df, self.__p, self.__dp) - dfs = {'c': self.__assets.df[ - self.__assets.df['tax_treat'] == 'corporate'].copy(), - 'pt': self.__assets.df[ - self.__assets.df['tax_treat'] == 'non-corporate'].copy()} + self.__assets.df, self.__p, self.__dp + ) + dfs = { + "c": self.__assets.df[ + self.__assets.df["tax_treat"] == "corporate" + ].copy(), + "pt": self.__assets.df[ + self.__assets.df["tax_treat"] == "non-corporate" + ].copy(), + } # separate into corp and non-corp dataframe here for t in self.__p.entity_list: for f in self.__p.financing_list: - dfs[t]['z_' + str(f)] = npv_tax_depr( - dfs[t], self.__p.r[t][f], self.__p.inflation_rate, - self.__p.land_expensing) - dfs[t]['rho_' + str(f)] = eq_coc( - dfs[t]['delta'], dfs[t]['z_' + str(f)], + dfs[t]["z_" + str(f)] = npv_tax_depr( + dfs[t], + self.__p.r[t][f], + self.__p.inflation_rate, + self.__p.land_expensing, + ) + dfs[t]["rho_" + str(f)] = eq_coc( + dfs[t]["delta"], + dfs[t]["z_" + str(f)], self.__p.property_tax, - self.__p.u[t], self.__p.inv_tax_credit, - self.__p.inflation_rate, self.__p.r[t][f]) + self.__p.u[t], + self.__p.inv_tax_credit, + self.__p.inflation_rate, + self.__p.r[t][f], + ) if not self.__p.inventory_expensing: - idx = dfs[t]['asset_name'] == 'Inventories' - dfs[t].loc[idx, 'rho_' + str(f)] = np.squeeze( + idx = dfs[t]["asset_name"] == "Inventories" + dfs[t].loc[idx, "rho_" + str(f)] = np.squeeze( eq_coc_inventory( - self.__p.u[t], self.__p.phi, self.__p.Y_v, - self.__p.inflation_rate, self.__p.r[t][f])) - self.__assets.df = pd.concat(dfs, ignore_index=True, copy=True, - sort=True) + self.__p.u[t], + self.__p.phi, + self.__p.Y_v, + self.__p.inflation_rate, + self.__p.r[t][f], + ) + ) + self.__assets.df = pd.concat( + dfs, ignore_index=True, copy=True, sort=True + ) def calc_all(self): - ''' + """ Calculates all CCC variables for some CCC Assets object. - ''' + """ self.calc_base() self.__assets.df = self.calc_other(self.__assets.df) - def calc_by_asset(self, include_inventories=True, - include_land=True): - ''' + def calc_by_asset(self, include_inventories=True, include_land=True): + """ Calculates all variables by asset, including overall, and by major asset categories. 
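The hunks that follow thread `include_groups=False` through every `groupby(...).apply(self.__f, ...)` call. Since pandas 2.2, `DataFrameGroupBy.apply` warns when the applied function also receives the grouping columns, and `include_groups=False` opts into the future behavior of excluding them. A minimal sketch with a stand-in aggregator (invented data; `agg` is not the real `Calculator.__f`):

```python
# Hedged sketch of the include_groups=False idiom (pandas >= 2.2).
import pandas as pd

df = pd.DataFrame(
    {"tax_treat": ["corporate", "corporate", "non-corporate"],
     "assets": [1.0, 2.0, 3.0]}
)

def agg(group):
    # the group arrives without the "tax_treat" grouping column
    return pd.Series({"assets": group["assets"].sum()})

out = df.groupby(["tax_treat"]).apply(agg, include_groups=False).reset_index()
print(out)
```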
@@ -186,53 +238,74 @@ def calc_by_asset(self, include_inventories=True, df (pandas DataFrame): rows are assets and major asset groupings with columns for all output variables - ''' + """ self.calc_base() - asset_df = pd.DataFrame(self.__assets.df.groupby( - ['major_asset_group', 'minor_asset_group', 'bea_asset_code', - 'asset_name', 'tax_treat']).apply(self.__f)).reset_index() + asset_df = pd.DataFrame( + self.__assets.df.groupby( + [ + "major_asset_group", + "minor_asset_group", + "bea_asset_code", + "asset_name", + "tax_treat", + ] + ).apply(self.__f, include_groups=False) + ).reset_index() asset_df = self.calc_other(asset_df) # Find values across minor asset groups - minor_asset_df = pd.DataFrame(self.__assets.df.groupby( - ['minor_asset_group', 'major_asset_group', - 'tax_treat']).apply(self.__f)).reset_index() - minor_asset_df['asset_name'] =\ - minor_asset_df['minor_asset_group'] + minor_asset_df = pd.DataFrame( + self.__assets.df.groupby( + ["minor_asset_group", "major_asset_group", "tax_treat"] + ).apply(self.__f, include_groups=False) + ).reset_index() + minor_asset_df["asset_name"] = minor_asset_df["minor_asset_group"] minor_asset_df = self.calc_other(minor_asset_df) # Find values across major asset_groups - major_asset_df = pd.DataFrame(self.__assets.df.groupby( - ['major_asset_group', 'tax_treat']).apply(self.__f)).reset_index() - major_asset_df['minor_asset_group'] =\ - major_asset_df['major_asset_group'] - major_asset_df['asset_name'] = major_asset_df['major_asset_group'] + major_asset_df = pd.DataFrame( + self.__assets.df.groupby(["major_asset_group", "tax_treat"]).apply( + self.__f, include_groups=False + ) + ).reset_index() + major_asset_df["minor_asset_group"] = major_asset_df[ + "major_asset_group" + ] + major_asset_df["asset_name"] = major_asset_df["major_asset_group"] major_asset_df = self.calc_other(major_asset_df) # Drop land and inventories if conditions met df1 = self.__assets.df if not include_land: - df1.drop(df1[df1.asset_name == 'Land'].index, inplace=True) + df1.drop(df1[df1.asset_name == "Land"].index, inplace=True) if not include_inventories: - df1.drop(df1[df1.asset_name == 'Inventories'].index, - inplace=True) - overall_df = pd.DataFrame(df1.groupby( - ['tax_treat']).apply(self.__f)).reset_index() - overall_df['major_asset_group'] = 'Overall' - overall_df['minor_asset_group'] = 'Overall' - overall_df['asset_name'] = 'Overall' + df1.drop(df1[df1.asset_name == "Inventories"].index, inplace=True) + overall_df = pd.DataFrame( + df1.groupby(["tax_treat"]).apply(self.__f, include_groups=False) + ).reset_index() + overall_df["major_asset_group"] = "Overall" + overall_df["minor_asset_group"] = "Overall" + overall_df["asset_name"] = "Overall" overall_df = self.calc_other(overall_df) - df = pd.concat([asset_df, minor_asset_df, major_asset_df, - overall_df], ignore_index=True, copy=True, - sort=True).reset_index() + df = pd.concat( + [asset_df, minor_asset_df, major_asset_df, overall_df], + ignore_index=True, + copy=True, + sort=True, + ).reset_index() # Drop duplicate rows in case, e.g., only one asset in major # or minor asset group - df.drop_duplicates(subset=['asset_name', 'minor_asset_group', - 'major_asset_group', 'tax_treat'], - inplace=True) + df.drop_duplicates( + subset=[ + "asset_name", + "minor_asset_group", + "major_asset_group", + "tax_treat", + ], + inplace=True, + ) return df - def calc_by_industry(self, include_inventories=True, - include_land=True): - ''' + def calc_by_industry(self, include_inventories=True, include_land=True): + """ Calculates all 
variables by industry, including overall, and by major asset categories. @@ -246,42 +319,57 @@ def calc_by_industry(self, include_inventories=True, df (Pandas DataFrame): rows are minor industries and major industry groupings with columns for all output variables - ''' + """ self.calc_base() df1 = self.__assets.df if not include_land: - df1.drop(df1[df1.asset_name == 'Land'].index, inplace=True) + df1.drop(df1[df1.asset_name == "Land"].index, inplace=True) if not include_inventories: - df1.drop(df1[df1.asset_name == 'Inventories'].index, - inplace=True) - ind_df = pd.DataFrame(df1.groupby( - ['major_industry', 'bea_ind_code', 'Industry', - 'tax_treat']).apply(self.__f)).reset_index() + df1.drop(df1[df1.asset_name == "Inventories"].index, inplace=True) + ind_df = pd.DataFrame( + df1.groupby( + ["major_industry", "bea_ind_code", "Industry", "tax_treat"] + ).apply(self.__f, include_groups=False) + ).reset_index() ind_df = self.calc_other(ind_df) - major_ind_df = pd.DataFrame(df1.groupby( - ['major_industry', 'tax_treat']).apply(self.__f)).reset_index() - major_ind_df['Industry'] = major_ind_df['major_industry'] + major_ind_df = pd.DataFrame( + df1.groupby(["major_industry", "tax_treat"]).apply( + self.__f, include_groups=False + ) + ).reset_index() + major_ind_df["Industry"] = major_ind_df["major_industry"] major_ind_df = self.calc_other(major_ind_df) # Can put some if statements here if want to exclude land/inventory/etc - overall_df = pd.DataFrame(df1.groupby( - ['tax_treat']).apply(self.__f)).reset_index() - overall_df['major_industry'] = 'Overall' - overall_df['Industry'] = 'Overall' + overall_df = pd.DataFrame( + df1.groupby(["tax_treat"]).apply(self.__f, include_groups=False) + ).reset_index() + overall_df["major_industry"] = "Overall" + overall_df["Industry"] = "Overall" overall_df = self.calc_other(overall_df) - df = pd.concat([ind_df, major_ind_df, overall_df], - ignore_index=True, copy=True, - sort=True).reset_index() + df = pd.concat( + [ind_df, major_ind_df, overall_df], + ignore_index=True, + copy=True, + sort=True, + ).reset_index() # Drop duplicate rows in case, e.g., only one industry in major # industry group - df.drop_duplicates(subset=['Industry', 'major_industry', - 'tax_treat'], inplace=True) + df.drop_duplicates( + subset=["Industry", "major_industry", "tax_treat"], inplace=True + ) return df - def summary_table(self, calc, output_variable='mettr', - include_land=True, include_inventories=True, - output_type=None, path=None): - ''' + def summary_table( + self, + calc, + output_variable="mettr", + include_land=True, + include_inventories=True, + output_type=None, + path=None, + ): + """ Create table summarizing the output_variable under the baseline and reform policies. 
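A usage sketch for `summary_table`, assembled from the class docstring above; the reform dict below is hypothetical, assuming ccc's usual `update_specification` entry point and its `CIT_rate` parameter:

```python
# Hedged usage sketch; the reform values are invented.
from ccc.calculator import Calculator
from ccc.data import Assets
from ccc.parameters import DepreciationParams, Specification

p_base = Specification()
p_reform = Specification()
p_reform.update_specification({"CIT_rate": 0.25})  # hypothetical reform
dp = DepreciationParams()
assets = Assets()

calc_base = Calculator(p=p_base, dp=dp, assets=assets)
calc_reform = Calculator(p=p_reform, dp=dp, assets=assets)

# Rows: overall, corporate, pass-through; equity/debt/mix financing.
table = calc_base.summary_table(calc_reform, output_variable="mettr")
```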
@@ -305,7 +393,7 @@ def summary_table(self, calc, output_variable='mettr', Returns: table_df (Pandas DataFrame): table - ''' + """ assert output_variable in OUTPUT_VAR_LIST assert output_type in OUTPUT_DATA_FORMATS self.calc_base() @@ -316,119 +404,137 @@ def summary_table(self, calc, output_variable='mettr', dfs_out = [] for df in dfs: if not include_land: - df.drop(df[df.asset_name == 'Land'].index, inplace=True) + df.drop(df[df.asset_name == "Land"].index, inplace=True) if not include_inventories: - df.drop(df[df.asset_name == 'Inventories'].index, - inplace=True) + df.drop(df[df.asset_name == "Inventories"].index, inplace=True) # Compute overall separately by tax treatment - treat_df = pd.DataFrame(df.groupby( - ['tax_treat']).apply(self.__f)).reset_index() + treat_df = pd.DataFrame( + df.groupby(["tax_treat"]).apply(self.__f, include_groups=False) + ).reset_index() treat_df = self.calc_other(treat_df) # Compute overall values, across corp and non-corp # just making up a column with same value in all rows so can # continute to use groupby - df['include'] = 1 + df["include"] = 1 all_df = pd.DataFrame.from_dict( - df.groupby(['include']).apply(self.__f).to_dict()) + df.groupby(["include"]) + .apply(self.__f, include_groups=False) + .to_dict() + ) # set tax_treat to corporate b/c only corp and non-corp # recognized in calc_other() - all_df['tax_treat'] = 'corporate' + all_df["tax_treat"] = "corporate" all_df = self.calc_other(all_df) - all_df['tax_treat'] = 'all' + all_df["tax_treat"] = "all" # Put df's together - dfs_out.append(pd.concat([treat_df, all_df], - ignore_index=True, copy=True, - sort=True).reset_index()) + dfs_out.append( + pd.concat( + [treat_df, all_df], ignore_index=True, copy=True, sort=True + ).reset_index() + ) base_tab = dfs_out[0] reform_tab = dfs_out[1] - # print('reform table = ', reform_tab) diff_tab = diff_two_tables(reform_tab, base_tab) table_dict = { - '': ['Overall', 'Corporations', ' Equity Financed', - ' Debt Financed', 'Pass-Through Entities', - ' Equity Financed', ' Debt Financed'], - VAR_DICT[output_variable] + ' Under Baseline Policy': [ - base_tab[ - base_tab['tax_treat'] == - 'all'][output_variable + '_mix'].values[0], - base_tab[ - base_tab['tax_treat'] == - 'corporate'][output_variable + '_mix'].values[0], - base_tab[ - base_tab['tax_treat'] == - 'corporate'][output_variable + '_e'].values[0], - base_tab[ - base_tab['tax_treat'] == - 'corporate'][output_variable + '_d'].values[0], - base_tab[ - base_tab['tax_treat'] == - 'non-corporate'][output_variable + '_mix'].values[0], - base_tab[ - base_tab['tax_treat'] == - 'non-corporate'][output_variable + '_e'].values[0], - base_tab[ - base_tab['tax_treat'] == - 'non-corporate'][output_variable + '_d'].values[0]], - VAR_DICT[output_variable] + ' Under Reform Policy': [ - reform_tab[ - reform_tab['tax_treat'] == - 'all'][output_variable + '_mix'].values[0], - reform_tab[ - reform_tab['tax_treat'] == - 'corporate'][output_variable + '_mix'].values[0], - reform_tab[ - reform_tab['tax_treat'] == - 'corporate'][output_variable + '_e'].values[0], - reform_tab[ - reform_tab['tax_treat'] == - 'corporate'][output_variable + '_d'].values[0], - reform_tab[ - reform_tab['tax_treat'] == - 'non-corporate'][output_variable + '_mix'].values[0], - reform_tab[ - reform_tab['tax_treat'] == - 'non-corporate'][output_variable + '_e'].values[0], - reform_tab[ - reform_tab['tax_treat'] == - 'non-corporate'][output_variable + '_d'].values[0]], - 'Change from Baseline (pp)': [ - diff_tab[ - diff_tab['tax_treat'] == - 
'all'][output_variable + '_mix'].values[0], - diff_tab[ - diff_tab['tax_treat'] == - 'corporate'][output_variable + '_mix'].values[0], - diff_tab[ - diff_tab['tax_treat'] == - 'corporate'][output_variable + '_e'].values[0], - diff_tab[ - diff_tab['tax_treat'] == - 'corporate'][output_variable + '_d'].values[0], - diff_tab[ - diff_tab['tax_treat'] == - 'non-corporate'][output_variable + '_mix'].values[0], - diff_tab[ - diff_tab['tax_treat'] == - 'non-corporate'][output_variable + '_e'].values[0], - diff_tab[ - diff_tab['tax_treat'] == - 'non-corporate'][output_variable + '_d'].values[0]]} + "": [ + "Overall", + "Corporations", + " Equity Financed", + " Debt Financed", + "Pass-Through Entities", + " Equity Financed", + " Debt Financed", + ], + VAR_DICT[output_variable] + + " Under Baseline Policy": [ + base_tab[base_tab["tax_treat"] == "all"][ + output_variable + "_mix" + ].values[0], + base_tab[base_tab["tax_treat"] == "corporate"][ + output_variable + "_mix" + ].values[0], + base_tab[base_tab["tax_treat"] == "corporate"][ + output_variable + "_e" + ].values[0], + base_tab[base_tab["tax_treat"] == "corporate"][ + output_variable + "_d" + ].values[0], + base_tab[base_tab["tax_treat"] == "non-corporate"][ + output_variable + "_mix" + ].values[0], + base_tab[base_tab["tax_treat"] == "non-corporate"][ + output_variable + "_e" + ].values[0], + base_tab[base_tab["tax_treat"] == "non-corporate"][ + output_variable + "_d" + ].values[0], + ], + VAR_DICT[output_variable] + + " Under Reform Policy": [ + reform_tab[reform_tab["tax_treat"] == "all"][ + output_variable + "_mix" + ].values[0], + reform_tab[reform_tab["tax_treat"] == "corporate"][ + output_variable + "_mix" + ].values[0], + reform_tab[reform_tab["tax_treat"] == "corporate"][ + output_variable + "_e" + ].values[0], + reform_tab[reform_tab["tax_treat"] == "corporate"][ + output_variable + "_d" + ].values[0], + reform_tab[reform_tab["tax_treat"] == "non-corporate"][ + output_variable + "_mix" + ].values[0], + reform_tab[reform_tab["tax_treat"] == "non-corporate"][ + output_variable + "_e" + ].values[0], + reform_tab[reform_tab["tax_treat"] == "non-corporate"][ + output_variable + "_d" + ].values[0], + ], + "Change from Baseline (pp)": [ + diff_tab[diff_tab["tax_treat"] == "all"][ + output_variable + "_mix" + ].values[0], + diff_tab[diff_tab["tax_treat"] == "corporate"][ + output_variable + "_mix" + ].values[0], + diff_tab[diff_tab["tax_treat"] == "corporate"][ + output_variable + "_e" + ].values[0], + diff_tab[diff_tab["tax_treat"] == "corporate"][ + output_variable + "_d" + ].values[0], + diff_tab[diff_tab["tax_treat"] == "non-corporate"][ + output_variable + "_mix" + ].values[0], + diff_tab[diff_tab["tax_treat"] == "non-corporate"][ + output_variable + "_e" + ].values[0], + diff_tab[diff_tab["tax_treat"] == "non-corporate"][ + output_variable + "_d" + ].values[0], + ], + } # Make df with dict so can use pandas functions - table_df = pd.DataFrame.from_dict(table_dict, orient='columns') + table_df = pd.DataFrame.from_dict(table_dict, orient="columns") # Put in percentage points - table_df[VAR_DICT[output_variable] + - ' Under Baseline Policy'] *= 100 - table_df[VAR_DICT[output_variable] + - ' Under Reform Policy'] *= 100 - table_df['Change from Baseline (pp)'] *= 100 + table_df[VAR_DICT[output_variable] + " Under Baseline Policy"] *= 100 + table_df[VAR_DICT[output_variable] + " Under Reform Policy"] *= 100 + table_df["Change from Baseline (pp)"] *= 100 table = save_return_table(table_df, output_type, path) return table - def 
asset_share_table(self, include_land=True, - include_inventories=True, output_type=None, - path=None): - ''' + def asset_share_table( + self, + include_land=True, + include_inventories=True, + output_type=None, + path=None, + ): + """ Create table summarizing the output_variable under the baseline and reform policies. @@ -447,47 +553,60 @@ def asset_share_table(self, include_land=True, Returns: table_df (Pandas DataFrame): table - ''' + """ assert output_type in OUTPUT_DATA_FORMATS df = self.__assets.df.copy() if not include_land: - df.drop(df[df.asset_name == 'Land'].index, inplace=True) + df.drop(df[df.asset_name == "Land"].index, inplace=True) if not include_inventories: - df.drop(df[df.asset_name == 'Inventories'].index, - inplace=True) - df1 = pd.DataFrame(df.groupby( - ['tax_treat', 'major_industry']) - ['assets'].sum()).reset_index() - df2 = df1.pivot(index='major_industry', columns='tax_treat', - values='assets').reset_index() - df2['c_share'] = (df2['corporate'] / (df2['corporate'] + - df2['non-corporate'])) - df2['nc_share'] = (df2['non-corporate'] / (df2['corporate'] + - df2['non-corporate'])) - df2.drop(labels=['corporate', 'non-corporate'], axis=1, - inplace=True) - df2.rename(columns={'c_share': 'Corporate', - 'nc_share': 'Pass-Through', - 'major_industry': 'Industry'}, inplace=True) + df.drop(df[df.asset_name == "Inventories"].index, inplace=True) + df1 = pd.DataFrame( + df.groupby(["tax_treat", "major_industry"])["assets"].sum() + ).reset_index() + df2 = df1.pivot( + index="major_industry", columns="tax_treat", values="assets" + ).reset_index() + df2["c_share"] = df2["corporate"] / ( + df2["corporate"] + df2["non-corporate"] + ) + df2["nc_share"] = df2["non-corporate"] / ( + df2["corporate"] + df2["non-corporate"] + ) + df2.drop(labels=["corporate", "non-corporate"], axis=1, inplace=True) + df2.rename( + columns={ + "c_share": "Corporate", + "nc_share": "Pass-Through", + "major_industry": "Industry", + }, + inplace=True, + ) # Create dictionary for table to get industry's in specific order - table_dict = {'Industry': [], 'Corporate': [], 'Pass-Through': []} + table_dict = {"Industry": [], "Corporate": [], "Pass-Through": []} for item in MAJOR_IND_ORDERED: - table_dict['Industry'].append(item) - table_dict['Corporate'].append( - df2[df2.Industry == item]['Corporate'].values[0]) - table_dict['Pass-Through'].append( - df2[df2.Industry == item]['Pass-Through'].values[0]) - table_df = pd.DataFrame.from_dict(table_dict, orient='columns') - table = save_return_table(table_df, output_type, path, - precision=2) + table_dict["Industry"].append(item) + table_dict["Corporate"].append( + df2[df2.Industry == item]["Corporate"].values[0] + ) + table_dict["Pass-Through"].append( + df2[df2.Industry == item]["Pass-Through"].values[0] + ) + table_df = pd.DataFrame.from_dict(table_dict, orient="columns") + table = save_return_table(table_df, output_type, path, precision=2) return table - def asset_summary_table(self, calc, output_variable='mettr', - financing='mix', include_land=True, - include_inventories=True, output_type=None, - path=None): - ''' + def asset_summary_table( + self, + calc, + output_variable="mettr", + financing="mix", + include_land=True, + include_inventories=True, + output_type=None, + path=None, + ): + """ Create table summarizing the output_variable under the baseline and reform policies by major asset grouping. 
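The reworked `asset_share_table` above pivots summed assets to wide form and converts the corporate and non-corporate columns into shares. A compact sketch of that idiom on invented data:

```python
# Invented data; mirrors the pivot-and-share step in asset_share_table.
import pandas as pd

df1 = pd.DataFrame({
    "tax_treat": ["corporate", "non-corporate"] * 2,
    "major_industry": ["Mining", "Mining", "Utilities", "Utilities"],
    "assets": [60.0, 40.0, 30.0, 70.0],
})
df2 = df1.pivot(
    index="major_industry", columns="tax_treat", values="assets"
).reset_index()
total = df2["corporate"] + df2["non-corporate"]
df2["Corporate"] = df2["corporate"] / total
df2["Pass-Through"] = df2["non-corporate"] / total
print(df2[["major_industry", "Corporate", "Pass-Through"]])
```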
@@ -514,7 +633,7 @@ def asset_summary_table(self, calc, output_variable='mettr', Returns: table_df (Pandas DataFrame): table - ''' + """ assert financing in self.__p.financing_list assert output_variable in OUTPUT_VAR_LIST assert output_type in OUTPUT_DATA_FORMATS @@ -526,136 +645,171 @@ def asset_summary_table(self, calc, output_variable='mettr', dfs_out = [] for df in dfs: if not include_land: - df.drop(df[df.asset_name == 'Land'].index, inplace=True) + df.drop(df[df.asset_name == "Land"].index, inplace=True) if not include_inventories: - df.drop(df[df.asset_name == 'Inventories'].index, - inplace=True) - # Make dataframe with just results for major asset cateogries - major_asset_df = pd.DataFrame(df.groupby( - ['major_asset_group', - 'tax_treat']).apply(self.__f)).reset_index() - major_asset_df['asset_name'] =\ - major_asset_df['major_asset_group'] + df.drop(df[df.asset_name == "Inventories"].index, inplace=True) + # Make dataframe with just results for major asset categories + major_asset_df = pd.DataFrame( + df.groupby(["major_asset_group", "tax_treat"]).apply( + self.__f, include_groups=False + ) + ).reset_index() + major_asset_df["asset_name"] = major_asset_df["major_asset_group"] major_asset_df = self.calc_other(major_asset_df) # Compute overall separately by tax treatment - treat_df = pd.DataFrame(df.groupby( - ['tax_treat']).apply(self.__f)).reset_index() + treat_df = pd.DataFrame( + df.groupby(["tax_treat"]).apply(self.__f, include_groups=False) + ).reset_index() treat_df = self.calc_other(treat_df) - treat_df['major_asset_group'] = 'Overall' + treat_df["major_asset_group"] = "Overall" # Compute overall values, across corp and non-corp # just making up a column with same value in all rows so can # continute to use groupby - df['include'] = 1 + df["include"] = 1 all_df = pd.DataFrame.from_dict( - df.groupby(['include']).apply(self.__f).to_dict()) + df.groupby(["include"]) + .apply(self.__f, include_groups=False) + .to_dict() + ) # set tax_treat to corporate b/c only corp and non-corp # recognized in calc_other() - all_df['tax_treat'] = 'corporate' + all_df["tax_treat"] = "corporate" all_df = self.calc_other(all_df) - all_df['tax_treat'] = 'all' - all_df['major_asset_group'] = 'Overall' + all_df["tax_treat"] = "all" + all_df["major_asset_group"] = "Overall" # Put df's together - dfs_out.append(pd.concat([major_asset_df, treat_df, all_df], - ignore_index=True, copy=True, - sort=True).reset_index()) + dfs_out.append( + pd.concat( + [major_asset_df, treat_df, all_df], + ignore_index=True, + copy=True, + sort=True, + ).reset_index() + ) base_tab = dfs_out[0] reform_tab = dfs_out[1] diff_tab = diff_two_tables(reform_tab, base_tab) - major_groups = ['Equipment', 'Structures', - 'Intellectual Property'] + major_groups = ["Equipment", "Structures", "Intellectual Property"] if include_inventories: - major_groups.append('Inventories') + major_groups.append("Inventories") if include_land: - major_groups.append('Land') - category_list = ['Overall', 'Corporate'] + major_groups.append("Land") + category_list = ["Overall", "Corporate"] base_out_list = [ - base_tab[base_tab['tax_treat'] == - 'all'][output_variable + '_' + financing].values[0], - base_tab[( - base_tab['tax_treat'] == 'corporate') & - (base_tab['major_asset_group'] == 'Overall')] - [output_variable + '_' + financing].values[0]] + base_tab[base_tab["tax_treat"] == "all"][ + output_variable + "_" + financing + ].values[0], + base_tab[ + (base_tab["tax_treat"] == "corporate") + & (base_tab["major_asset_group"] == "Overall") + 
][output_variable + "_" + financing].values[0], + ] reform_out_list = [ - reform_tab[reform_tab['tax_treat'] == 'all'] - [output_variable + '_' + financing].values[0], - reform_tab[( - reform_tab['tax_treat'] == 'corporate') & - (reform_tab['major_asset_group'] == 'Overall')] - [output_variable + '_' + financing].values[0]] + reform_tab[reform_tab["tax_treat"] == "all"][ + output_variable + "_" + financing + ].values[0], + reform_tab[ + (reform_tab["tax_treat"] == "corporate") + & (reform_tab["major_asset_group"] == "Overall") + ][output_variable + "_" + financing].values[0], + ] diff_out_list = [ - diff_tab[diff_tab['tax_treat'] == 'all'] - [output_variable + '_' + financing].values[0], - diff_tab[( - diff_tab['tax_treat'] == 'corporate') & - (diff_tab['major_asset_group'] == 'Overall')] - [output_variable + '_' + financing].values[0]] + diff_tab[diff_tab["tax_treat"] == "all"][ + output_variable + "_" + financing + ].values[0], + diff_tab[ + (diff_tab["tax_treat"] == "corporate") + & (diff_tab["major_asset_group"] == "Overall") + ][output_variable + "_" + financing].values[0], + ] for item in major_groups: - category_list.append(' ' + item) + category_list.append(" " + item) base_out_list.append( - base_tab[(base_tab['tax_treat'] == 'corporate') & - (base_tab['major_asset_group'] == item)] - [output_variable + '_' + financing].values[0]) + base_tab[ + (base_tab["tax_treat"] == "corporate") + & (base_tab["major_asset_group"] == item) + ][output_variable + "_" + financing].values[0] + ) reform_out_list.append( - reform_tab[(reform_tab['tax_treat'] == 'corporate') & - (reform_tab['major_asset_group'] == item)] - [output_variable + '_' + financing].values[0]) + reform_tab[ + (reform_tab["tax_treat"] == "corporate") + & (reform_tab["major_asset_group"] == item) + ][output_variable + "_" + financing].values[0] + ) diff_out_list.append( - diff_tab[(diff_tab['tax_treat'] == 'corporate') & - (diff_tab['major_asset_group'] == item)] - [output_variable + '_' + financing].values[0]) - category_list.append('Pass-through') - base_out_list.append(base_tab[ - (base_tab['tax_treat'] == 'non-corporate') & - (base_tab['major_asset_group'] == 'Overall')] - [output_variable + '_' + financing].values[0]) - reform_out_list.append(reform_tab[ - (reform_tab['tax_treat'] == 'non-corporate') & - (reform_tab['major_asset_group'] == 'Overall')] - [output_variable + '_' + financing].values[0]) - diff_out_list.append(diff_tab[ - (diff_tab['tax_treat'] == 'non-corporate') & - (diff_tab['major_asset_group'] == 'Overall')] - [output_variable + '_' + financing].values[0]) + diff_tab[ + (diff_tab["tax_treat"] == "corporate") + & (diff_tab["major_asset_group"] == item) + ][output_variable + "_" + financing].values[0] + ) + category_list.append("Pass-through") + base_out_list.append( + base_tab[ + (base_tab["tax_treat"] == "non-corporate") + & (base_tab["major_asset_group"] == "Overall") + ][output_variable + "_" + financing].values[0] + ) + reform_out_list.append( + reform_tab[ + (reform_tab["tax_treat"] == "non-corporate") + & (reform_tab["major_asset_group"] == "Overall") + ][output_variable + "_" + financing].values[0] + ) + diff_out_list.append( + diff_tab[ + (diff_tab["tax_treat"] == "non-corporate") + & (diff_tab["major_asset_group"] == "Overall") + ][output_variable + "_" + financing].values[0] + ) for item in major_groups: - category_list.append(' ' + item) + category_list.append(" " + item) base_out_list.append( - base_tab[(base_tab['tax_treat'] == 'non-corporate') & - (base_tab['major_asset_group'] == item)] - 
[output_variable + '_' + financing].values[0]) + base_tab[ + (base_tab["tax_treat"] == "non-corporate") + & (base_tab["major_asset_group"] == item) + ][output_variable + "_" + financing].values[0] + ) reform_out_list.append( reform_tab[ - (reform_tab['tax_treat'] == 'non-corporate') & - (reform_tab['major_asset_group'] == item)] - [output_variable + '_' + financing].values[0]) + (reform_tab["tax_treat"] == "non-corporate") + & (reform_tab["major_asset_group"] == item) + ][output_variable + "_" + financing].values[0] + ) diff_out_list.append( diff_tab[ - (diff_tab['tax_treat'] == 'non-corporate') & - (diff_tab['major_asset_group'] == item)] - [output_variable + '_' + financing].values[0]) + (diff_tab["tax_treat"] == "non-corporate") + & (diff_tab["major_asset_group"] == item) + ][output_variable + "_" + financing].values[0] + ) table_dict = { - 'Category': category_list, - VAR_DICT[output_variable] + ' Under Baseline Policy': - base_out_list, - VAR_DICT[output_variable] + ' Under Reform Policy': - reform_out_list, - 'Change from Baseline (pp)': diff_out_list} + "Category": category_list, + VAR_DICT[output_variable] + + " Under Baseline Policy": base_out_list, + VAR_DICT[output_variable] + + " Under Reform Policy": reform_out_list, + "Change from Baseline (pp)": diff_out_list, + } # Make df with dict so can use pandas functions - table_df = pd.DataFrame.from_dict(table_dict, orient='columns') + table_df = pd.DataFrame.from_dict(table_dict, orient="columns") # Put in percentage points - table_df[VAR_DICT[output_variable] + - ' Under Baseline Policy'] *= 100 - table_df[VAR_DICT[output_variable] + - ' Under Reform Policy'] *= 100 - table_df['Change from Baseline (pp)'] *= 100 + table_df[VAR_DICT[output_variable] + " Under Baseline Policy"] *= 100 + table_df[VAR_DICT[output_variable] + " Under Reform Policy"] *= 100 + table_df["Change from Baseline (pp)"] *= 100 table = save_return_table(table_df, output_type, path) return table - def industry_summary_table(self, calc, output_variable='mettr', - financing='mix', include_land=True, - include_inventories=True, - output_type=None, path=None): - ''' + def industry_summary_table( + self, + calc, + output_variable="mettr", + financing="mix", + include_land=True, + include_inventories=True, + output_type=None, + path=None, + ): + """ Create table summarizing the output_variable under the baseline and reform policies by major asset grouping. 
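Throughout these tables the `financing` argument selects among the `_mix`, `_d`, and `_e` output columns (labeled "Typically Financed", "Debt Financed", and "Equity Financed" in the range plot further below). A sketch iterating over them, reusing the hypothetical `calc_base`/`calc_reform` objects from the `summary_table` note:

```python
# Hedged sketch; calc_base / calc_reform as in the summary_table note.
for financing in ("mix", "d", "e"):
    table = calc_base.industry_summary_table(
        calc_reform, output_variable="mettr", financing=financing
    )
    print(financing)  # one summary table per financing assumption
```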
@@ -682,7 +836,7 @@ def industry_summary_table(self, calc, output_variable='mettr', Returns: table_df (Pandas DataFrame): table - ''' + """ assert financing in self.__p.financing_list assert output_variable in OUTPUT_VAR_LIST assert output_type in OUTPUT_DATA_FORMATS @@ -694,126 +848,167 @@ def industry_summary_table(self, calc, output_variable='mettr', dfs_out = [] for df in dfs: if not include_land: - df.drop(df[df.asset_name == 'Land'].index, inplace=True) + df.drop(df[df.asset_name == "Land"].index, inplace=True) if not include_inventories: - df.drop(df[df.asset_name == 'Inventories'].index, - inplace=True) + df.drop(df[df.asset_name == "Inventories"].index, inplace=True) # Make dataframe with just results for major industry - major_ind_df = pd.DataFrame(df.groupby( - ['major_industry', 'tax_treat']).apply( - self.__f)).reset_index() - major_ind_df['Industry'] = major_ind_df['major_industry'] + major_ind_df = pd.DataFrame( + df.groupby(["major_industry", "tax_treat"]).apply( + self.__f, include_groups=False + ) + ).reset_index() + major_ind_df["Industry"] = major_ind_df["major_industry"] major_ind_df = self.calc_other(major_ind_df) # Compute overall separately by tax treatment - treat_df = pd.DataFrame(df.groupby( - ['tax_treat']).apply(self.__f)).reset_index() + treat_df = pd.DataFrame( + df.groupby(["tax_treat"]).apply(self.__f, include_groups=False) + ).reset_index() treat_df = self.calc_other(treat_df) - treat_df['major_industry'] = 'Overall' + treat_df["major_industry"] = "Overall" # Compute overall values, across corp and non-corp # just making up a column with same value in all rows so can # continute to use groupby - df['include'] = 1 + df["include"] = 1 all_df = pd.DataFrame.from_dict( - df.groupby(['include']).apply(self.__f).to_dict()) + df.groupby(["include"]) + .apply(self.__f, include_groups=False) + .to_dict() + ) # set tax_treat to corporate b/c only corp and non-corp # recognized in calc_other() - all_df['tax_treat'] = 'corporate' + all_df["tax_treat"] = "corporate" all_df = self.calc_other(all_df) - all_df['tax_treat'] = 'all' - all_df['major_industry'] = 'Overall' + all_df["tax_treat"] = "all" + all_df["major_industry"] = "Overall" # Put df's together - dfs_out.append(pd.concat([major_ind_df, treat_df, all_df], - ignore_index=True, copy=True, - sort=True).reset_index()) + dfs_out.append( + pd.concat( + [major_ind_df, treat_df, all_df], + ignore_index=True, + copy=True, + sort=True, + ).reset_index() + ) base_tab = dfs_out[0] reform_tab = dfs_out[1] diff_tab = diff_two_tables(reform_tab, base_tab) - category_list = ['Overall', 'Corporate'] + category_list = ["Overall", "Corporate"] base_out_list = [ - base_tab[base_tab['tax_treat'] == - 'all'][output_variable + '_' + financing].values[0], - base_tab[(base_tab['tax_treat'] == 'corporate') & - (base_tab['major_industry'] == 'Overall')] - [output_variable + '_' + financing].values[0]] + base_tab[base_tab["tax_treat"] == "all"][ + output_variable + "_" + financing + ].values[0], + base_tab[ + (base_tab["tax_treat"] == "corporate") + & (base_tab["major_industry"] == "Overall") + ][output_variable + "_" + financing].values[0], + ] reform_out_list = [ - reform_tab[reform_tab['tax_treat'] == 'all'] - [output_variable + '_' + financing].values[0], - reform_tab[(reform_tab['tax_treat'] == 'corporate') & - (reform_tab['major_industry'] == 'Overall')] - [output_variable + '_' + financing].values[0]] + reform_tab[reform_tab["tax_treat"] == "all"][ + output_variable + "_" + financing + ].values[0], + reform_tab[ + 
(reform_tab["tax_treat"] == "corporate") + & (reform_tab["major_industry"] == "Overall") + ][output_variable + "_" + financing].values[0], + ] diff_out_list = [ - diff_tab[diff_tab['tax_treat'] == 'all'] - [output_variable + '_' + financing].values[0], - diff_tab[(diff_tab['tax_treat'] == 'corporate') & - (diff_tab['major_industry'] == 'Overall')] - [output_variable + '_' + financing].values[0]] + diff_tab[diff_tab["tax_treat"] == "all"][ + output_variable + "_" + financing + ].values[0], + diff_tab[ + (diff_tab["tax_treat"] == "corporate") + & (diff_tab["major_industry"] == "Overall") + ][output_variable + "_" + financing].values[0], + ] for item in MAJOR_IND_ORDERED: - category_list.append(' ' + item) + category_list.append(" " + item) base_out_list.append( - base_tab[(base_tab['tax_treat'] == 'corporate') & - (base_tab['major_industry'] == item)] - [output_variable + '_' + financing].values[0]) + base_tab[ + (base_tab["tax_treat"] == "corporate") + & (base_tab["major_industry"] == item) + ][output_variable + "_" + financing].values[0] + ) reform_out_list.append( - reform_tab[(reform_tab['tax_treat'] == 'corporate') & - (reform_tab['major_industry'] == item)] - [output_variable + '_' + financing].values[0]) + reform_tab[ + (reform_tab["tax_treat"] == "corporate") + & (reform_tab["major_industry"] == item) + ][output_variable + "_" + financing].values[0] + ) diff_out_list.append( - diff_tab[(diff_tab['tax_treat'] == 'corporate') & - (diff_tab['major_industry'] == item)] - [output_variable + '_' + financing].values[0]) - category_list.append('Pass-through') - base_out_list.append(base_tab[ - (base_tab['tax_treat'] == 'non-corporate') & - (base_tab['major_industry'] == 'Overall')] - [output_variable + '_' + financing].values[0]) - reform_out_list.append(reform_tab[ - (reform_tab['tax_treat'] == 'non-corporate') & - (reform_tab['major_industry'] == 'Overall')] - [output_variable + '_' + financing].values[0]) - diff_out_list.append(diff_tab[ - (diff_tab['tax_treat'] == 'non-corporate') & - (diff_tab['major_industry'] == 'Overall')] - [output_variable + '_' + financing].values[0]) + diff_tab[ + (diff_tab["tax_treat"] == "corporate") + & (diff_tab["major_industry"] == item) + ][output_variable + "_" + financing].values[0] + ) + category_list.append("Pass-through") + base_out_list.append( + base_tab[ + (base_tab["tax_treat"] == "non-corporate") + & (base_tab["major_industry"] == "Overall") + ][output_variable + "_" + financing].values[0] + ) + reform_out_list.append( + reform_tab[ + (reform_tab["tax_treat"] == "non-corporate") + & (reform_tab["major_industry"] == "Overall") + ][output_variable + "_" + financing].values[0] + ) + diff_out_list.append( + diff_tab[ + (diff_tab["tax_treat"] == "non-corporate") + & (diff_tab["major_industry"] == "Overall") + ][output_variable + "_" + financing].values[0] + ) for item in MAJOR_IND_ORDERED: - category_list.append(' ' + item) + category_list.append(" " + item) base_out_list.append( - base_tab[(base_tab['tax_treat'] == 'non-corporate') & - (base_tab['major_industry'] == item)] - [output_variable + '_' + financing].values[0]) + base_tab[ + (base_tab["tax_treat"] == "non-corporate") + & (base_tab["major_industry"] == item) + ][output_variable + "_" + financing].values[0] + ) reform_out_list.append( reform_tab[ - (reform_tab['tax_treat'] == 'non-corporate') & - (reform_tab['major_industry'] == item)] - [output_variable + '_' + financing].values[0]) + (reform_tab["tax_treat"] == "non-corporate") + & (reform_tab["major_industry"] == item) + ][output_variable + 
"_" + financing].values[0] + ) diff_out_list.append( diff_tab[ - (diff_tab['tax_treat'] == 'non-corporate') & - (diff_tab['major_industry'] == item)] - [output_variable + '_' + financing].values[0]) + (diff_tab["tax_treat"] == "non-corporate") + & (diff_tab["major_industry"] == item) + ][output_variable + "_" + financing].values[0] + ) table_dict = { - 'Category': category_list, - VAR_DICT[output_variable] + ' Under Baseline Policy': - base_out_list, - VAR_DICT[output_variable] + ' Under Reform Policy': - reform_out_list, - 'Change from Baseline (pp)': diff_out_list} + "Category": category_list, + VAR_DICT[output_variable] + + " Under Baseline Policy": base_out_list, + VAR_DICT[output_variable] + + " Under Reform Policy": reform_out_list, + "Change from Baseline (pp)": diff_out_list, + } # Make df with dict so can use pandas functions - table_df = pd.DataFrame.from_dict(table_dict, orient='columns') + table_df = pd.DataFrame.from_dict(table_dict, orient="columns") # Put in percentage points - table_df[VAR_DICT[output_variable] + - ' Under Baseline Policy'] *= 100 - table_df[VAR_DICT[output_variable] + - ' Under Reform Policy'] *= 100 - table_df['Change from Baseline (pp)'] *= 100 + table_df[VAR_DICT[output_variable] + " Under Baseline Policy"] *= 100 + table_df[VAR_DICT[output_variable] + " Under Reform Policy"] *= 100 + table_df["Change from Baseline (pp)"] *= 100 table = save_return_table(table_df, output_type, path) return table - def grouped_bar(self, calc, output_variable='mettr', - financing='mix', group_by_asset=True, - corporate=True, include_land=True, - include_inventories=True, include_title=False): - ''' + def grouped_bar( + self, + calc, + output_variable="mettr", + financing="mix", + group_by_asset=True, + corporate=True, + include_land=True, + include_inventories=True, + include_title=False, + ): + """ Create a grouped bar plot (grouped by major industry or major asset group). 
@@ -841,82 +1036,107 @@ def grouped_bar(self, calc, output_variable='mettr', Returns: p (Bokeh plot object): bar plot - ''' + """ assert financing in self.__p.financing_list assert output_variable in OUTPUT_VAR_LIST if group_by_asset: base_df = self.calc_by_asset( include_land=include_land, - include_inventories=include_inventories) + include_inventories=include_inventories, + ) reform_df = calc.calc_by_asset( - include_land=include_land, - include_inventories=include_inventories + include_land=include_land, + include_inventories=include_inventories, + ) + base_df.drop( + base_df[base_df.asset_name != base_df.major_asset_group].index, + inplace=True, ) - base_df.drop(base_df[base_df.asset_name != - base_df.major_asset_group].index, - inplace=True) reform_df.drop( - reform_df[reform_df.asset_name != - reform_df.major_asset_group].index, - inplace=True) - plot_label = 'major_asset_group' - plot_title = VAR_DICT[output_variable] + ' by Asset Category' + reform_df[ + reform_df.asset_name != reform_df.major_asset_group + ].index, + inplace=True, + ) + plot_label = "major_asset_group" + plot_title = VAR_DICT[output_variable] + " by Asset Category" else: base_df = self.calc_by_industry( include_land=include_land, - include_inventories=include_inventories) + include_inventories=include_inventories, + ) reform_df = calc.calc_by_industry( include_land=include_land, - include_inventories=include_inventories) - base_df.drop(base_df[base_df.Industry != - base_df.major_industry].index, - inplace=True) + include_inventories=include_inventories, + ) + base_df.drop( + base_df[base_df.Industry != base_df.major_industry].index, + inplace=True, + ) reform_df.drop( - reform_df[reform_df.Industry != - reform_df.major_industry].index, - inplace=True) - plot_label = 'major_industry' - plot_title = VAR_DICT[output_variable] + ' by Industry' + reform_df[ + reform_df.Industry != reform_df.major_industry + ].index, + inplace=True, + ) + plot_label = "major_industry" + plot_title = VAR_DICT[output_variable] + " by Industry" # Append dfs together so base policies in one - base_df['policy'] = 'Baseline' - reform_df['policy'] = 'Reform' + base_df["policy"] = "Baseline" + reform_df["policy"] = "Reform" df = pd.concat([base_df, reform_df]) # Drop corporate or non-corporate per arguments if corporate: - df.drop(df[df.tax_treat == 'non-corporate'].index, - inplace=True) - plot_title = plot_title + ' for Corporate Investments' + df.drop(df[df.tax_treat == "non-corporate"].index, inplace=True) + plot_title = plot_title + " for Corporate Investments" else: - df.drop(df[df.tax_treat == 'corporate'].index, - inplace=True) - plot_title = plot_title + ' for Pass-Through Investments' + df.drop(df[df.tax_treat == "corporate"].index, inplace=True) + plot_title = plot_title + " for Pass-Through Investments" # Get mean overall for baseline and reform - mean_base = df[(df[plot_label] == 'Overall') & - (df.policy == 'Baseline')][ - output_variable + '_' + financing].values[0] - mean_reform = df[(df[plot_label] == 'Overall') & - (df.policy == 'Reform')][ - output_variable + '_' + financing].values[0] + mean_base = df[ + (df[plot_label] == "Overall") & (df.policy == "Baseline") + ][output_variable + "_" + financing].values[0] + mean_reform = df[ + (df[plot_label] == "Overall") & (df.policy == "Reform") + ][output_variable + "_" + financing].values[0] # Drop overall means from df - df.drop(df[df[plot_label] == 'Overall'].index, inplace=True) + df.drop(df[df[plot_label] == "Overall"].index, inplace=True) # Drop extra vars and make wide format 
- df1 = df[[plot_label, output_variable + '_mix', 'policy']] - df2 = df1.pivot(index=plot_label, columns='policy', - values=output_variable + '_' + financing) + df1 = df[[plot_label, output_variable + "_" + financing, "policy"]] + df2 = df1.pivot( + index=plot_label, + columns="policy", + values=output_variable + "_" + financing, + ) df2.reset_index(inplace=True) # Create grouped barplot source = ColumnDataSource(data=df2) if not include_title: plot_title = None - p = figure(x_range=df2[plot_label], height=350, - title=plot_title, toolbar_location=None, tools="") - p.vbar(x=dodge(plot_label, 0.0, range=p.x_range), - top='Baseline', width=0.2, source=source, color=BLUE, - legend_label='Baseline') - p.vbar(x=dodge(plot_label, 0.25, range=p.x_range), - top='Reform', width=0.2, source=source, color=RED, - legend_label='Reform') + p = figure( + x_range=df2[plot_label], + height=350, + title=plot_title, + toolbar_location=None, + tools="", + ) + p.vbar( + x=dodge(plot_label, 0.0, range=p.x_range), + top="Baseline", + width=0.2, + source=source, + color=BLUE, + legend_label="Baseline", + ) + p.vbar( + x=dodge(plot_label, 0.25, range=p.x_range), + top="Reform", + width=0.2, + source=source, + color=RED, + legend_label="Reform", + ) p.x_range.range_padding = 0.1 p.xgrid.grid_line_color = None p.legend.location = "top_left" @@ -927,20 +1147,36 @@ def grouped_bar(self, calc, output_variable='mettr', p.width = 800 # Add lines for overall mean for baseline and reform - bline = Span(location=mean_base, dimension='width', - line_color=BLUE, - line_alpha=0.2, line_width=2, line_dash='dashed') - rline = Span(location=mean_reform, dimension='width', - line_color=RED, - line_alpha=0.2, line_width=2, line_dash='dashed') + bline = Span( + location=mean_base, + dimension="width", + line_color=BLUE, + line_alpha=0.2, + line_width=2, + line_dash="dashed", + ) + rline = Span( + location=mean_reform, + dimension="width", + line_color=RED, + line_alpha=0.2, + line_width=2, + line_dash="dashed", + ) p.renderers.extend([bline, rline]) return p - def range_plot(self, calc, output_variable='mettr', - corporate=True, include_land=True, - include_inventories=True, include_title=False): - ''' + def range_plot( + self, + calc, + output_variable="mettr", + corporate=True, + include_land=True, + include_inventories=True, + include_title=False, + ): + """ Create a range plot.
Args: @@ -961,96 +1197,135 @@ def range_plot(self, calc, output_variable='mettr', Returns: p (Bokeh plot object): bar plot - ''' + """ assert output_variable in OUTPUT_VAR_LIST base_df = self.calc_by_asset( - include_land=include_land, - include_inventories=include_inventories) + include_land=include_land, include_inventories=include_inventories + ) reform_df = calc.calc_by_asset( - include_land=include_land, - include_inventories=include_inventories) - base_df.drop(base_df[ - (base_df.asset_name != base_df.major_asset_group) & - (base_df.asset_name != 'Overall') & - (base_df.asset_name != 'Land') & - (base_df.asset_name != 'Inventories')].index, inplace=True) - reform_df.drop(reform_df[ - (reform_df.asset_name != reform_df.major_asset_group) & - (reform_df.asset_name != 'Overall') & - (reform_df.asset_name != 'Land') & - (reform_df.asset_name != 'Inventories')].index, - inplace=True) + include_land=include_land, include_inventories=include_inventories + ) + base_df.drop( + base_df[ + (base_df.asset_name != base_df.major_asset_group) + & (base_df.asset_name != "Overall") + & (base_df.asset_name != "Land") + & (base_df.asset_name != "Inventories") + ].index, + inplace=True, + ) + reform_df.drop( + reform_df[ + (reform_df.asset_name != reform_df.major_asset_group) + & (reform_df.asset_name != "Overall") + & (reform_df.asset_name != "Land") + & (reform_df.asset_name != "Inventories") + ].index, + inplace=True, + ) # Append dfs together so base policies in one - base_df['policy'] = 'Baseline' - reform_df['policy'] = 'Reform' + base_df["policy"] = "Baseline" + reform_df["policy"] = "Reform" # Drop corporate or non-corporate per arguments if corporate: - base_df.drop(base_df[base_df.tax_treat == - 'non-corporate'].index, inplace=True) - reform_df.drop(reform_df[reform_df.tax_treat == - 'non-corporate'].index, - inplace=True) - plot_subtitle = 'Corporate Investments' + base_df.drop( + base_df[base_df.tax_treat == "non-corporate"].index, + inplace=True, + ) + reform_df.drop( + reform_df[reform_df.tax_treat == "non-corporate"].index, + inplace=True, + ) + plot_subtitle = "Corporate Investments" else: - base_df.drop(base_df[base_df.tax_treat == - 'corporate'].index, inplace=True) - reform_df.drop(reform_df[reform_df.tax_treat == - 'corporate'].index, inplace=True) - plot_subtitle = 'Pass-Through Investments' + base_df.drop( + base_df[base_df.tax_treat == "corporate"].index, inplace=True + ) + reform_df.drop( + reform_df[reform_df.tax_treat == "corporate"].index, + inplace=True, + ) + plot_subtitle = "Pass-Through Investments" dfs = [base_df, reform_df] - policy_list = ['baseline', 'reform'] + policy_list = ["baseline", "reform"] # Create dictionary for source data source_dict = { - 'baseline': {'mins': [], 'maxes': [], 'means': [], - 'min_asset': [], 'max_asset': [], - 'mean_asset': [], - 'types': ["Typically Financed", - "Debt Financed", "Equity Financed"], - 'positions': [-0.1, 0.9, 1.9]}, - 'reform': {'mins': [], 'maxes': [], 'means': [], - 'min_asset': [], 'max_asset': [], - 'mean_asset': [], - 'types': ["Typically Financed", - "Debt Financed", "Equity Financed"], - 'positions': [0.1, 1.1, 2.1]}} + "baseline": { + "mins": [], + "maxes": [], + "means": [], + "min_asset": [], + "max_asset": [], + "mean_asset": [], + "types": [ + "Typically Financed", + "Debt Financed", + "Equity Financed", + ], + "positions": [-0.1, 0.9, 1.9], + }, + "reform": { + "mins": [], + "maxes": [], + "means": [], + "min_asset": [], + "max_asset": [], + "mean_asset": [], + "types": [ + "Typically Financed", + "Debt 
Financed", + "Equity Financed", + ], + "positions": [0.1, 1.1, 2.1], + }, + } for i, df in enumerate(dfs): - for fin in ('_mix', '_d', '_e'): + for fin in ("_mix", "_d", "_e"): max_index = df[output_variable + fin].idxmax() min_index = df[output_variable + fin].idxmin() maxval = df.loc[max_index][output_variable + fin] minval = df.loc[min_index][output_variable + fin] - minasset = df.loc[min_index]['asset_name'] - maxasset = df.loc[max_index]['asset_name'] - meanval = df[df.asset_name == - 'Overall'][output_variable + fin].values[0] - meanasset = 'Overall' + minasset = df.loc[min_index]["asset_name"] + maxasset = df.loc[max_index]["asset_name"] + meanval = df[df.asset_name == "Overall"][ + output_variable + fin + ].values[0] + meanasset = "Overall" # put values in dictionary - source_dict[policy_list[i]]['mins'].append(minval) - source_dict[policy_list[i]]['maxes'].append(maxval) - source_dict[policy_list[i]]['means'].append(meanval) - source_dict[policy_list[i]]['min_asset'].append(minasset) - source_dict[policy_list[i]]['max_asset'].append(maxasset) - source_dict[policy_list[i]]['mean_asset'].append(meanasset) + source_dict[policy_list[i]]["mins"].append(minval) + source_dict[policy_list[i]]["maxes"].append(maxval) + source_dict[policy_list[i]]["means"].append(meanval) + source_dict[policy_list[i]]["min_asset"].append(minasset) + source_dict[policy_list[i]]["max_asset"].append(maxasset) + source_dict[policy_list[i]]["mean_asset"].append(meanasset) - base_source = ColumnDataSource(data=source_dict['baseline']) - reform_source = ColumnDataSource(data=source_dict['reform']) + base_source = ColumnDataSource(data=source_dict["baseline"]) + reform_source = ColumnDataSource(data=source_dict["reform"]) # Create figure on which to plot - p = figure(width=500, height=500, x_range=(-0.5, 2.5), - toolbar_location=None, tools='') + p = figure( + width=500, + height=500, + x_range=(-0.5, 2.5), + toolbar_location=None, + tools="", + ) # Format graph title and features # Add title if include_title: - p.add_layout(Title(text=plot_subtitle, - text_font_style="italic"), 'above') - p.add_layout(Title(text=VAR_DICT[output_variable], - text_font_size="16pt"), 'above') + p.add_layout( + Title(text=plot_subtitle, text_font_style="italic"), "above" + ) + p.add_layout( + Title(text=VAR_DICT[output_variable], text_font_size="16pt"), + "above", + ) # p.title.text = plot_title # p.title.align = 'center' # p.title.text_font_size = '16pt' - p.title.text_font = 'Helvetica' + p.title.text_font = "Helvetica" p.xgrid.grid_line_color = None p.ygrid.grid_line_color = None @@ -1059,53 +1334,113 @@ def range_plot(self, calc, output_variable='mettr', p.xaxis[0].ticker = FixedTicker(ticks=[0, 1, 2]) # Done as a custom function instead of a categorical axis because # categorical axes do not work well with other features - p.xaxis.formatter = CustomJSTickFormatter(code=''' + p.xaxis.formatter = CustomJSTickFormatter( + code=""" var types = ["Typically Financed", "Debt Financed", "Equity Financed"] return types[tick] - ''') + """ + ) p.yaxis.axis_label = VAR_DICT[output_variable] p.yaxis[0].formatter = NumeralTickFormatter(format="0%") # Line separating positive and negative values - zline = Span(location=0, dimension='width', line_alpha=0.2, - line_width=2, line_dash='dashed') + zline = Span( + location=0, + dimension="width", + line_alpha=0.2, + line_width=2, + line_dash="dashed", + ) p.renderers.extend([zline]) # Color different regions - standard_region = BoxAnnotation(right=0.5, fill_alpha=0.2, - fill_color='white') - 
debt_region = BoxAnnotation(left=0.5, right=1.5, fill_alpha=0.1, - fill_color='white') - equity_region = BoxAnnotation(left=1.5, fill_alpha=0.2, - fill_color='white') + standard_region = BoxAnnotation( + right=0.5, fill_alpha=0.2, fill_color="white" + ) + debt_region = BoxAnnotation( + left=0.5, right=1.5, fill_alpha=0.1, fill_color="white" + ) + equity_region = BoxAnnotation( + left=1.5, fill_alpha=0.2, fill_color="white" + ) p.add_layout(standard_region) p.add_layout(debt_region) p.add_layout(equity_region) # Draw baseline ranges onto graph - p.segment('positions', 'mins', 'positions', 'maxes', color=BLUE, - line_width=2, source=base_source) + p.segment( + "positions", + "mins", + "positions", + "maxes", + color=BLUE, + line_width=2, + source=base_source, + ) # Add circles for means - p.circle('positions', 'means', size=12, color=BLUE, - source=base_source, legend_label='Baseline') + p.circle( + "positions", + "means", + size=12, + color=BLUE, + source=base_source, + legend_label="Baseline", + ) # Add circles for maxes and mins - p.circle('positions', 'mins', size=12, color=BLUE, - source=base_source, legend_label='Baseline') - p.circle('positions', 'maxes', size=12, color=BLUE, - source=base_source, legend_label='Baseline') + p.circle( + "positions", + "mins", + size=12, + color=BLUE, + source=base_source, + legend_label="Baseline", + ) + p.circle( + "positions", + "maxes", + size=12, + color=BLUE, + source=base_source, + legend_label="Baseline", + ) # Draw reformed ranges onto graph - p.segment('positions', 'mins', 'positions', 'maxes', color=RED, - line_width=2, source=reform_source) + p.segment( + "positions", + "mins", + "positions", + "maxes", + color=RED, + line_width=2, + source=reform_source, + ) # Add circles for means - p.circle('positions', 'means', size=12, color=RED, - source=reform_source, legend_label='Reform') + p.circle( + "positions", + "means", + size=12, + color=RED, + source=reform_source, + legend_label="Reform", + ) # Add circles for maxes and mins - p.circle('positions', 'mins', size=12, color=RED, - source=reform_source, legend_label='Reform') - p.circle('positions', 'maxes', size=12, color=RED, - source=reform_source, legend_label='Reform') + p.circle( + "positions", + "mins", + size=12, + color=RED, + source=reform_source, + legend_label="Reform", + ) + p.circle( + "positions", + "maxes", + size=12, + color=RED, + source=reform_source, + legend_label="Reform", + ) # Set legend location p.legend.location = "bottom_right" @@ -1118,10 +1453,15 @@ def range_plot(self, calc, output_variable='mettr', return p - def bubble_widget(self, calc, output_variable='mettr', - include_land=False, include_inventories=False, - include_IP=False): - ''' + def bubble_widget( + self, + calc, + output_variable="mettr", + include_land=False, + include_inventories=False, + include_IP=False, + ): + """ Create a bubble plot widget. The x-axis shows the value of the output variable, the y are groups (e.g., asset type or industry). 
The widget allows for one to click buttons to view the values for @@ -1147,169 +1487,246 @@ def bubble_widget(self, calc, output_variable='mettr', Returns: layout (Bokeh Layout object): widget - ''' + """ assert output_variable in OUTPUT_VAR_LIST base_df = self.calc_by_asset() reform_df = calc.calc_by_asset() change_df = diff_two_tables(reform_df, base_df) list_df = [base_df, change_df, reform_df] - list_string = ['base', 'change', 'reform'] + list_string = ["base", "change", "reform"] data_sources = {} for i, df_i in enumerate(list_df): - for t in ['c', 'pt']: - if t == 'c': - df = df_i.drop(df_i[df_i.tax_treat != - 'corporate'].index) + for t in ["c", "pt"]: + if t == "c": + df = df_i.drop(df_i[df_i.tax_treat != "corporate"].index) else: - df = df_i.drop(df_i[df_i.tax_treat != - 'non-corporate'].index) + df = df_i.drop( + df_i[df_i.tax_treat != "non-corporate"].index + ) # Remove data from Intellectual Property, Land, and # Inventories Categories if not include_land: - df.drop(df[df.asset_name == 'Land'].index, - inplace=True) + df.drop(df[df.asset_name == "Land"].index, inplace=True) if not include_inventories: - df.drop(df[df.asset_name == - 'Inventories'].index, inplace=True) + df.drop( + df[df.asset_name == "Inventories"].index, inplace=True + ) if not include_IP: - df.drop(df[df.major_asset_group == - 'Intellectual Property'].index, - inplace=True) + df.drop( + df[ + df.major_asset_group == "Intellectual Property" + ].index, + inplace=True, + ) # define the size DataFrame, if change, use base sizes - if list_string[i] != 'change': + if list_string[i] != "change": SIZES = list(range(20, 80, 15)) - size = pd.qcut(df['assets'].values, len(SIZES), - labels=SIZES) - df['size'] = size + size = pd.qcut( + df["assets"].values, len(SIZES), labels=SIZES + ) + df["size"] = size else: - df['size'] = size + df["size"] = size # Form the two categories: Equipment and Structures equipment_df = df.drop( - df[df.major_asset_group.str.contains( - 'Structures')].index).copy() - equipment_df.drop(equipment_df[ - equipment_df.major_asset_group.str.contains( - 'Buildings')].index, inplace=True) + df[df.major_asset_group.str.contains("Structures")].index + ).copy() + equipment_df.drop( + equipment_df[ + equipment_df.major_asset_group.str.contains( + "Buildings" + ) + ].index, + inplace=True, + ) # Drop overall category and overall equipment equipment_df.drop( - equipment_df[equipment_df.asset_name == - 'Overall'].index, inplace=True) + equipment_df[equipment_df.asset_name == "Overall"].index, + inplace=True, + ) equipment_df.drop( - equipment_df[equipment_df.asset_name == - 'Equipment'].index, inplace=True) - structure_df = df.drop(df[ - ~df.major_asset_group.str.contains( - 'Structures|Buildings')].index).copy() + equipment_df[equipment_df.asset_name == "Equipment"].index, + inplace=True, + ) + structure_df = df.drop( + df[ + ~df.major_asset_group.str.contains( + "Structures|Buildings" + ) + ].index + ).copy() # Drop value for all structures - structure_df.drop(structure_df[ - structure_df.asset_name == 'Structures'].index, - inplace=True) + structure_df.drop( + structure_df[ + structure_df.asset_name == "Structures" + ].index, + inplace=True, + ) # Output variables available in plot - format_fields = ['metr_mix', 'metr_d', 'metr_e', - 'mettr_mix', 'mettr_d', 'mettr_e', - 'rho_mix', 'rho_d', 'rho_e', 'z_mix', - 'z_d', 'z_e'] + format_fields = [ + "metr_mix", + "metr_d", + "metr_e", + "mettr_mix", + "mettr_d", + "mettr_e", + "rho_mix", + "rho_d", + "rho_e", + "z_mix", + "z_d", + "z_e", + ] # Make short 
category make_short = { - 'Instruments and Communications Equipment': - 'Instruments and Communications', - 'Office and Residential Equipment': - 'Office and Residential', - 'Other Equipment': 'Other', - 'Transportation Equipment': 'Transportation', - 'Other Industrial Equipment': 'Other Industrial', - 'Nonresidential Buildings': 'Nonresidential Bldgs', - 'Residential Buildings': 'Residential Bldgs', - 'Mining and Drilling Structures': 'Mining and Drilling', - 'Other Structures': 'Other', - 'Computers and Software': 'Computers and Software', - 'Industrial Machinery': 'Industrial Machinery'} - - equipment_df['short_category'] =\ - equipment_df['minor_asset_group'] - equipment_df['short_category'].replace(make_short, - inplace=True) - structure_df['short_category'] =\ - structure_df['minor_asset_group'] - structure_df['short_category'].replace(make_short, - inplace=True) + "Instruments and Communications Equipment": "Instruments and Communications", + "Office and Residential Equipment": "Office and Residential", + "Other Equipment": "Other", + "Transportation Equipment": "Transportation", + "Other Industrial Equipment": "Other Industrial", + "Nonresidential Buildings": "Nonresidential Bldgs", + "Residential Buildings": "Residential Bldgs", + "Mining and Drilling Structures": "Mining and Drilling", + "Other Structures": "Other", + "Computers and Software": "Computers and Software", + "Industrial Machinery": "Industrial Machinery", + } + + equipment_df["short_category"] = equipment_df[ + "minor_asset_group" + ] + equipment_df["short_category"] = equipment_df[ + "short_category" + ].replace( + make_short, + ) + structure_df["short_category"] = structure_df[ + "minor_asset_group" + ] + structure_df["short_category"] = structure_df[ + "short_category" + ].replace( + make_short, + ) # Add the Reform and the Baseline to Equipment Asset for f in format_fields: equipment_copy = equipment_df.copy() - equipment_copy['rate'] = equipment_copy[f] - equipment_copy['hover'] = equipment_copy.apply( - lambda x: "{0:.1f}%".format(x[f] * 100), axis=1) + equipment_copy["rate"] = equipment_copy[f] + equipment_copy["hover"] = equipment_copy.apply( + lambda x: "{0:.1f}%".format(x[f] * 100), axis=1 + ) simple_equipment_copy = equipment_copy.filter( - items=['size', 'rate', 'hover', 'short_category', - 'asset_name']) - data_sources[list_string[i] + '_equipment_' + f + - '_' + t] =\ - ColumnDataSource(simple_equipment_copy) + items=[ + "size", + "rate", + "hover", + "short_category", + "asset_name", + ] + ) + data_sources[ + list_string[i] + "_equipment_" + f + "_" + t + ] = ColumnDataSource(simple_equipment_copy) # Add the Reform and the Baseline to Structures Asset for f in format_fields: structure_copy = structure_df.copy() - structure_copy['rate'] = structure_copy[f] - structure_copy['hover'] = structure_copy.apply( - lambda x: "{0:.1f}%".format(x[f] * 100), axis=1) + structure_copy["rate"] = structure_copy[f] + structure_copy["hover"] = structure_copy.apply( + lambda x: "{0:.1f}%".format(x[f] * 100), axis=1 + ) simple_structure_copy = structure_copy.filter( - items=['size', 'rate', 'hover', 'short_category', - 'asset_name']) - data_sources[list_string[i] + '_structure_' + f + - '_' + t] =\ - ColumnDataSource(simple_structure_copy) + items=[ + "size", + "rate", + "hover", + "short_category", + "asset_name", + ] + ) + data_sources[ + list_string[i] + "_structure_" + f + "_" + t + ] = ColumnDataSource(simple_structure_copy) # Create initial data sources to plot on load - if (list_string[i] == 'base' and t == 'c'): + 
if list_string[i] == "base" and t == "c": equipment_copy = equipment_df.copy() - equipment_copy['rate'] = equipment_copy['mettr_mix'] - equipment_copy['hover'] = equipment_copy.apply( - lambda x: "{0:.1f}%".format(x['mettr_mix'] * 100), - axis=1) + equipment_copy["rate"] = equipment_copy["mettr_mix"] + equipment_copy["hover"] = equipment_copy.apply( + lambda x: "{0:.1f}%".format(x["mettr_mix"] * 100), + axis=1, + ) simple_equipment_copy = equipment_copy.filter( - items=['size', 'rate', 'hover', 'short_category', - 'asset_name']) - data_sources['equip_source'] =\ - ColumnDataSource(simple_equipment_copy) + items=[ + "size", + "rate", + "hover", + "short_category", + "asset_name", + ] + ) + data_sources["equip_source"] = ColumnDataSource( + simple_equipment_copy + ) structure_copy = structure_df.copy() - structure_copy['rate'] = structure_copy['mettr_mix'] - structure_copy['hover'] = structure_copy.apply( - lambda x: "{0:.1f}%".format(x['mettr_mix'] * 100), - axis=1) + structure_copy["rate"] = structure_copy["mettr_mix"] + structure_copy["hover"] = structure_copy.apply( + lambda x: "{0:.1f}%".format(x["mettr_mix"] * 100), + axis=1, + ) simple_structure_copy = structure_copy.filter( - items=['size', 'rate', 'hover', 'short_category', - 'asset_name']) - data_sources['struc_source'] =\ - ColumnDataSource(simple_structure_copy) + items=[ + "size", + "rate", + "hover", + "short_category", + "asset_name", + ] + ) + data_sources["struc_source"] = ColumnDataSource( + simple_structure_copy + ) # Define categories for Equipments assets equipment_assets = [ - 'Computers and Software', 'Instruments and Communications', - 'Office and Residential', 'Transportation', - 'Industrial Machinery', 'Other Industrial', 'Other'] + "Computers and Software", + "Instruments and Communications", + "Office and Residential", + "Transportation", + "Industrial Machinery", + "Other Industrial", + "Other", + ] # Define categories for Structures assets structure_assets = [ - 'Residential Bldgs', 'Nonresidential Bldgs', - 'Mining and Drilling', 'Other'] + "Residential Bldgs", + "Nonresidential Bldgs", + "Mining and Drilling", + "Other", + ] # Equipment plot - p = figure(height=540, width=990, - y_range=list(reversed(equipment_assets)), - tools='hover', background_fill_alpha=0, - title='Marginal Effective Total Tax Rates on ' + - 'Corporate Investments in Equipment') - p.title.align = 'center' - p.title.text_color = '#6B6B73' + p = figure( + height=540, + width=990, + y_range=list(reversed(equipment_assets)), + tools="hover", + background_fill_alpha=0, + title="Marginal Effective Total Tax Rates on " + + "Corporate Investments in Equipment", + ) + p.title.align = "center" + p.title.text_color = "#6B6B73" hover = p.select(dict(type=HoverTool)) - hover.tooltips = [('Asset', ' @asset_name (@hover)')] + hover.tooltips = [("Asset", " @asset_name (@hover)")] p.xaxis.axis_label = "Marginal effective total tax rate" p.xaxis[0].formatter = NumeralTickFormatter(format="0.1%") @@ -1327,16 +1744,32 @@ def bubble_widget(self, calc, output_variable='mettr', p.outline_line_alpha = 1 p.outline_line_color = "black" - p.circle(x='rate', y='short_category', color=BLUE, size='size', - line_color="#333333", fill_alpha=.4, - source=data_sources['equip_source'], alpha=.4) + p.circle( + x="rate", + y="short_category", + color=BLUE, + size="size", + line_color="#333333", + fill_alpha=0.4, + source=data_sources["equip_source"], + alpha=0.4, + ) # Define and add a legend legend_cds = ColumnDataSource( - {'size': SIZES, 'label': ['<$20B', '', '', 
'<$1T'], - 'x': [0, .15, .35, .6]}) - p_legend = figure(height=150, width=380, x_range=(-0.075, 75), - title='Asset Amount', tools='') + { + "size": SIZES, + "label": ["<$20B", "", "", "<$1T"], + "x": [0, 0.15, 0.35, 0.6], + } + ) + p_legend = figure( + height=150, + width=380, + x_range=(-0.075, 75), + title="Asset Amount", + tools="", + ) # p_legend.circle(y=None, x='x', size='size', source=legend_cds, # color=BLUE, fill_alpha=.4, alpha=.4, # line_color="#333333") @@ -1350,16 +1783,20 @@ def bubble_widget(self, calc, output_variable='mettr', # data_sources['equip_plot'] = p # Structures plot - p2 = figure(height=540, width=990, - y_range=list(reversed(structure_assets)), - tools='hover', background_fill_alpha=0, - title='Marginal Effective Total Tax Rates on ' + - 'Corporate Investments in Structures') - p2.title.align = 'center' - p2.title.text_color = '#6B6B73' + p2 = figure( + height=540, + width=990, + y_range=list(reversed(structure_assets)), + tools="hover", + background_fill_alpha=0, + title="Marginal Effective Total Tax Rates on " + + "Corporate Investments in Structures", + ) + p2.title.align = "center" + p2.title.text_color = "#6B6B73" hover = p2.select(dict(type=HoverTool)) - hover.tooltips = [('Asset', ' @asset_name (@hover)')] + hover.tooltips = [("Asset", " @asset_name (@hover)")] p2.xaxis.axis_label = "Marginal effective total tax rate" p2.xaxis[0].formatter = NumeralTickFormatter(format="0.1%") p2.toolbar_location = None @@ -1371,17 +1808,29 @@ def bubble_widget(self, calc, output_variable='mettr', p2.xaxis.major_tick_line_width = 3 p2.xaxis.minor_tick_line_color = "orange" - p2.circle(x='rate', y='short_category', color=RED, size='size', - line_color="#333333", fill_alpha=.4, - source=data_sources['struc_source'], alpha=.4) + p2.circle( + x="rate", + y="short_category", + color=RED, + size="size", + line_color="#333333", + fill_alpha=0.4, + source=data_sources["struc_source"], + alpha=0.4, + ) p2.outline_line_width = 1 p2.outline_line_alpha = 1 p2.outline_line_color = "black" # Define and add a legend - p2_legend = figure(height=150, width=380, x_range=(-0.075, .75), - title='Asset Amount', tools='') + p2_legend = figure( + height=150, + width=380, + x_range=(-0.075, 0.75), + title="Asset Amount", + tools="", + ) # p2_legend.circle(y=None, x='x', size='size', source=legend_cds, # color=RED, fill_alpha=.4, alpha=.4, # line_color="#333333") @@ -1394,34 +1843,43 @@ def bubble_widget(self, calc, output_variable='mettr', # add buttons controls_callback = CustomJS( - args=data_sources, code=CONTROLS_CALLBACK_SCRIPT) + args=data_sources, code=CONTROLS_CALLBACK_SCRIPT + ) c_pt_buttons = RadioButtonGroup( - labels=['Corporate', 'Noncorporate'], active=0) - c_pt_buttons.js_on_change('value', controls_callback) - controls_callback.args['c_pt_buttons'] = c_pt_buttons + labels=["Corporate", "Noncorporate"], active=0 + ) + c_pt_buttons.js_on_change("value", controls_callback) + controls_callback.args["c_pt_buttons"] = c_pt_buttons format_buttons = RadioButtonGroup( - labels=['Baseline', 'Reform', 'Change'], active=0) - format_buttons.js_on_change('value', controls_callback) - controls_callback.args['format_buttons'] = format_buttons + labels=["Baseline", "Reform", "Change"], active=0 + ) + format_buttons.js_on_change("value", controls_callback) + controls_callback.args["format_buttons"] = format_buttons interest_buttons = RadioButtonGroup( - labels=['METTR', 'METR', 'Cost of Capital', - 'NPV of Depreciation'], active=0, width=700) - interest_buttons.js_on_change('value', 
controls_callback) - controls_callback.args['interest_buttons'] = interest_buttons + labels=["METTR", "METR", "Cost of Capital", "NPV of Depreciation"], + active=0, + width=700, + ) + interest_buttons.js_on_change("value", controls_callback) + controls_callback.args["interest_buttons"] = interest_buttons type_buttons = RadioButtonGroup( - labels=['Typically Financed', 'Equity Financed', - 'Debt Financed'], active=0, width=700) - type_buttons.js_on_change('value', controls_callback) - controls_callback.args['type_buttons'] = type_buttons + labels=["Typically Financed", "Equity Financed", "Debt Financed"], + active=0, + width=700, + ) + type_buttons.js_on_change("value", controls_callback) + controls_callback.args["type_buttons"] = type_buttons # Create Tabs - tab = TabPanel(child=column([p, p_legend]), title='Equipment') - tab2 = TabPanel(child=column([p2, p2_legend]), title='Structures') + tab = TabPanel(child=column([p, p_legend]), title="Equipment") + tab2 = TabPanel(child=column([p2, p2_legend]), title="Structures") tabs = Tabs(tabs=[tab, tab2]) layout = gridplot( - children=[[tabs], - [c_pt_buttons, interest_buttons], - [format_buttons, type_buttons]] + children=[ + [tabs], + [c_pt_buttons, interest_buttons], + [format_buttons, type_buttons], + ] ) # layout = gridplot([p, p2], ncols=2, width=250, height=250) # doc = curdoc() @@ -1441,10 +1899,17 @@ def bubble_widget(self, calc, output_variable='mettr', return layout - def asset_bubble(self, calc, output_variable='mettr_mix', - include_inventories=False, include_land=False, - include_IP=False, include_title=False, path=''): - ''' + def asset_bubble( + self, + calc, + output_variable="mettr_mix", + include_inventories=False, + include_land=False, + include_IP=False, + include_title=False, + path="", + ): + """ Create a bubble plot showing the value of the output variable along the x-axis, asset type groupings on the y-axis, and bubbles whose size represent the total assets of a specific type. 
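Note on the hunk below: the rewritten hover labels call .astype(str) before formatting with "{:.2}" / "{0:.1}". Applied to a string, those precision specs truncate characters (0.0625 becomes "0."), so the percent scaling that the removed lambda ("{0:.1f}%".format(x[f] * 100)) produced appears to be lost. A sketch that keeps the original rendering while staying Black-formatted (variable names as in this method):

    # Format the raw rate as a percentage, e.g. 0.0625 -> "6.2%".
    equipment_copy["hover"] = equipment_copy[output_variable].apply(
        lambda v: "{0:.1f}%".format(v * 100)
    )
    structure_copy["hover"] = structure_copy[output_variable].apply(
        lambda v: "{0:.1f}%".format(v * 100)
    )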
@@ -1467,132 +1932,175 @@ def asset_bubble(self, calc, output_variable='mettr_mix', Returns: tabs (Bokeh Tabs object): bubble plots - ''' + """ # Load data as DataFrame df = self.calc_by_asset() # Keep only corporate - df.drop(df[df.tax_treat != 'corporate'].index, inplace=True) + df.drop(df[df.tax_treat != "corporate"].index, inplace=True) # Remove data from Intellectual Property, Land, and # Inventories Categories if not include_land: - df.drop(df[df.asset_name == 'Land'].index, - inplace=True) + df.drop(df[df.asset_name == "Land"].index, inplace=True) if not include_inventories: - df.drop(df[df.asset_name == - 'Inventories'].index, inplace=True) + df.drop(df[df.asset_name == "Inventories"].index, inplace=True) if not include_IP: - df.drop(df[df.major_asset_group == - 'Intellectual Property'].index, - inplace=True) + df.drop( + df[df.major_asset_group == "Intellectual Property"].index, + inplace=True, + ) # define the size DataFrame SIZES = list(range(20, 80, 15)) - df['size'] = pd.qcut(df['assets'].values, len(SIZES), labels=SIZES) + df["size"] = pd.qcut(df["assets"].values, len(SIZES), labels=SIZES) # Form the two Categories: Equipment and Structures equipment_df = df.drop( - df[df.minor_asset_group.str.contains( - 'Structures')].index).copy() - equipment_df.drop(equipment_df[ - equipment_df.minor_asset_group.str.contains( - 'Buildings')].index, inplace=True) + df[df.minor_asset_group.str.contains("Structures")].index + ).copy() + equipment_df.drop( + equipment_df[ + equipment_df.minor_asset_group.str.contains("Buildings") + ].index, + inplace=True, + ) # Drop overall category and overall equipment equipment_df.drop( - equipment_df[equipment_df.asset_name == - 'Overall'].index, inplace=True) + equipment_df[equipment_df.asset_name == "Overall"].index, + inplace=True, + ) equipment_df.drop( - equipment_df[equipment_df.asset_name == - 'Equipment'].index, inplace=True) - structure_df = df.drop(df[ - ~df.minor_asset_group.str.contains( - 'Structures|Buildings')].index).copy() + equipment_df[equipment_df.asset_name == "Equipment"].index, + inplace=True, + ) + structure_df = df.drop( + df[ + ~df.minor_asset_group.str.contains("Structures|Buildings") + ].index + ).copy() # Make short category make_short = { - 'Instruments and Communications Equipment': - 'Instruments and Communications', - 'Office and Residential Equipment': - 'Office and Residential', - 'Other Equipment': 'Other', - 'Transportation Equipment': 'Transportation', - 'Other Industrial Equipment': 'Other Industrial', - 'Nonresidential Buildings': 'Nonresidential Bldgs', - 'Residential Buildings': 'Residential Bldgs', - 'Mining and Drilling Structures': 'Mining and Drilling', - 'Other Structures': 'Other', - 'Computers and Software': 'Computers and Software', - 'Industrial Machinery': 'Industrial Machinery'} - - equipment_df['short_category'] =\ - equipment_df['minor_asset_group'] - equipment_df['short_category'].replace(make_short, - inplace=True) - structure_df['short_category'] =\ - structure_df['minor_asset_group'] - structure_df['short_category'].replace(make_short, - inplace=True) + "Instruments and Communications Equipment": "Instruments and Communications", + "Office and Residential Equipment": "Office and Residential", + "Other Equipment": "Other", + "Transportation Equipment": "Transportation", + "Other Industrial Equipment": "Other Industrial", + "Nonresidential Buildings": "Nonresidential Bldgs", + "Residential Buildings": "Residential Bldgs", + "Mining and Drilling Structures": "Mining and Drilling", + "Other 
Structures": "Other", + "Computers and Software": "Computers and Software", + "Industrial Machinery": "Industrial Machinery", + } + + equipment_df["short_category"] = equipment_df["minor_asset_group"] + # equipment_df['short_category'].replace(make_short, + # inplace=True) + equipment_df.replace( + {"short_category": make_short}, regex=True, inplace=True + ) + structure_df["short_category"] = structure_df["minor_asset_group"] + # structure_df['short_category'].replace(make_short, + # inplace=True) + structure_df.replace( + {"short_category": make_short}, regex=True, inplace=True + ) # Set up datasources data_sources = {} - format_fields = [output_variable] # Add the Reform and the Baseline to Equipment Asset - for f in format_fields: - equipment_copy = equipment_df.copy() - equipment_copy['baseline'] = equipment_copy[f] - equipment_copy['hover'] = equipment_copy.apply( - lambda x: "{0:.1f}%".format(x[f] * 100), axis=1) - data_sources['equipment_' + f] = ColumnDataSource( - equipment_copy[['baseline', 'size', 'hover', 'assets', - 'short_category', 'asset_name']]) + equipment_copy = equipment_df.copy() + equipment_copy["baseline"] = equipment_copy[output_variable] + equipment_copy["hover"] = ( + equipment_copy[output_variable] + .astype(str) + .apply(lambda x: "{:.2}%".format(x)) + ) + data_sources["equipment_" + output_variable] = ColumnDataSource( + equipment_copy[ + [ + "baseline", + "size", + "hover", + "assets", + "short_category", + "asset_name", + ] + ] + ) + # A spacer for the y-axis label - fudge_factor = ' ' + fudge_factor = " " # Add the Reform and the Baseline to Structures Asset - for f in format_fields: - structure_copy = structure_df.copy() - structure_copy['baseline'] = structure_copy[f] - structure_copy['hover'] = structure_copy.apply( - lambda x: "{0:.1f}%".format(x[f] * 100), axis=1) - structure_copy['short_category'] =\ - structure_copy['short_category'].str.replace( - 'Residential Bldgs', fudge_factor + - 'Residential Bldgs') - data_sources['structure_' + f] = ColumnDataSource( - structure_copy[['baseline', 'size', 'hover', 'assets', - 'short_category', 'asset_name']]) + structure_copy = structure_df.copy() + structure_copy["baseline"] = structure_copy[output_variable] + # structure_copy['hover'] = structure_copy.apply( + # lambda x: "{0:.1f}%".format(x[f] * 100), axis=1) + structure_copy["hover"] = structure_copy.astype(str).apply( + lambda x: "{0:.1}%".format(x[output_variable]), axis=1 + ) + structure_copy["short_category"] = structure_copy[ + "short_category" + ].str.replace("Residential Bldgs", fudge_factor + "Residential Bldgs") + data_sources["structure_" + output_variable] = ColumnDataSource( + structure_copy[ + [ + "baseline", + "size", + "hover", + "assets", + "short_category", + "asset_name", + ] + ] + ) # Define categories for Equipments assets - equipment_assets = ['Computers and Software', - 'Instruments and Communications', - 'Office and Residential', - 'Transportation', - 'Industrial Machinery', - 'Other Industrial', - 'Other'] + equipment_assets = [ + "Computers and Software", + "Instruments and Communications", + "Office and Residential", + "Transportation", + "Industrial Machinery", + "Other Industrial", + "Other", + ] # Define categories for Structures assets - structure_assets = ['Residential Bldgs', - 'Nonresidential Bldgs', - 'Mining and Drilling', - 'Other'] + structure_assets = [ + "Residential Bldgs", + "Nonresidential Bldgs", + "Mining and Drilling", + "Other", + ] # Equipment plot - p = figure(height=540, - width=990, - x_range=(-.05, .51), - 
y_range=list(reversed(equipment_assets)), - # x_axis_location="above", - # toolbar_location=None, - tools='hover', - background_fill_alpha=0, - # change things on all axes - **PLOT_FORMATS) + p = figure( + height=540, + width=990, + x_range=(-0.05, 0.51), + y_range=list(reversed(equipment_assets)), + # x_axis_location="above", + # toolbar_location=None, + tools="hover", + background_fill_alpha=0, + # change things on all axes + **PLOT_FORMATS + ) if include_title: - p.add_layout(Title( - text=('Marginal Effective Tax Rates on Corporate Investments' + - ' in Equipment'), **TITLE_FORMATS), 'above') + p.add_layout( + Title( + text=( + "Marginal Effective Tax Rates on Corporate Investments" + + " in Equipment" + ), + **TITLE_FORMATS + ), + "above", + ) hover = p.select(dict(type=HoverTool)) - hover.tooltips = [('Asset', ' @asset_name (@hover)')] + hover.tooltips = [("Asset", " @asset_name (@hover)")] # source = data_sources['equipment_' + output_variable] @@ -1612,23 +2120,35 @@ def asset_bubble(self, calc, output_variable='mettr_mix', p.outline_line_alpha = 1 p.outline_line_color = "black" - p.circle(x='baseline', - y='short_category', - color=BLUE, - size='size', - line_color="#333333", - line_alpha=.1, - fill_alpha=0.4, - source=ColumnDataSource( - data_sources['equipment_' + output_variable].data), - alpha=.4) + p.circle( + x="baseline", + y="short_category", + color=BLUE, + size="size", + line_color="#333333", + line_alpha=0.1, + fill_alpha=0.4, + source=ColumnDataSource( + data_sources["equipment_" + output_variable].data + ), + alpha=0.4, + ) # Define and add a legend legend_cds = ColumnDataSource( - {'size': SIZES, 'label': ['<$20B', '', '', '<$1T'], - 'x': [0, .15, .35, .6]}) - p_legend = figure(height=150, width=380, x_range=(-0.075, 75), - title='Asset Amount', tools='') + { + "size": SIZES, + "label": ["<$20B", "", "", "<$1T"], + "x": [0, 0.15, 0.35, 0.6], + } + ) + p_legend = figure( + height=150, + width=380, + x_range=(-0.075, 75), + title="Asset Amount", + tools="", + ) # p_legend.circle(y=None, x='x', size='size', source=legend_cds, # color=BLUE, fill_alpha=.4, alpha=.4, # line_color="#333333") @@ -1645,21 +2165,29 @@ def asset_bubble(self, calc, output_variable='mettr_mix', # p.toolbar.logo = None # Structures plot - p2 = figure(height=540, - width=990, - x_range=(-.05, .51), - y_range=list(reversed(structure_assets)), - # toolbar_location=None, - tools='hover', - background_fill_alpha=0, - **PLOT_FORMATS) - p2.add_layout(Title( - text=('Marginal Effective Tax Rates on Corporate ' + - 'Investments in Structures'), **TITLE_FORMATS), - 'above') + p2 = figure( + height=540, + width=990, + x_range=(-0.05, 0.51), + y_range=list(reversed(structure_assets)), + # toolbar_location=None, + tools="hover", + background_fill_alpha=0, + **PLOT_FORMATS + ) + p2.add_layout( + Title( + text=( + "Marginal Effective Tax Rates on Corporate " + + "Investments in Structures" + ), + **TITLE_FORMATS + ), + "above", + ) hover = p2.select(dict(type=HoverTool)) - hover.tooltips = [('Asset', ' @asset_name (@hover)')] + hover.tooltips = [("Asset", " @asset_name (@hover)")] # Format axes p2.xaxis.axis_label = "Marginal Effective Tax Rate" p2.xaxis[0].formatter = NumeralTickFormatter(format="0.1%") @@ -1673,24 +2201,32 @@ def asset_bubble(self, calc, output_variable='mettr_mix', p2.xaxis.major_tick_line_width = 3 p2.xaxis.minor_tick_line_color = "orange" - p2.circle(x='baseline', - y='short_category', - color=RED, - size='size', - line_color="#333333", - # line_alpha=.1, - fill_alpha=0.4, - 
source=ColumnDataSource( - data_sources['structure_' + output_variable].data), - alpha=.4) + p2.circle( + x="baseline", + y="short_category", + color=RED, + size="size", + line_color="#333333", + # line_alpha=.1, + fill_alpha=0.4, + source=ColumnDataSource( + data_sources["structure_" + output_variable].data + ), + alpha=0.4, + ) p2.outline_line_width = 1 p2.outline_line_alpha = 1 p2.outline_line_color = "black" # Define and add a legend - p2_legend = figure(height=150, width=380, x_range=(-0.075, .75), - title='Asset Amount', tools='') + p2_legend = figure( + height=150, + width=380, + x_range=(-0.075, 0.75), + title="Asset Amount", + tools="", + ) # p2_legend.circle(y=None, x='x', size='size', source=legend_cds, # color=RED, fill_alpha=.4, alpha=.4, # line_color="#333333") @@ -1702,35 +2238,35 @@ def asset_bubble(self, calc, output_variable='mettr_mix', p2_legend.toolbar.active_drag = None # Create Tabs - tab = TabPanel(child=column([p, p_legend]), title='Equipment') - tab2 = TabPanel(child=column([p2, p2_legend]), title='Structures') + tab = TabPanel(child=column([p, p_legend]), title="Equipment") + tab2 = TabPanel(child=column([p2, p2_legend]), title="Structures") tabs = Tabs(tabs=[tab, tab2]) return tabs def store_assets(self): - ''' + """ Make internal copy of embedded Assets object that can then be restored after interim calculations that make temporary changes to the embedded Assets object. - ''' + """ assert self.__stored_assets is None self.__stored_assets = copy.deepcopy(self.__assets) def restore_assets(self): - ''' + """ Set the embedded Assets object to the stored Assets object that was saved in the last call to the store_assets() method. - ''' + """ assert isinstance(self.__stored_assets, Assets) self.__assets = copy.deepcopy(self.__stored_assets) del self.__stored_assets self.__stored_assets = None def p_param(self, param_name, param_value=None): - ''' + """ If param_value is None, return named parameter in embedded Specification object. If param_value is not None, set named parameter in @@ -1743,7 +2279,7 @@ def p_param(self, param_name, param_value=None): Returns: None - ''' + """ if param_value is None: return getattr(self.__p, param_name) setattr(self.__p, param_name, param_value) @@ -1751,22 +2287,22 @@ def p_param(self, param_name, param_value=None): @property def current_year(self): - ''' + """ Calculator class current calendar year property. - ''' + """ return self.__p.year @property def data_year(self): - ''' + """ Calculator class initial (i.e., first) assets data year property. - ''' + """ return self.__assets.data_year def __f(self, x): - ''' + """ Private method. A function to compute sums and weighted averages from a groubpy object. 
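The hunk below reworks Calculator.__f, the aggregator handed to groupby.apply. wavg itself is defined outside this diff (presumably in ccc.utils); the call sites suggest it takes a group, a value column, and a weight column and returns the weighted mean. A minimal stand-in plus usage, under that assumption:

    import numpy as np
    import pandas as pd

    def wavg(group, avg_name, weight_name):
        # Hypothetical stand-in for the real helper: weighted mean of
        # group[avg_name] using group[weight_name] as weights.
        w = group[weight_name]
        if w.sum() == 0:
            return np.nan
        return (group[avg_name] * w).sum() / w.sum()

    df = pd.DataFrame(
        {
            "major_asset_group": ["Equipment", "Equipment", "Structures"],
            "delta": [0.10, 0.05, 0.02],
            "assets": [100.0, 300.0, 500.0],
        }
    )
    # Mirrors how __f aggregates each group: sum assets, weight the rest.
    out = df.groupby("major_asset_group").apply(
        lambda x: pd.Series(
            {"assets": x["assets"].sum(), "delta": wavg(x, "delta", "assets")}
        )
    )
    print(out)  # Equipment delta = (0.10*100 + 0.05*300) / 400 = 0.0625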
@@ -1776,17 +2312,29 @@ def __f(self, x): Returns: d (Pandas Series): computed variables for the group - ''' + """ d = {} - d['assets'] = x['assets'].sum() - d['delta'] = wavg(x, 'delta', 'assets') - d['rho_mix'] = wavg(x, 'rho_mix', 'assets') - d['rho_d'] = wavg(x, 'rho_d', 'assets') - d['rho_e'] = wavg(x, 'rho_e', 'assets') - d['z_mix'] = wavg(x, 'z_mix', 'assets') - d['z_d'] = wavg(x, 'z_d', 'assets') - d['z_e'] = wavg(x, 'z_e', 'assets') - d['Y'] = wavg(x, 'Y', 'assets') - - return pd.Series(d, index=['assets', 'delta', 'rho_mix', 'rho_d', - 'rho_e', 'z_mix', 'z_d', 'z_e', 'Y']) + d["assets"] = x["assets"].sum() + d["delta"] = wavg(x, "delta", "assets") + d["rho_mix"] = wavg(x, "rho_mix", "assets") + d["rho_d"] = wavg(x, "rho_d", "assets") + d["rho_e"] = wavg(x, "rho_e", "assets") + d["z_mix"] = wavg(x, "z_mix", "assets") + d["z_d"] = wavg(x, "z_d", "assets") + d["z_e"] = wavg(x, "z_e", "assets") + d["Y"] = wavg(x, "Y", "assets") + + return pd.Series( + d, + index=[ + "assets", + "delta", + "rho_mix", + "rho_d", + "rho_e", + "z_mix", + "z_d", + "z_e", + "Y", + ], + ) diff --git a/ccc/constants.py b/ccc/constants.py index f0fedcbd..6aea05f6 100644 --- a/ccc/constants.py +++ b/ccc/constants.py @@ -1,274 +1,569 @@ -VAR_DICT = {'mettr': 'Marginal Effective Total Tax Rate', - 'metr': 'Marginal Effective Tax Rate', - 'rho': 'Cost of Capital', - 'ucc': 'User Cost of Capital', - 'tax_wedge': 'Tax Wedge', - 'z': 'NPV of Depreciation Deductions'} +VAR_DICT = { + "mettr": "Marginal Effective Total Tax Rate", + "metr": "Marginal Effective Tax Rate", + "rho": "Cost of Capital", + "ucc": "User Cost of Capital", + "tax_wedge": "Tax Wedge", + "z": "NPV of Depreciation Deductions", +} -OUTPUT_VAR_LIST = ['metr', 'mettr', 'rho', 'ucc', 'z', 'delta', - 'tax_wedge', 'eatr'] +OUTPUT_VAR_LIST = [ + "metr", + "mettr", + "rho", + "ucc", + "z", + "delta", + "tax_wedge", + "eatr", +] -OUTPUT_DATA_FORMATS = ['csv', 'tex', 'excel', 'json', 'html', None] +OUTPUT_DATA_FORMATS = ["csv", "tex", "excel", "json", "html", None] MAJOR_IND_ORDERED = [ - 'Agriculture, forestry, fishing, and hunting', - 'Mining', 'Utilities', 'Construction', 'Manufacturing', - 'Wholesale trade', 'Retail trade', - 'Transportation and warehousing', 'Information', - 'Finance and insurance', - 'Real estate and rental and leasing', - 'Professional, scientific, and technical services', - 'Management of companies and enterprises', - 'Administrative and waste management services', - 'Educational services', - 'Health care and social assistance', - 'Arts, entertainment, and recreation', - 'Accommodation and food services', - 'Other services, except government'] + "Agriculture, forestry, fishing, and hunting", + "Mining", + "Utilities", + "Construction", + "Manufacturing", + "Wholesale trade", + "Retail trade", + "Transportation and warehousing", + "Information", + "Finance and insurance", + "Real estate and rental and leasing", + "Professional, scientific, and technical services", + "Management of companies and enterprises", + "Administrative and waste management services", + "Educational services", + "Health care and social assistance", + "Arts, entertainment, and recreation", + "Accommodation and food services", + "Other services, except government", +] -TAX_METHODS = {'DB 200%': 2.0, 'DB 150%': 1.5, 'SL': 1.0, - 'Economic': 1.0, 'Expensing': 1.0} +TAX_METHODS = { + "DB 200%": 2.0, + "DB 150%": 1.5, + "SL": 1.0, + "Economic": 1.0, + "Expensing": 1.0, +} MINOR_ASSET_GROUPS = dict.fromkeys( - ['Mainframes', 'PCs', 'DASDs', 'Printers', 'Terminals', - 
'Tape drives', 'Storage devices', 'System integrators', - 'Prepackaged software', 'Custom software'], - 'Computers and Software') -MINOR_ASSET_GROUPS.update(dict.fromkeys( - ['Communications', 'Nonelectro medical instruments', - 'Electro medical instruments', 'Nonmedical instruments', - 'Photocopy and related equipment', - 'Office and accounting equipment'], - 'Instruments and Communications Equipment')) -MINOR_ASSET_GROUPS.update(dict.fromkeys( - ['Household furniture', 'Other furniture', 'Household appliances'], - 'Office and Residential Equipment')) -MINOR_ASSET_GROUPS.update(dict.fromkeys( - ['Light trucks (including utility vehicles)', - 'Other trucks, buses and truck trailers', 'Autos', 'Aircraft', - 'Ships and boats', 'Railroad equipment', 'Steam engines', - 'Internal combustion engines'], 'Transportation Equipment')) -MINOR_ASSET_GROUPS.update(dict.fromkeys( - ['Special industrial machinery', 'General industrial equipment'], - 'Industrial Machinery')) -MINOR_ASSET_GROUPS.update(dict.fromkeys( - ['Nuclear fuel', 'Other fabricated metals', - 'Metalworking machinery', 'Electric transmission and distribution', - 'Other agricultural machinery', 'Farm tractors', - 'Other construction machinery', 'Construction tractors', - 'Mining and oilfield machinery'], 'Other Industrial Equipment')) -MINOR_ASSET_GROUPS.update(dict.fromkeys( - ['Service industry machinery', 'Other electrical', 'Other'], - 'Other Equipment')) -MINOR_ASSET_GROUPS.update(dict.fromkeys( - ['Residential'], 'Residential Buildings')) -MINOR_ASSET_GROUPS.update(dict.fromkeys( - ['Manufacturing', 'Office', 'Hospitals', 'Special care', - 'Medical buildings', 'Multimerchandise shopping', - 'Food and beverage establishments', 'Warehouses', - 'Other commercial', 'Air transportation', 'Other transportation', - 'Religious', 'Educational and vocational', 'Lodging', - 'Public safety'], 'Nonresidential Buildings')) -MINOR_ASSET_GROUPS.update(dict.fromkeys( - ['Gas', 'Petroleum pipelines', 'Communication', - 'Petroleum and natural gas', 'Mining'], - 'Mining and Drilling Structures')) -MINOR_ASSET_GROUPS.update(dict.fromkeys( - ['Electric', 'Wind and solar', 'Amusement and recreation', - 'Other railroad', 'Track replacement', 'Local transit structures', - 'Other land transportation', 'Farm', 'Water supply', - 'Sewage and waste disposal', - 'Highway and conservation and development', 'Mobile structures'], - 'Other Structures')) -MINOR_ASSET_GROUPS.update(dict.fromkeys( - ['Pharmaceutical and medicine manufacturing', - 'Chemical manufacturing, ex. 
pharma and med', - 'Semiconductor and other component manufacturing', - 'Computers and peripheral equipment manufacturing', - 'Communications equipment manufacturing', - 'Navigational and other instruments manufacturing', - 'Other computer and electronic manufacturing, n.e.c.', - 'Motor vehicles and parts manufacturing', - 'Aerospace products and parts manufacturing', - 'Other manufacturing', - 'Scientific research and development services', - 'Software publishers', 'Financial and real estate services', - 'Computer systems design and related services', - 'All other nonmanufacturing, n.e.c.', - 'Private universities and colleges', - 'Other nonprofit institutions', 'Theatrical movies', - 'Long-lived television programs', 'Books', 'Music', - 'Other entertainment originals', 'Own account software'], - 'Intellectual Property')) + [ + "Mainframes", + "PCs", + "DASDs", + "Printers", + "Terminals", + "Tape drives", + "Storage devices", + "System integrators", + "Prepackaged software", + "Custom software", + ], + "Computers and Software", +) +MINOR_ASSET_GROUPS.update( + dict.fromkeys( + [ + "Communications", + "Nonelectro medical instruments", + "Electro medical instruments", + "Nonmedical instruments", + "Photocopy and related equipment", + "Office and accounting equipment", + ], + "Instruments and Communications Equipment", + ) +) +MINOR_ASSET_GROUPS.update( + dict.fromkeys( + ["Household furniture", "Other furniture", "Household appliances"], + "Office and Residential Equipment", + ) +) +MINOR_ASSET_GROUPS.update( + dict.fromkeys( + [ + "Light trucks (including utility vehicles)", + "Other trucks, buses and truck trailers", + "Autos", + "Aircraft", + "Ships and boats", + "Railroad equipment", + "Steam engines", + "Internal combustion engines", + ], + "Transportation Equipment", + ) +) +MINOR_ASSET_GROUPS.update( + dict.fromkeys( + ["Special industrial machinery", "General industrial equipment"], + "Industrial Machinery", + ) +) +MINOR_ASSET_GROUPS.update( + dict.fromkeys( + [ + "Nuclear fuel", + "Other fabricated metals", + "Metalworking machinery", + "Electric transmission and distribution", + "Other agricultural machinery", + "Farm tractors", + "Other construction machinery", + "Construction tractors", + "Mining and oilfield machinery", + ], + "Other Industrial Equipment", + ) +) +MINOR_ASSET_GROUPS.update( + dict.fromkeys( + ["Service industry machinery", "Other electrical", "Other"], + "Other Equipment", + ) +) +MINOR_ASSET_GROUPS.update( + dict.fromkeys(["Residential"], "Residential Buildings") +) +MINOR_ASSET_GROUPS.update( + dict.fromkeys( + [ + "Manufacturing", + "Office", + "Hospitals", + "Special care", + "Medical buildings", + "Multimerchandise shopping", + "Food and beverage establishments", + "Warehouses", + "Other commercial", + "Air transportation", + "Other transportation", + "Religious", + "Educational and vocational", + "Lodging", + "Public safety", + ], + "Nonresidential Buildings", + ) +) +MINOR_ASSET_GROUPS.update( + dict.fromkeys( + [ + "Gas", + "Petroleum pipelines", + "Communication", + "Petroleum and natural gas", + "Mining", + ], + "Mining and Drilling Structures", + ) +) +MINOR_ASSET_GROUPS.update( + dict.fromkeys( + [ + "Electric", + "Wind and solar", + "Amusement and recreation", + "Other railroad", + "Track replacement", + "Local transit structures", + "Other land transportation", + "Farm", + "Water supply", + "Sewage and waste disposal", + "Highway and conservation and development", + "Mobile structures", + ], + "Other Structures", + ) +) 
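These constants all lean on one idiom: dict.fromkeys(keys, value) builds a many-to-one lookup in a single call, and the chained .update() calls merge each minor group into the same mapping. A toy illustration (keys borrowed from the lists above):

    # Every detailed asset name maps to one group label.
    GROUPS = dict.fromkeys(["Mainframes", "PCs"], "Computers and Software")
    GROUPS.update(
        dict.fromkeys(["Autos", "Aircraft"], "Transportation Equipment")
    )
    assert GROUPS["PCs"] == "Computers and Software"
    assert GROUPS["Aircraft"] == "Transportation Equipment"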
+MINOR_ASSET_GROUPS.update( + dict.fromkeys( + [ + "Pharmaceutical and medicine manufacturing", + "Chemical manufacturing, ex. pharma and med", + "Semiconductor and other component manufacturing", + "Computers and peripheral equipment manufacturing", + "Communications equipment manufacturing", + "Navigational and other instruments manufacturing", + "Other computer and electronic manufacturing, n.e.c.", + "Motor vehicles and parts manufacturing", + "Aerospace products and parts manufacturing", + "Other manufacturing", + "Scientific research and development services", + "Software publishers", + "Financial and real estate services", + "Computer systems design and related services", + "All other nonmanufacturing, n.e.c.", + "Private universities and colleges", + "Other nonprofit institutions", + "Theatrical movies", + "Long-lived television programs", + "Books", + "Music", + "Other entertainment originals", + "Own account software", + ], + "Intellectual Property", + ) +) # major asset groups MAJOR_ASSET_GROUPS = dict.fromkeys( - ['Mainframes', 'PCs', 'DASDs', 'Printers', 'Terminals', - 'Tape drives', 'Storage devices', 'System integrators', - 'Prepackaged software', 'Custom software', 'Communications', - 'Nonelectro medical instruments', 'Electro medical instruments', - 'Nonmedical instruments', 'Photocopy and related equipment', - 'Office and accounting equipment', 'Household furniture', - 'Other furniture', 'Household appliances', - 'Light trucks (including utility vehicles)', - 'Other trucks, buses and truck trailers', 'Autos', 'Aircraft', - 'Ships and boats', 'Railroad equipment', 'Steam engines', - 'Internal combustion engines', 'Special industrial machinery', - 'General industrial equipment', 'Nuclear fuel', - 'Other fabricated metals', 'Metalworking machinery', - 'Electric transmission and distribution', - 'Other agricultural machinery', 'Farm tractors', - 'Other construction machinery', 'Construction tractors', - 'Mining and oilfield machinery', 'Service industry machinery', - 'Other electrical', 'Other'], 'Equipment') -MAJOR_ASSET_GROUPS.update(dict.fromkeys( - ['Residential', 'Manufacturing', 'Office', 'Hospitals', - 'Special care', 'Medical buildings', 'Multimerchandise shopping', - 'Food and beverage establishments', 'Warehouses', - 'Other commercial', 'Air transportation', 'Other transportation', - 'Religious', 'Educational and vocational', 'Lodging', - 'Public safety', 'Gas', 'Petroleum pipelines', 'Communication', - 'Petroleum and natural gas', 'Mining', 'Electric', - 'Wind and solar', 'Amusement and recreation', 'Other railroad', - 'Track replacement', 'Local transit structures', - 'Other land transportation', 'Farm', 'Water supply', - 'Sewage and waste disposal', - 'Highway and conservation and development', 'Mobile structures'], - 'Structures')) -MAJOR_ASSET_GROUPS.update(dict.fromkeys( - ['Pharmaceutical and medicine manufacturing', - 'Chemical manufacturing, ex. 
pharma and med', - 'Semiconductor and other component manufacturing', - 'Computers and peripheral equipment manufacturing', - 'Communications equipment manufacturing', - 'Navigational and other instruments manufacturing', - 'Other computer and electronic manufacturing, n.e.c.', - 'Motor vehicles and parts manufacturing', - 'Aerospace products and parts manufacturing', - 'Other manufacturing', - 'Scientific research and development services', - 'Software publishers', 'Financial and real estate services', - 'Computer systems design and related services', - 'All other nonmanufacturing, n.e.c.', - 'Private universities and colleges', - 'Other nonprofit institutions', 'Theatrical movies', - 'Long-lived television programs', 'Books', 'Music', - 'Other entertainment originals', 'Own account software'], - 'Intellectual Property')) -MAJOR_ASSET_GROUPS.update(dict.fromkeys(['Inventories'], 'Inventories')) -MAJOR_ASSET_GROUPS.update(dict.fromkeys(['Land'], 'Land')) + [ + "Mainframes", + "PCs", + "DASDs", + "Printers", + "Terminals", + "Tape drives", + "Storage devices", + "System integrators", + "Prepackaged software", + "Custom software", + "Communications", + "Nonelectro medical instruments", + "Electro medical instruments", + "Nonmedical instruments", + "Photocopy and related equipment", + "Office and accounting equipment", + "Household furniture", + "Other furniture", + "Household appliances", + "Light trucks (including utility vehicles)", + "Other trucks, buses and truck trailers", + "Autos", + "Aircraft", + "Ships and boats", + "Railroad equipment", + "Steam engines", + "Internal combustion engines", + "Special industrial machinery", + "General industrial equipment", + "Nuclear fuel", + "Other fabricated metals", + "Metalworking machinery", + "Electric transmission and distribution", + "Other agricultural machinery", + "Farm tractors", + "Other construction machinery", + "Construction tractors", + "Mining and oilfield machinery", + "Service industry machinery", + "Other electrical", + "Other", + ], + "Equipment", +) +MAJOR_ASSET_GROUPS.update( + dict.fromkeys( + [ + "Residential", + "Manufacturing", + "Office", + "Hospitals", + "Special care", + "Medical buildings", + "Multimerchandise shopping", + "Food and beverage establishments", + "Warehouses", + "Other commercial", + "Air transportation", + "Other transportation", + "Religious", + "Educational and vocational", + "Lodging", + "Public safety", + "Gas", + "Petroleum pipelines", + "Communication", + "Petroleum and natural gas", + "Mining", + "Electric", + "Wind and solar", + "Amusement and recreation", + "Other railroad", + "Track replacement", + "Local transit structures", + "Other land transportation", + "Farm", + "Water supply", + "Sewage and waste disposal", + "Highway and conservation and development", + "Mobile structures", + ], + "Structures", + ) +) +MAJOR_ASSET_GROUPS.update( + dict.fromkeys( + [ + "Pharmaceutical and medicine manufacturing", + "Chemical manufacturing, ex. 
pharma and med", + "Semiconductor and other component manufacturing", + "Computers and peripheral equipment manufacturing", + "Communications equipment manufacturing", + "Navigational and other instruments manufacturing", + "Other computer and electronic manufacturing, n.e.c.", + "Motor vehicles and parts manufacturing", + "Aerospace products and parts manufacturing", + "Other manufacturing", + "Scientific research and development services", + "Software publishers", + "Financial and real estate services", + "Computer systems design and related services", + "All other nonmanufacturing, n.e.c.", + "Private universities and colleges", + "Other nonprofit institutions", + "Theatrical movies", + "Long-lived television programs", + "Books", + "Music", + "Other entertainment originals", + "Own account software", + ], + "Intellectual Property", + ) +) +MAJOR_ASSET_GROUPS.update(dict.fromkeys(["Inventories"], "Inventories")) +MAJOR_ASSET_GROUPS.update(dict.fromkeys(["Land"], "Land")) # define major industry groupings IND_DICT = dict.fromkeys( - ['Farms', 'Forestry, fishing, and related activities'], - 'Agriculture, forestry, fishing, and hunting') -IND_DICT.update(dict.fromkeys( - ['Oil and gas extraction', 'Mining, except oil and gas', - 'Support activities for mining'], 'Mining')) -IND_DICT.update(dict.fromkeys(['Utilities'], 'Utilities')) -IND_DICT.update(dict.fromkeys(['Construction'], 'Construction')) -IND_DICT.update(dict.fromkeys( - ['Wood products', 'Nonmetallic mineral products', 'Primary metals', - 'Fabricated metal products', 'Machinery', - 'Computer and electronic products', - 'Electrical equipment, appliances, and components', - 'Motor vehicles, bodies and trailers, and parts', - 'Other transportation equipment', 'Furniture and related products', - 'Miscellaneous manufacturing', - 'Food, beverage, and tobacco products', - 'Textile mills and textile products', - 'Apparel and leather and allied products', - 'Paper products', 'Printing and related support activities', - 'Petroleum and coal products', 'Chemical products', - 'Plastics and rubber products'], 'Manufacturing')) -IND_DICT.update(dict.fromkeys(['Wholesale trade'], 'Wholesale trade')) -IND_DICT.update(dict.fromkeys(['Retail trade'], 'Retail trade')) -IND_DICT.update(dict.fromkeys( - ['Air transportation', 'Railroad transportation', - 'Water transportation', 'Truck transportation', - 'Transit and ground passenger transportation', - 'Pipeline transportation', - 'Other transportation and support activitis', - 'Warehousing and storage'], 'Transportation and warehousing')) -IND_DICT.update(dict.fromkeys( - ['Publishing industries (including software)', - 'Motion picture and sound recording industries', - 'Broadcasting and telecommunications', - 'Information and telecommunications'], 'Information')) -IND_DICT.update(dict.fromkeys( - ['Federal Reserve banks', - 'Credit intermediation and related activities', - 'Securities, commodity contracts, and investments', - 'Insurance carriers and related activities', - 'Funds, trusts, and other financial vehicles'], - 'Finance and insurance')) -IND_DICT.update(dict.fromkeys( - ['Real estate', - 'Rental and leasing services and lessors of intangible assets'], - 'Real estate and rental and leasing')) -IND_DICT.update(dict.fromkeys( - ['Legal services', 'Computer systems design and related services', - 'Miscellaneous professional, scientific, and technical services'], - 'Professional, scientific, and technical services')) -IND_DICT.update(dict.fromkeys( - ['Management of companies and enterprises'], 
- 'Management of companies and enterprises')) -IND_DICT.update(dict.fromkeys( - ['Administrative and support services', - 'Waster management and remediation services'], - 'Administrative and waste management services')) -IND_DICT.update(dict.fromkeys(['Educational services'], - 'Educational services')) -IND_DICT.update(dict.fromkeys( - ['Ambulatory health care services', 'Hospitals', - 'Nursing and residential care facilities', 'Social assistance'], - 'Health care and social assistance')) -IND_DICT.update(dict.fromkeys( - ['Performing arts, spectator sports, museums, and related activities', - 'Amusements, gambling, and recreation industries'], - 'Arts, entertainment, and recreation')) -IND_DICT.update(dict.fromkeys( - ['Accomodation', 'Food services and drinking places'], - 'Accommodation and food services')) -IND_DICT.update(dict.fromkeys( - ['Other services, except government'], - 'Other services, except government')) + ["Farms", "Forestry, fishing, and related activities"], + "Agriculture, forestry, fishing, and hunting", +) +IND_DICT.update( + dict.fromkeys( + [ + "Oil and gas extraction", + "Mining, except oil and gas", + "Support activities for mining", + ], + "Mining", + ) +) +IND_DICT.update(dict.fromkeys(["Utilities"], "Utilities")) +IND_DICT.update(dict.fromkeys(["Construction"], "Construction")) +IND_DICT.update( + dict.fromkeys( + [ + "Wood products", + "Nonmetallic mineral products", + "Primary metals", + "Fabricated metal products", + "Machinery", + "Computer and electronic products", + "Electrical equipment, appliances, and components", + "Motor vehicles, bodies and trailers, and parts", + "Other transportation equipment", + "Furniture and related products", + "Miscellaneous manufacturing", + "Food, beverage, and tobacco products", + "Textile mills and textile products", + "Apparel and leather and allied products", + "Paper products", + "Printing and related support activities", + "Petroleum and coal products", + "Chemical products", + "Plastics and rubber products", + ], + "Manufacturing", + ) +) +IND_DICT.update(dict.fromkeys(["Wholesale trade"], "Wholesale trade")) +IND_DICT.update(dict.fromkeys(["Retail trade"], "Retail trade")) +IND_DICT.update( + dict.fromkeys( + [ + "Air transportation", + "Railroad transportation", + "Water transportation", + "Truck transportation", + "Transit and ground passenger transportation", + "Pipeline transportation", + "Other transportation and support activitis", + "Warehousing and storage", + ], + "Transportation and warehousing", + ) +) +IND_DICT.update( + dict.fromkeys( + [ + "Publishing industries (including software)", + "Motion picture and sound recording industries", + "Broadcasting and telecommunications", + "Information and telecommunications", + ], + "Information", + ) +) +IND_DICT.update( + dict.fromkeys( + [ + "Federal Reserve banks", + "Credit intermediation and related activities", + "Securities, commodity contracts, and investments", + "Insurance carriers and related activities", + "Funds, trusts, and other financial vehicles", + ], + "Finance and insurance", + ) +) +IND_DICT.update( + dict.fromkeys( + [ + "Real estate", + "Rental and leasing services and lessors of intangible assets", + ], + "Real estate and rental and leasing", + ) +) +IND_DICT.update( + dict.fromkeys( + [ + "Legal services", + "Computer systems design and related services", + "Miscellaneous professional, scientific, and technical services", + ], + "Professional, scientific, and technical services", + ) +) +IND_DICT.update( + dict.fromkeys( + ["Management 
of companies and enterprises"], + "Management of companies and enterprises", + ) +) +IND_DICT.update( + dict.fromkeys( + [ + "Administrative and support services", + "Waster management and remediation services", + ], + "Administrative and waste management services", + ) +) +IND_DICT.update( + dict.fromkeys(["Educational services"], "Educational services") +) +IND_DICT.update( + dict.fromkeys( + [ + "Ambulatory health care services", + "Hospitals", + "Nursing and residential care facilities", + "Social assistance", + ], + "Health care and social assistance", + ) +) +IND_DICT.update( + dict.fromkeys( + [ + "Performing arts, spectator sports, museums, and related activities", + "Amusements, gambling, and recreation industries", + ], + "Arts, entertainment, and recreation", + ) +) +IND_DICT.update( + dict.fromkeys( + ["Accomodation", "Food services and drinking places"], + "Accommodation and food services", + ) +) +IND_DICT.update( + dict.fromkeys( + ["Other services, except government"], + "Other services, except government", + ) +) BEA_CODE_DICT = dict.fromkeys( - ['110C', '113F'], 'Agriculture, forestry, fishing, and hunting') -BEA_CODE_DICT.update(dict.fromkeys( - ['2110', '2120', '2130'], 'Mining')) -BEA_CODE_DICT.update(dict.fromkeys(['2200'], 'Utilities')) -BEA_CODE_DICT.update(dict.fromkeys(['2300'], 'Construction')) -BEA_CODE_DICT.update(dict.fromkeys( - ['3210', '3270', '3310', '3320', '3330', '3340', '3350', '336M', - '336O', '3370', '338A', '311A', '313T', '315A', '3220', '3230', - '3240', '3250', '3260'], 'Manufacturing')) -BEA_CODE_DICT.update(dict.fromkeys(['4200'], 'Wholesale trade')) -BEA_CODE_DICT.update(dict.fromkeys(['44RT'], 'Retail trade')) -BEA_CODE_DICT.update(dict.fromkeys( - ['4810', '4820', '4830', '4840', '4850', '4860', '487S', '4930'], - 'Transportation and warehousing')) -BEA_CODE_DICT.update(dict.fromkeys( - ['5110', '5120', '5130', '5140'], 'Information')) -BEA_CODE_DICT.update(dict.fromkeys( - ['5210', '5220', '5230', '5240', '5250'], 'Finance and insurance')) -BEA_CODE_DICT.update(dict.fromkeys( - ['5310', '5320'], 'Real estate and rental and leasing')) -BEA_CODE_DICT.update(dict.fromkeys( - ['5411', '5415', '5412'], - 'Professional, scientific, and technical services')) -BEA_CODE_DICT.update(dict.fromkeys( - ['5500'], 'Management of companies and enterprises')) -BEA_CODE_DICT.update(dict.fromkeys( - ['5610', '5620'], 'Administrative and waste management services')) -BEA_CODE_DICT.update(dict.fromkeys(['6100'], 'Educational services')) -BEA_CODE_DICT.update(dict.fromkeys( - ['6210', '622H', '6230', '6240'], - 'Health care and social assistance')) -BEA_CODE_DICT.update(dict.fromkeys( - ['711A', '7130'], 'Arts, entertainment, and recreation')) -BEA_CODE_DICT.update(dict.fromkeys( - ['7210', '7220'], 'Accommodation and food services')) -BEA_CODE_DICT.update(dict.fromkeys( - ['8100'], 'Other services, except government')) + ["110C", "113F"], "Agriculture, forestry, fishing, and hunting" +) +BEA_CODE_DICT.update(dict.fromkeys(["2110", "2120", "2130"], "Mining")) +BEA_CODE_DICT.update(dict.fromkeys(["2200"], "Utilities")) +BEA_CODE_DICT.update(dict.fromkeys(["2300"], "Construction")) +BEA_CODE_DICT.update( + dict.fromkeys( + [ + "3210", + "3270", + "3310", + "3320", + "3330", + "3340", + "3350", + "336M", + "336O", + "3370", + "338A", + "311A", + "313T", + "315A", + "3220", + "3230", + "3240", + "3250", + "3260", + ], + "Manufacturing", + ) +) +BEA_CODE_DICT.update(dict.fromkeys(["4200"], "Wholesale trade")) +BEA_CODE_DICT.update(dict.fromkeys(["44RT"], "Retail 
trade")) +BEA_CODE_DICT.update( + dict.fromkeys( + ["4810", "4820", "4830", "4840", "4850", "4860", "487S", "4930"], + "Transportation and warehousing", + ) +) +BEA_CODE_DICT.update( + dict.fromkeys(["5110", "5120", "5130", "5140"], "Information") +) +BEA_CODE_DICT.update( + dict.fromkeys( + ["5210", "5220", "5230", "5240", "5250"], "Finance and insurance" + ) +) +BEA_CODE_DICT.update( + dict.fromkeys(["5310", "5320"], "Real estate and rental and leasing") +) +BEA_CODE_DICT.update( + dict.fromkeys( + ["5411", "5415", "5412"], + "Professional, scientific, and technical services", + ) +) +BEA_CODE_DICT.update( + dict.fromkeys(["5500"], "Management of companies and enterprises") +) +BEA_CODE_DICT.update( + dict.fromkeys( + ["5610", "5620"], "Administrative and waste management services" + ) +) +BEA_CODE_DICT.update(dict.fromkeys(["6100"], "Educational services")) +BEA_CODE_DICT.update( + dict.fromkeys( + ["6210", "622H", "6230", "6240"], "Health care and social assistance" + ) +) +BEA_CODE_DICT.update( + dict.fromkeys(["711A", "7130"], "Arts, entertainment, and recreation") +) +BEA_CODE_DICT.update( + dict.fromkeys(["7210", "7220"], "Accommodation and food services") +) +BEA_CODE_DICT.update( + dict.fromkeys(["8100"], "Other services, except government") +) diff --git a/ccc/data.py b/ccc/data.py index ff425ca5..c11ef6ba 100644 --- a/ccc/data.py +++ b/ccc/data.py @@ -1,6 +1,7 @@ -''' +""" Cost-of-Capital-Calculator asset data class. -''' +""" + # CODING-STYLE CHECKS: # pycodestyle records.py # pylint --disable=locally-disabled records.py @@ -11,8 +12,8 @@ from ccc.utils import ASSET_DATA_CSV_YEAR -class Assets(): - ''' +class Assets: + """ Constructor for the asset-entity type Records class. Args: @@ -40,7 +41,8 @@ class Assets(): which uses all the default parameters of the constructor. - ''' + """ + # suppress pylint warnings about unrecognized Records variables: # pylint: disable=no-member # suppress pylint warnings about uppercase variable names: @@ -51,11 +53,13 @@ class Assets(): ASSET_YEAR = ASSET_DATA_CSV_YEAR CUR_PATH = os.path.abspath(os.path.dirname(__file__)) - VAR_INFO_FILENAME = 'records_variables.json' + VAR_INFO_FILENAME = "records_variables.json" - def __init__(self, - data=os.path.join(CUR_PATH, 'ccc_asset_data.csv'), - start_year=ASSET_DATA_CSV_YEAR): + def __init__( + self, + data=os.path.join(CUR_PATH, "ccc_asset_data.csv"), + start_year=ASSET_DATA_CSV_YEAR, + ): # pylint: disable=too-many-arguments,too-many-locals self.__data_year = start_year # read specified data @@ -66,27 +70,26 @@ def __init__(self, @property def data_year(self): - ''' + """ Records class original data year property. - ''' + """ return self.__data_year @property def array_length(self): - ''' + """ Length of arrays in Records class's DataFrame. - ''' + """ return self.__dim @staticmethod def read_var_info(): - ''' + """ Read Assets variables metadata from JSON file; returns dictionary and specifies static varname sets listed below. 
- ''' - var_info_path = os.path.join(Assets.CUR_PATH, - Assets.VAR_INFO_FILENAME) + """ + var_info_path = os.path.join(Assets.CUR_PATH, Assets.VAR_INFO_FILENAME) if os.path.exists(var_info_path): with open(var_info_path) as vfile: json_text = vfile.read() @@ -94,13 +97,17 @@ def read_var_info(): else: # cannot call read_egg_ function in unit tests vardict = read_egg_json( - Assets.VAR_INFO_FILENAME) # pragma: no cover - Assets.INTEGER_READ_VARS = set(k for k, v in vardict['read'].items() - if v['type'] == 'int') - FLOAT_READ_VARS = set(k for k, v in vardict['read'].items() - if v['type'] == 'float') - Assets.MUST_READ_VARS = set(k for k, v in vardict['read'].items() - if v.get('required')) + Assets.VAR_INFO_FILENAME + ) # pragma: no cover + Assets.INTEGER_READ_VARS = set( + k for k, v in vardict["read"].items() if v["type"] == "int" + ) + FLOAT_READ_VARS = set( + k for k, v in vardict["read"].items() if v["type"] == "float" + ) + Assets.MUST_READ_VARS = set( + k for k, v in vardict["read"].items() if v.get("required") + ) Assets.USABLE_READ_VARS = Assets.INTEGER_READ_VARS | FLOAT_READ_VARS Assets.INTEGER_VARS = Assets.INTEGER_READ_VARS return vardict @@ -112,7 +119,7 @@ def read_var_info(): INTEGER_VARS = set() def _read_data(self, data): - ''' + """ Read Records data from file or use specified DataFrame as data. Args: @@ -121,7 +128,7 @@ def _read_data(self, data): Returns: None - ''' + """ # pylint: disable=too-many-statements,too-many-branches if Assets.INTEGER_VARS == set(): Assets.read_var_info() @@ -135,7 +142,7 @@ def _read_data(self, data): # cannot call read_egg_ function in unit tests assetdf = read_egg_csv(data) # pragma: no cover else: - msg = 'data is neither a string nor a Pandas DataFrame' + msg = "data is neither a string nor a Pandas DataFrame" raise ValueError(msg) self.__dim = len(assetdf.index) self.__index = assetdf.index diff --git a/ccc/get_taxcalc_rates.py b/ccc/get_taxcalc_rates.py index b3ab88fc..df22448d 100644 --- a/ccc/get_taxcalc_rates.py +++ b/ccc/get_taxcalc_rates.py @@ -4,10 +4,16 @@ from ccc.utils import DEFAULT_START_YEAR, TC_LAST_YEAR, RECORDS_START_YEAR -def get_calculator(baseline, calculator_start_year, reform=None, - data='cps', gfactors=None, weights=None, - records_start_year=RECORDS_START_YEAR): - ''' +def get_calculator( + baseline, + calculator_start_year, + reform=None, + data="cps", + gfactors=None, + weights=None, + records_start_year=RECORDS_START_YEAR, +): + """ This function creates the tax calculator object for the microsim Args: @@ -24,7 +30,7 @@ def get_calculator(baseline, calculator_start_year, reform=None, Returns: calc1 (Tax Calculator Calculator object): TC Calculator object with a current_year equal to calculator_start_year - ''' + """ # create a calculator policy1 = Policy() if data is not None and "cps" in data: @@ -37,8 +43,11 @@ def get_calculator(baseline, calculator_start_year, reform=None, records1.e01100 = np.zeros(records1.e01100.shape[0]) elif data is not None: # pragma: no cover records1 = Records( - data=data, gfactors=gfactors, weights=weights, - start_year=records_start_year) # pragma: no cover + data=data, + gfactors=gfactors, + weights=weights, + start_year=records_start_year, + ) # pragma: no cover else: records1 = Records() # pragma: no cover @@ -51,7 +60,7 @@ def get_calculator(baseline, calculator_start_year, reform=None, # the default set up increments year to 2013 calc1 = Calculator(records=records1, policy=policy1) - print('Calculator initial year = ', calc1.current_year) + print("Calculator initial year = 
", calc1.current_year) # this increment_year function extrapolates all PUF variables to # the next year so this step takes the calculator to the start_year @@ -63,9 +72,10 @@ def get_calculator(baseline, calculator_start_year, reform=None, return calc1 -def get_rates(baseline=False, start_year=DEFAULT_START_YEAR, reform={}, - data='cps'): - ''' +def get_rates( + baseline=False, start_year=DEFAULT_START_YEAR, reform={}, data="cps" +): + """ This function computes weighted average marginal tax rates using micro data from the tax calculator @@ -78,10 +88,13 @@ def get_rates(baseline=False, start_year=DEFAULT_START_YEAR, reform={}, individual_rates (dict): individual income (IIT+payroll) marginal tax rates - ''' - calc1 = get_calculator(baseline=baseline, - calculator_start_year=start_year, - reform=reform, data=data) + """ + calc1 = get_calculator( + baseline=baseline, + calculator_start_year=start_year, + reform=reform, + data=data, + ) # running all the functions and calculates taxes calc1.calc_all() @@ -89,68 +102,89 @@ def get_rates(baseline=False, start_year=DEFAULT_START_YEAR, reform={}, # Loop over years in window of calculations end_year = start_year array_size = end_year - start_year + 1 - rates_dict = {'tau_div': 'e00650', 'tau_int': 'e00300', - 'tau_scg': 'p22250', 'tau_lcg': 'p23250'} + rates_dict = { + "tau_div": "e00650", + "tau_int": "e00300", + "tau_scg": "p22250", + "tau_lcg": "p23250", + } individual_rates = { - 'tau_pt': np.zeros(array_size), 'tau_div': np.zeros(array_size), - 'tau_int': np.zeros(array_size), 'tau_scg': np.zeros(array_size), - 'tau_lcg': np.zeros(array_size), 'tau_td': np.zeros(array_size), - 'tau_h': np.zeros(array_size)} + "tau_pt": np.zeros(array_size), + "tau_div": np.zeros(array_size), + "tau_int": np.zeros(array_size), + "tau_scg": np.zeros(array_size), + "tau_lcg": np.zeros(array_size), + "tau_td": np.zeros(array_size), + "tau_h": np.zeros(array_size), + } for year in range(start_year, end_year + 1): - print('Calculator year = ', calc1.current_year) + print("Calculator year = ", calc1.current_year) calc1.advance_to_year(year) - print('year: ', str(calc1.current_year)) + print("year: ", str(calc1.current_year)) # Compute mtrs # Sch C - [mtr_fica_schC, mtr_iit_schC, mtr_combined_schC] =\ - calc1.mtr('e00900p') + [mtr_fica_schC, mtr_iit_schC, mtr_combined_schC] = calc1.mtr("e00900p") # Sch E - includes partnership and s corp income - [mtr_fica_schE, mtr_iit_schE, mtr_combined_schE] =\ - calc1.mtr('e02000') + [mtr_fica_schE, mtr_iit_schE, mtr_combined_schE] = calc1.mtr("e02000") # Partnership and s corp income - [mtr_fica_PT, mtr_iit_PT, mtr_combined_PT] = calc1.mtr('e26270') + [mtr_fica_PT, mtr_iit_PT, mtr_combined_PT] = calc1.mtr("e26270") # pension distributions # does PUF have e01500? Do we want IRA distributions here? # Weird - I see e01500 in PUF, but error when try to call it - [mtr_fica_pension, mtr_iit_pension, mtr_combined_pension] =\ - calc1.mtr('e01700') + [mtr_fica_pension, mtr_iit_pension, mtr_combined_pension] = calc1.mtr( + "e01700" + ) # mortgage interest and property tax deductions # do we also want mtg ins premiums here? 
# mtg interest - [mtr_fica_mtg, mtr_iit_mtg, mtr_combined_mtg] =\ - calc1.mtr('e19200') + [mtr_fica_mtg, mtr_iit_mtg, mtr_combined_mtg] = calc1.mtr("e19200") # prop tax - [mtr_fica_prop, mtr_iit_prop, mtr_combined_prop] =\ - calc1.mtr('e18500') + [mtr_fica_prop, mtr_iit_prop, mtr_combined_prop] = calc1.mtr("e18500") pos_ti = calc1.array("c04800") > 0 - individual_rates['tau_pt'][year - start_year] = ( - (((mtr_iit_schC * np.abs(calc1.array("e00900p"))) + - (mtr_iit_schE * np.abs(calc1.array("e02000") - - calc1.array("e26270"))) + - (mtr_iit_PT * np.abs(calc1.array("e26270")))) * - pos_ti * calc1.array("s006")).sum() / - ((np.abs(calc1.array("e00900p")) + - np.abs(calc1.array("e02000") - calc1.array("e26270")) + - np.abs(calc1.array("e26270"))) * - pos_ti * calc1.array("s006")).sum()) - individual_rates['tau_td'][year - start_year] = ( - (mtr_iit_pension * calc1.array("e01500") * pos_ti * - calc1.array("s006")).sum() / - (calc1.array("e01500") * pos_ti * - calc1.array("s006")).sum()) - individual_rates['tau_h'][year - start_year] = -1 * ( - ((mtr_iit_mtg * calc1.array("e19200")) + - (mtr_iit_prop * calc1.array("e18500")) * pos_ti * - calc1.array("s006")).sum() / ( - (calc1.array("e19200")) + (calc1.array("e18500")) * - pos_ti * calc1.array("s006")).sum()) + individual_rates["tau_pt"][year - start_year] = ( + ( + (mtr_iit_schC * np.abs(calc1.array("e00900p"))) + + ( + mtr_iit_schE + * np.abs(calc1.array("e02000") - calc1.array("e26270")) + ) + + (mtr_iit_PT * np.abs(calc1.array("e26270"))) + ) + * pos_ti + * calc1.array("s006") + ).sum() / ( + ( + np.abs(calc1.array("e00900p")) + + np.abs(calc1.array("e02000") - calc1.array("e26270")) + + np.abs(calc1.array("e26270")) + ) + * pos_ti + * calc1.array("s006") + ).sum() + individual_rates["tau_td"][year - start_year] = ( + mtr_iit_pension + * calc1.array("e01500") + * pos_ti + * calc1.array("s006") + ).sum() / (calc1.array("e01500") * pos_ti * calc1.array("s006")).sum() + individual_rates["tau_h"][year - start_year] = -1 * ( + ( + (mtr_iit_mtg * calc1.array("e19200")) + + (mtr_iit_prop * calc1.array("e18500")) + * pos_ti + * calc1.array("s006") + ).sum() + / ( + (calc1.array("e19200")) + + (calc1.array("e18500")) * pos_ti * calc1.array("s006") + ).sum() + ) # Loop over MTRs that have only one income source for k, v in rates_dict.items(): [mtr_fica, mtr_iit, mtr_combined] = calc1.mtr(v) individual_rates[k][year - start_year] = ( - (mtr_iit * calc1.array(v) * pos_ti * - calc1.array("s006")).sum() / - (calc1.array(v) * pos_ti * calc1.array("s006")).sum()) + mtr_iit * calc1.array(v) * pos_ti * calc1.array("s006") + ).sum() / (calc1.array(v) * pos_ti * calc1.array("s006")).sum() print(individual_rates) return individual_rates diff --git a/ccc/parameters.py b/ccc/parameters.py index 7822638e..1147f752 100644 --- a/ccc/parameters.py +++ b/ccc/parameters.py @@ -6,21 +6,29 @@ from ccc.get_taxcalc_rates import get_rates from ccc.utils import DEFAULT_START_YEAR import ccc.paramfunctions as pf + CURRENT_PATH = os.path.abspath(os.path.dirname(__file__)) class Specification(paramtools.Parameters): - ''' + """ Specification class, contains model parameters. Inherits ParamTools Parameters abstract base class. 
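Because Specification inherits from paramtools.Parameters, the typical workflow is to instantiate it for a start year and then revise parameters through update_specification, which re-runs compute_default_params. A minimal usage sketch, assuming the ccc package is installed; CIT_rate and the 2018 start year are taken from the tests later in this diff:

from ccc.parameters import Specification

p = Specification(year=2018)                # defaults; no Tax-Calculator call
p.update_specification({"CIT_rate": 0.25})  # revise the corporate rate
print(p.u["c"])                             # 1st-layer rate on corporate income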
- ''' + """ + defaults = os.path.join(CURRENT_PATH, "default_parameters.json") label_to_extend = "year" array_first = True - def __init__(self, test=False, baseline=False, - year=DEFAULT_START_YEAR, call_tc=False, iit_reform={}, - data='cps'): + def __init__( + self, + test=False, + baseline=False, + year=DEFAULT_START_YEAR, + call_tc=False, + iit_reform={}, + data="cps", + ): super().__init__() self.set_state(year=year) self.test = test @@ -32,7 +40,7 @@ def __init__(self, test=False, baseline=False, self.ccc_initialize(call_tc=call_tc) def ccc_initialize(self, call_tc=False): - ''' + """ ParametersBase reads JSON file and sets attributes to self Next call self.compute_default_params for further initialization @@ -43,28 +51,29 @@ def ccc_initialize(self, call_tc=False): Returns: None - ''' + """ if call_tc: # Find individual income tax rates from Tax-Calculator - indiv_rates = get_rates(self.baseline, self.year, - self.iit_reform, self.data) - self.tau_pt = indiv_rates['tau_pt'] - self.tau_div = indiv_rates['tau_div'] - self.tau_int = indiv_rates['tau_int'] - self.tau_scg = indiv_rates['tau_scg'] - self.tau_lcg = indiv_rates['tau_lcg'] - self.tau_td = indiv_rates['tau_td'] - self.tau_h = indiv_rates['tau_h'] + indiv_rates = get_rates( + self.baseline, self.year, self.iit_reform, self.data + ) + self.tau_pt = indiv_rates["tau_pt"] + self.tau_div = indiv_rates["tau_div"] + self.tau_int = indiv_rates["tau_int"] + self.tau_scg = indiv_rates["tau_scg"] + self.tau_lcg = indiv_rates["tau_lcg"] + self.tau_td = indiv_rates["tau_td"] + self.tau_h = indiv_rates["tau_h"] # does cheap calculations to find parameter values self.compute_default_params() def compute_default_params(self): - ''' + """ Does cheap calculations to return parameter values - ''' - self.financing_list = ['mix', 'd', 'e'] - self.entity_list = ['c', 'pt'] + """ + self.financing_list = ["mix", "d", "e"] + self.entity_list = ["c", "pt"] # If new_view, then don't assume don't pay out any dividends # This because under new view, equity investments are financed @@ -76,22 +85,26 @@ def compute_default_params(self): self.s, E_pt = pf.calc_s(self) # Set rate of 1st layer of taxation on investment income - self.u = {'c': self.CIT_rate} + self.u = {"c": self.CIT_rate} if not self.pt_entity_tax_ind.all(): - self.u['pt'] = self.tau_pt + self.u["pt"] = self.tau_pt else: - self.u['pt'] = self.pt_entity_tax_rate - E_dict = {'c': self.E_c, 'pt': E_pt} + self.u["pt"] = self.pt_entity_tax_rate + E_dict = {"c": self.E_c, "pt": E_pt} # Allowance for Corporate Equity - ace_dict = {'c': self.ace_c, 'pt': self.ace_pt} + ace_dict = {"c": self.ace_c, "pt": self.ace_pt} # Limitation on interest deduction - int_haircut_dict = {'c': self.interest_deduct_haircut_c, - 'pt': self.interest_deduct_haircut_pt} + int_haircut_dict = { + "c": self.interest_deduct_haircut_c, + "pt": self.interest_deduct_haircut_pt, + } # Debt financing ratios - f_dict = {'c': {'mix': self.f_c, 'd': 1.0, 'e': 0.0}, - 'pt': {'mix': self.f_pt, 'd': 1.0, 'e': 0.0}} + f_dict = { + "c": {"mix": self.f_c, "d": 1.0, "e": 0.0}, + "pt": {"mix": self.f_pt, "d": 1.0, "e": 0.0}, + } # Compute firm discount factors self.r = {} @@ -106,7 +119,7 @@ def compute_default_params(self): f_dict[t][f], int_haircut_dict[t], E_dict[t], - ace_dict[t] + ace_dict[t], ) # Compute firm after-tax rates of return @@ -118,14 +131,14 @@ def compute_default_params(self): self.nominal_interest_rate, self.inflation_rate, f_dict[t][f], - E_dict[t] - ) + E_dict[t], + ) # if no entity level taxes on pass-throughs, ensure 
mettr and metr # on non-corp entities the same if not self.pt_entity_tax_ind: for f in self.financing_list: - r_prime['pt'][f] = self.s['pt'][f] + self.inflation_rate + r_prime["pt"][f] = self.s["pt"][f] + self.inflation_rate # if entity level tax, assume distribute earnings at same rate corps # distribute dividends and these are taxed at dividends tax rate # (which seems likely). Also implicitly assumed that if entity @@ -134,30 +147,38 @@ def compute_default_params(self): else: # keep debt and equity financing ratio the same even though now # entity level tax that might now favor debt - self.s['pt']['mix'] = (self.f_pt * self.s['pt']['d'] + - (1 - self.f_pt) * self.s['c']['e']) + self.s["pt"]["mix"] = ( + self.f_pt * self.s["pt"]["d"] + + (1 - self.f_pt) * self.s["c"]["e"] + ) self.r_prime = r_prime # Map string tax methods into multiple of declining balance - self.tax_methods = {'DB 200%': 2.0, 'DB 150%': 1.5, 'SL': 1.0, - 'Economic': 1.0, 'Expensing': 1.0} + self.tax_methods = { + "DB 200%": 2.0, + "DB 150%": 1.5, + "SL": 1.0, + "Economic": 1.0, + "Expensing": 1.0, + } # Create dictionaries with depreciation system and rate of bonus # depreciation by asset class class_list = [3, 5, 7, 10, 15, 20, 25, 27.5, 39] class_list_str = [ - (str(i) if i != 27.5 else '27_5') for i in class_list + (str(i) if i != 27.5 else "27_5") for i in class_list ] self.bonus_deprec = {} for cl in class_list_str: self.bonus_deprec[cl] = getattr( - self, 'BonusDeprec_{}yr'.format(cl)) + self, "BonusDeprec_{}yr".format(cl) + )[0] # to handle land and inventories # this is fixed later, but should work on this - self.bonus_deprec['100'] = 0.0 + self.bonus_deprec["100"] = 0.0 def default_parameters(self): - ''' + """ Return Specification object same as self except it contains default values of all the parameters. @@ -165,12 +186,12 @@ def default_parameters(self): dps (CCC Specification object): Specification instance with the default parameter values - ''' + """ dps = Specification() return dps def update_specification(self, revision, raise_errors=True): - ''' + """ Updates parameter specification with values in revision dictionary. Args: @@ -203,17 +224,15 @@ def update_specification(self, revision, raise_errors=True): 'BonusDeprec_3yr': {2021: 0.60}, } - ''' - if not (isinstance(revision, dict) or - isinstance(revision, str)): - raise ValueError( - 'ERROR: revision is not a dictionary or string') + """ + if not (isinstance(revision, dict) or isinstance(revision, str)): + raise ValueError("ERROR: revision is not a dictionary or string") self.adjust(revision, raise_errors=raise_errors) self.compute_default_params() @staticmethod def _read_json_revision(obj): - ''' + """ Return a revision dictionary, which is suitable for use with the update_specification method, that is derived from the specified JSON object, which can be None or a string containing @@ -221,8 +240,9 @@ def _read_json_revision(obj): a URL beginning with 'http' pointing to a JSON file hosted online, or a valid JSON text. 
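A revision can therefore arrive as a dict or as JSON text, and revision_warnings_errors (defined at the bottom of this file) validates one without touching a live Specification. A hedged sketch reusing the BonusDeprec_3yr example from the update_specification docstring above:

from ccc.parameters import revision_warnings_errors

rev = {"BonusDeprec_3yr": {2021: 0.60}}
msgs = revision_warnings_errors(rev)  # {'warnings': '', 'errors': ''} when valid
print(msgs["errors"] or "revision is valid")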
- ''' - return paramtools.Parameters.read_params(obj, 'revision') + """ + return paramtools.Parameters.read_params(obj, "revision") + # end of Specification class @@ -231,28 +251,30 @@ class DepreciationRules(ma.Schema): # set some field validation ranges that can't set in JSON life = ma.fields.Float(validate=ma.validate.Range(min=0, max=100)) method = ma.fields.String( - validate=ma.validate.OneOf(choices=[ - "SL", "Expensing", "DB 150%", "DB 200%", "Economic"]) + validate=ma.validate.OneOf( + choices=["SL", "Expensing", "DB 150%", "DB 200%", "Economic"] + ) ) # Register custom type defined above -paramtools.register_custom_type("depreciation_rules", - ma.fields.Nested(DepreciationRules())) +paramtools.register_custom_type( + "depreciation_rules", ma.fields.Nested(DepreciationRules()) +) class DepreciationParams(paramtools.Parameters): - ''' + """ Depreciation parameters class, contains model depreciation parameters. Inherits ParamTools Parameters abstract base class. - ''' - defaults = os.path.join( - CURRENT_PATH, "tax_depreciation_rules.json") + """ + + defaults = os.path.join(CURRENT_PATH, "tax_depreciation_rules.json") def revision_warnings_errors(spec_revision): - ''' + """ Return warnings and errors for the specified Cost-of-Capital-Calculator Specificaton revision in parameter values. @@ -263,13 +285,13 @@ def revision_warnings_errors(spec_revision): Returns: rtn_dict (dict): dicionary containing any warning or error messages - ''' - rtn_dict = {'warnings': '', 'errors': ''} + """ + rtn_dict = {"warnings": "", "errors": ""} spec = Specification() try: spec.update_specification(spec_revision, raise_errors=False) if spec._errors: - rtn_dict['errors'] = spec._errors + rtn_dict["errors"] = spec._errors except ValueError as valerr_msg: - rtn_dict['errors'] = valerr_msg.__str__() + rtn_dict["errors"] = valerr_msg.__str__() return rtn_dict diff --git a/ccc/paramfunctions.py b/ccc/paramfunctions.py index 44a6c1c9..614e6ab1 100644 --- a/ccc/paramfunctions.py +++ b/ccc/paramfunctions.py @@ -2,7 +2,7 @@ def calc_sprime_c_td(Y_td, tau_td, i, pi): - r''' + r""" Compute after-tax rate of return on savings invested in tax-deferred accounts. @@ -21,16 +21,16 @@ def calc_sprime_c_td(Y_td, tau_td, i, pi): Returns: sprime_c_td (scalar): the after-tax return on corporate investments made through tax-deferred accounts - ''' - sprime_c_td = ( - (1 / Y_td) * np.log(((1 - tau_td) * - np.exp(i * Y_td)) + tau_td) - pi) + """ + sprime_c_td = (1 / Y_td) * np.log( + ((1 - tau_td) * np.exp(i * Y_td)) + tau_td + ) - pi return sprime_c_td def calc_s_c_d_td(sprime_c_td, gamma, i, pi): - r''' + r""" Compute the after-tax return on corprate debt investments made through tax-deferred accounts. @@ -49,15 +49,16 @@ def calc_s_c_d_td(sprime_c_td, gamma, i, pi): s_c_d_td (scalar): the after-tax return on corprate debt investments made through tax-deferred accounts - ''' + """ s_c_d_td = gamma * (i - pi) + (1 - gamma) * sprime_c_td return s_c_d_td -def calc_s__d(s_d_td, alpha_d_ft, alpha_d_td, alpha_d_nt, tau_int, - tau_w, i, pi): - r''' +def calc_s__d( + s_d_td, alpha_d_ft, alpha_d_td, alpha_d_nt, tau_int, tau_w, i, pi +): + r""" Compute the after-tax return to debt investments. .. 
math:: @@ -80,15 +81,19 @@ def calc_s__d(s_d_td, alpha_d_ft, alpha_d_td, alpha_d_nt, tau_int, Returns: s__d (scalar): after-tax return on debt investments - ''' - s__d = (alpha_d_ft * (((1 - tau_int) * i) - pi) + alpha_d_td * - s_d_td + alpha_d_nt * (i - pi) - tau_w) + """ + s__d = ( + alpha_d_ft * (((1 - tau_int) * i) - pi) + + alpha_d_td * s_d_td + + alpha_d_nt * (i - pi) + - tau_w + ) return s__d def calc_g__g(Y_g, tau_cg, m, E_c, pi): - r''' + r""" Calculate the real, after-tax annualized return on short or long- term capital gains @@ -106,16 +111,16 @@ def calc_g__g(Y_g, tau_cg, m, E_c, pi): Returns: g__g (scalar): real, after-tax annualized return on capital gains - ''' - g__g = ( - (1 / Y_g) * np.log(((1 - tau_cg) * np.exp((pi + m * E_c) * - Y_g)) + tau_cg) - pi) + """ + g__g = (1 / Y_g) * np.log( + ((1 - tau_cg) * np.exp((pi + m * E_c) * Y_g)) + tau_cg + ) - pi return g__g def calc_g(g_scg, g_lcg, g_xcg, omega_scg, omega_lcg, omega_xcg, m, E_c): - r''' + r""" Calculate the after-tax, annualized, real rate of return on all capital gains @@ -142,14 +147,14 @@ def calc_g(g_scg, g_lcg, g_xcg, omega_scg, omega_lcg, omega_xcg, m, E_c): Returns: g (scalar): the after-tax, annualized, real rate of return on all capital gains - ''' + """ g = omega_scg * g_scg + omega_lcg * g_lcg + omega_xcg * g_xcg return g def calc_s_c_e_td(Y_td, tau_td, i, pi, E_c): - r''' + r""" Calculate the after-tax return on investmentes in corporate equity in tax-deferred accounts. @@ -169,17 +174,18 @@ def calc_s_c_e_td(Y_td, tau_td, i, pi, E_c): Returns: s_c_e_td (scalar): the after-tax return on investmentes in corporate equity in tax-deferred accounts. - ''' - s_c_e_td = ( - (1 / Y_td) * np.log(((1 - tau_td) * - np.exp((pi + E_c) * Y_td)) + tau_td) - pi) + """ + s_c_e_td = (1 / Y_td) * np.log( + ((1 - tau_td) * np.exp((pi + E_c) * Y_td)) + tau_td + ) - pi return s_c_e_td -def calc_s_c_e(s_c_e_ft, s_c_e_td, alpha_c_e_ft, alpha_c_e_td, - alpha_c_e_nt, tau_w, E_c): - r''' +def calc_s_c_e( + s_c_e_ft, s_c_e_td, alpha_c_e_ft, alpha_c_e_td, alpha_c_e_nt, tau_w, E_c +): + r""" Calculate the after-tax return on investments in corporate equity .. math:: @@ -203,15 +209,19 @@ def calc_s_c_e(s_c_e_ft, s_c_e_td, alpha_c_e_ft, alpha_c_e_td, Returns: s_c_e (scalar): the after-tax return on investments in corporate equity - ''' - s_c_e = (alpha_c_e_ft * s_c_e_ft + alpha_c_e_td * s_c_e_td + - alpha_c_e_nt * E_c - tau_w) + """ + s_c_e = ( + alpha_c_e_ft * s_c_e_ft + + alpha_c_e_td * s_c_e_td + + alpha_c_e_nt * E_c + - tau_w + ) return s_c_e def calc_s(p): - ''' + """ Compute the after-tax rate of return to savers, s. Calls other `calc_s_x_y` functions to compute various rates of return. 
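Several of the helpers above share one tax-deferred accumulation formula, s' = (1/Y) * log((1 - tau) * exp(i*Y) + tau) - pi: annualize the after-tax nominal value of $1 held for Y years, then subtract inflation. A quick numeric check of the calc_sprime_c_td expression with illustrative inputs (8-year holding period, 20 percent deferred-account tax rate, 5 percent nominal interest, 2 percent inflation):

import numpy as np

Y_td, tau_td, i, pi = 8.0, 0.20, 0.05, 0.02
sprime_c_td = (1 / Y_td) * np.log((1 - tau_td) * np.exp(i * Y_td) + tau_td) - pi
print(sprime_c_td)  # ~0.0215, a bit below the 3 percent pre-tax real return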
@@ -228,27 +238,42 @@ def calc_s(p): * E_pt (scalar): required pre-tax return on pass-through investments - ''' + """ # Compute after-tax rate of return on savings invested in # tax-deferred accounts - sprime_c_td = calc_sprime_c_td(p.Y_td, p.tau_td, - p.nominal_interest_rate, - p.inflation_rate) + sprime_c_td = calc_sprime_c_td( + p.Y_td, p.tau_td, p.nominal_interest_rate, p.inflation_rate + ) # The after-tax return on corprate debt investments made through # tax-deferred accounts - s_c_d_td = calc_s_c_d_td(sprime_c_td, p.gamma, - p.nominal_interest_rate, p.inflation_rate) + s_c_d_td = calc_s_c_d_td( + sprime_c_td, p.gamma, p.nominal_interest_rate, p.inflation_rate + ) # The after-tax return on corporate debt investments - s_c_d = calc_s__d(s_c_d_td, p.alpha_c_d_ft, p.alpha_c_d_td, - p.alpha_c_d_nt, p.tau_int, p.tau_w, - p.nominal_interest_rate, p.inflation_rate) + s_c_d = calc_s__d( + s_c_d_td, + p.alpha_c_d_ft, + p.alpha_c_d_td, + p.alpha_c_d_nt, + p.tau_int, + p.tau_w, + p.nominal_interest_rate, + p.inflation_rate, + ) # The after-tax return on non-corporate debt investments made # through tax deferred accounts s_pt_d_td = s_c_d_td # The after-tax return on non-corporate debt investments - s_pt_d = calc_s__d(s_pt_d_td, p.alpha_pt_d_ft, p.alpha_pt_d_td, - p.alpha_pt_d_nt, p.tau_int, p.tau_w, - p.nominal_interest_rate, p.inflation_rate) + s_pt_d = calc_s__d( + s_pt_d_td, + p.alpha_pt_d_ft, + p.alpha_pt_d_td, + p.alpha_pt_d_nt, + p.tau_int, + p.tau_w, + p.nominal_interest_rate, + p.inflation_rate, + ) # The after-tax real, annualized return on short-term capital gains g_scg = calc_g__g(p.Y_scg, p.tau_scg, p.m, p.E_c, p.inflation_rate) # The after-tax real, annualized return on long-term capital gains @@ -258,18 +283,26 @@ def calc_s(p): g_xcg = calc_g__g(p.Y_xcg, p.tau_xcg, p.m, p.E_c, p.inflation_rate) # The after-tax real, annualized return on all capital gains g = calc_g( - g_scg, g_lcg, g_xcg, p.omega_scg, p.omega_lcg, p.omega_xcg, - p.m, p.E_c) + g_scg, g_lcg, g_xcg, p.omega_scg, p.omega_lcg, p.omega_xcg, p.m, p.E_c + ) # The after-tax return on corporate equity investments made in fully # taxable accounts s_c_e_ft = (1 - p.m) * p.E_c * (1 - p.tau_div) + g # The after-tax return on corporate equity investments made in # tax-deferred acounts - s_c_e_td = calc_s_c_e_td(p.Y_td, p.tau_td, p.nominal_interest_rate, - p.inflation_rate, p.E_c) + s_c_e_td = calc_s_c_e_td( + p.Y_td, p.tau_td, p.nominal_interest_rate, p.inflation_rate, p.E_c + ) # The after-tax return on corporate equity investments - s_c_e = calc_s_c_e(s_c_e_ft, s_c_e_td, p.alpha_c_e_ft, - p.alpha_c_e_td, p.alpha_c_e_nt, p.tau_w, p.E_c) + s_c_e = calc_s_c_e( + s_c_e_ft, + s_c_e_td, + p.alpha_c_e_ft, + p.alpha_c_e_td, + p.alpha_c_e_nt, + p.tau_w, + p.E_c, + ) # The after-tax return on corporate investments (all - debt and # equity combined) s_c = p.f_c * s_c_d + (1 - p.f_c) * s_c_e @@ -281,15 +314,18 @@ def calc_s(p): # equity combined) s_pt = p.f_pt * s_pt_d + (1 - p.f_pt) * s_pt_e # Return the after-tax rates of return on all types of investments - s_dict = {'c': {'mix': s_c, 'd': s_c_d, 'e': s_c_e}, - 'pt': {'mix': s_pt, 'd': s_pt_d, 'e': s_pt_e}} + s_dict = { + "c": {"mix": s_c, "d": s_c_d, "e": s_c_e}, + "pt": {"mix": s_pt, "d": s_pt_d, "e": s_pt_e}, + } return s_dict, E_pt -def calc_r(u, nominal_int_rate, inflation_rate, ace_int_rate, f, - int_haircut, E, ace): - r''' +def calc_r( + u, nominal_int_rate, inflation_rate, ace_int_rate, f, int_haircut, E, ace +): + r""" Compute firm nominal discount rates .. 
math:: @@ -309,17 +345,16 @@ def calc_r(u, nominal_int_rate, inflation_rate, ace_int_rate, f, Returns: r (array_like): nominal discount rate - ''' - r = ( - f * (nominal_int_rate * (1 - (1 - int_haircut) * u)) + (1 - f) * - (E + inflation_rate - ace_int_rate * ace) - ) + """ + r = f * (nominal_int_rate * (1 - (1 - int_haircut) * u)) + (1 - f) * ( + E + inflation_rate - ace_int_rate * ace + ) return r def calc_r_prime(nominal_int_rate, inflation_rate, f, E): - r''' + r""" Compute firm nominal, after-tax rates of return .. math:: @@ -333,9 +368,7 @@ def calc_r_prime(nominal_int_rate, inflation_rate, f, E): Returns: r_prime (array_like): nominal after-tax rate of return - ''' - r_prime = ( - f * nominal_int_rate + (1 - f) * (E + inflation_rate) - ) + """ + r_prime = f * nominal_int_rate + (1 - f) * (E + inflation_rate) return r_prime diff --git a/ccc/styles.py b/ccc/styles.py index 1171e4c5..ad6eac88 100644 --- a/ccc/styles.py +++ b/ccc/styles.py @@ -15,10 +15,10 @@ NODATA_COLOR = "#eeeeee" GRAY = "#CCCCCC" DARK_GRAY = "#6B6B73" -BLUE = '#718dbf' -RED = '#e84d60' -GREEN = '#32CD32' -PURPLE = '#C5007C' +BLUE = "#718dbf" +RED = "#e84d60" +GREEN = "#32CD32" +PURPLE = "#C5007C" AXIS_FORMATS = dict( minor_tick_in=None, @@ -29,11 +29,9 @@ axis_label_text_font=FONT, axis_label_text_font_style="italic", axis_label_text_font_size="8pt", - axis_line_color=DARK_GRAY, major_tick_line_color=DARK_GRAY, major_label_text_color=DARK_GRAY, - major_tick_line_cap="round", axis_line_cap="round", axis_line_width=1, @@ -50,31 +48,27 @@ TITLE_FORMATS = dict( text_font=FONT, - align='center', + align="center", text_color=DARK_GRAY, text_font_size="9pt", # text_baseline='bottom', ) -LINE_FORMATS = dict( - line_cap='round', - line_join='round', - line_width=2 -) +LINE_FORMATS = dict(line_cap="round", line_join="round", line_width=2) FONT_PROPS_SM = dict( text_font=FONT, - text_font_size='8pt', + text_font_size="8pt", ) FONT_PROPS_MD = dict( text_font=FONT, - text_font_size='10pt', + text_font_size="10pt", ) FONT_PROPS_LG = dict( text_font=FONT, - text_font_size='12pt', + text_font_size="12pt", ) BLANK_AXIS = dict( @@ -86,12 +80,10 @@ axis_label_text_font=FONT, axis_label_text_font_style="italic", axis_label_text_font_size="8pt", - - axis_line_color='white', - major_tick_line_color='white', - major_label_text_color='white', - axis_label_text_color='white', - + axis_line_color="white", + major_tick_line_color="white", + major_label_text_color="white", + axis_label_text_color="white", major_tick_line_cap="round", axis_line_cap="round", axis_line_width=1, diff --git a/ccc/tests/test_calcfunctions.py b/ccc/tests/test_calcfunctions.py index 0ccd91c2..4c6c7f0d 100644 --- a/ccc/tests/test_calcfunctions.py +++ b/ccc/tests/test_calcfunctions.py @@ -7,9 +7,9 @@ def test_update_depr_methods(monkeypatch): - ''' + """ Test of calcfunctions.update_depr_methods - ''' + """ p = Specification() json_str = """ {"schema": { @@ -147,27 +147,38 @@ def test_update_depr_methods(monkeypatch): """ monkeypatch.setattr(DepreciationParams, "defaults", json_str) dp = DepreciationParams() - asset_df = pd.DataFrame.from_dict({'bea_asset_code': [ - '1', '2', '3', '4', '5', '6', '7', '8', '9', '10']}) + asset_df = pd.DataFrame.from_dict( + {"bea_asset_code": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"]} + ) expected_df = pd.DataFrame(dp.asset) - expected_df = pd.concat([expected_df.drop(['value'], axis=1), - expected_df['value'].apply(pd.Series)], - axis=1) - expected_df.drop(columns=['asset_name', 'minor_asset_group', - 'major_asset_group'], 
inplace=True) - expected_df['bea_asset_code'] = pd.Series( - ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'], - index=expected_df.index) - expected_df['bonus'] = pd.Series( + expected_df = pd.concat( + [ + expected_df.drop(["value"], axis=1), + expected_df["value"].apply(pd.Series), + ], + axis=1, + ) + expected_df.drop( + columns=["asset_name", "minor_asset_group", "major_asset_group"], + inplace=True, + ) + expected_df["bea_asset_code"] = pd.Series( + ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"], + index=expected_df.index, + ) + expected_df["bonus"] = pd.Series( [1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0], - index=expected_df.index) - expected_df['b'] = pd.Series( - [2, 1.5, 1, 1, 1, 2, 1.5, 1, 1, 1], index=expected_df.index) - expected_df['Y'] = pd.Series( - [10, 10, 3, 15, 27.5, 27.5, 10, 3, 15, 7], - index=expected_df.index) - print('Expected df =', expected_df) + index=expected_df.index, + ) + expected_df["b"] = pd.Series( + [2, 1.5, 1, 1, 1, 2, 1.5, 1, 1, 1], index=expected_df.index + ) + expected_df["Y"] = pd.Series( + [10, 10, 3, 15, 27.5, 27.5, 10, 3, 15, 7], index=expected_df.index + ) + print("Expected df =", expected_df) test_df = cf.update_depr_methods(asset_df, p, dp) + print("Test df =", test_df) assert_frame_equal(test_df, expected_df, check_like=True) @@ -176,33 +187,31 @@ def test_update_depr_methods(monkeypatch): b = np.array([1.2, 1.0, 1.5, 2.0, 1.8]) bonus = np.array([0.0, 0.0, 0.4, 1.0, 0.9]) r = np.array([0.03, 0.03, 0.03, 0.03, 0.03]) -expected_val = np.array([0.588563059, 0.956320164, 0.924042198, 1, - 0.99041001]) +expected_val = np.array([0.588563059, 0.956320164, 0.924042198, 1, 0.99041001]) test_data = [(Y, b, bonus, r, expected_val)] -@pytest.mark.parametrize('Y,b,bonus,r,expected_val', test_data, - ids=['Test 0']) +@pytest.mark.parametrize("Y,b,bonus,r,expected_val", test_data, ids=["Test 0"]) def test_dbsl(Y, b, bonus, r, expected_val): test_val = cf.dbsl(Y, b, bonus, r) - assert (np.allclose(test_val, expected_val)) + assert np.allclose(test_val, expected_val) Y = np.array([40, 1, 10, 20, 8]) bonus = np.array([0, 0, 0.4, 1, 1.2]) r = np.array([0.12, 0.12, 0.12, 0.12, 0.12]) -expected_val = np.array([0.206618803, 0.942329694, 0.749402894, 1, - 1.071436018]) +expected_val = np.array( + [0.206618803, 0.942329694, 0.749402894, 1, 1.071436018] +) test_data = [(Y, bonus, r, expected_val)] -@pytest.mark.parametrize('Y,bonus,r,expected_val', test_data, - ids=['Test 0']) +@pytest.mark.parametrize("Y,bonus,r,expected_val", test_data, ids=["Test 0"]) def test_sl(Y, bonus, r, expected_val): test_val = cf.sl(Y, bonus, r) - assert (np.allclose(test_val, expected_val)) + assert np.allclose(test_val, expected_val) delta = np.array([0.01, 0.1, 0.1, 0.02, 0.1]) @@ -213,41 +222,83 @@ def test_sl(Y, bonus, r, expected_val): test_data = [(delta, bonus, r, pi, expected_val)] -@pytest.mark.parametrize('delta,bonus,r,pi,expected_val', test_data, - ids=['Test 0']) +@pytest.mark.parametrize( + "delta,bonus,r,pi,expected_val", test_data, ids=["Test 0"] +) def test_econ(delta, bonus, r, pi, expected_val): test_val = cf.econ(delta, bonus, r, pi) - assert (np.allclose(test_val, expected_val)) - - -df = pd.DataFrame.from_dict({ - 'asset_name': ['Steam engines', 'Custom software', 'Other furniture', - 'Mining and oilfield machinery', 'Expensing', 'PCs', - 'Terminals', 'Manufacturing', 'Wind and solar', - 'Equipment'], - 'method': ['DB 200%', 'DB 150%', 'SL', 'Economic', 'Expensing', - 'DB 200%', 'DB 150%', 'SL', 'Economic', 'Expensing'], - 'Y': [10, 10, 8, 8, 8, 10, 10, 
8, 8, 8], - 'delta': [0.08, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08], - 'b': [2, 1.5, 1, 1, 1, 2, 1.5, 1, 1, 1], - 'bonus': [0, 0, 0, 0, 0, 0.5, 0.5, 0.5, 0.5, 0.5]}) + assert np.allclose(test_val, expected_val) + + +df = pd.DataFrame.from_dict( + { + "asset_name": [ + "Steam engines", + "Custom software", + "Other furniture", + "Mining and oilfield machinery", + "Expensing", + "PCs", + "Terminals", + "Manufacturing", + "Wind and solar", + "Equipment", + ], + "method": [ + "DB 200%", + "DB 150%", + "SL", + "Economic", + "Expensing", + "DB 200%", + "DB 150%", + "SL", + "Economic", + "Expensing", + ], + "Y": [10, 10, 8, 8, 8, 10, 10, 8, 8, 8], + "delta": [0.08, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08], + "b": [2, 1.5, 1, 1, 1, 2, 1.5, 1, 1, 1], + "bonus": [0, 0, 0, 0, 0, 0.5, 0.5, 0.5, 0.5, 0.5], + } +) r = 0.05 pi = 0.02 land_expensing = 0.0 expected_df = df.copy() -expected_df['z'] = pd.Series([0.824294709, 0.801550194, 0.824199885, - 0.727272727, 1, 0.912147355, 0.900775097, - 0.912099942, 0.863636364, 1], - index=expected_df.index) -test_data = [(df, r, pi, land_expensing, expected_df['z'], )] - - -@pytest.mark.parametrize('df,r,pi,land_expensing,expected_df', - test_data, ids=['Test 0']) +expected_df["z"] = pd.Series( + [ + 0.824294709, + 0.801550194, + 0.824199885, + 0.727272727, + 1, + 0.912147355, + 0.900775097, + 0.912099942, + 0.863636364, + 1, + ], + index=expected_df.index, +) +test_data = [ + ( + df, + r, + pi, + land_expensing, + expected_df["z"], + ) +] + + +@pytest.mark.parametrize( + "df,r,pi,land_expensing,expected_df", test_data, ids=["Test 0"] +) def test_npv_tax_depr(df, r, pi, land_expensing, expected_df): test_df = cf.npv_tax_depr(df, r, pi, land_expensing) - print('Types = ', type(test_df), type(expected_df)) + print("Types = ", type(test_df), type(expected_df)) assert_series_equal(test_df, expected_df) @@ -259,17 +310,19 @@ def test_npv_tax_depr(df, r, pi, land_expensing, expected_df): pi = np.array([0.02, 0.02, 0.02, 0.02, 0.02, 0.02]) r = np.array([0.05, 0.06, 0.04, 0.03, 0.11, 0.12]) -expected_val = np.array([0.075285714, 0.0388, 0.042, 0.0112, - 0.114475829, 0.094]) +expected_val = np.array( + [0.075285714, 0.0388, 0.042, 0.0112, 0.114475829, 0.094] +) test_data = [(delta, z, w, u, inv_tax_credit, pi, r, expected_val)] -@pytest.mark.parametrize('delta,z,w,u,inv_tax_credit,pi,r,expected_val', - test_data, ids=['Test 0']) +@pytest.mark.parametrize( + "delta,z,w,u,inv_tax_credit,pi,r,expected_val", test_data, ids=["Test 0"] +) def test_eq_coc(delta, z, w, u, inv_tax_credit, pi, r, expected_val): test_val = cf.eq_coc(delta, z, w, u, inv_tax_credit, pi, r) - assert (np.allclose(test_val, expected_val)) + assert np.allclose(test_val, expected_val) u = np.array([0.3, 0, 0.3, 0, 0.3, 0]) @@ -278,103 +331,136 @@ def test_eq_coc(delta, z, w, u, inv_tax_credit, pi, r, expected_val): pi = np.array([0.02, 0.02, 0.02, 0.02, 0.02, 0.02]) r = np.array([0.05, 0.06, 0.04, 0.03, 0.11, 0.12]) -expected_val = np.array([0.042779968, 0.04, 0.029723255, 0.01, - 0.115882546, 0.1]) +expected_val = np.array( + [0.042779968, 0.04, 0.029723255, 0.01, 0.115882546, 0.1] +) test_data = [(u, phi, Y_v, pi, r, expected_val)] -@pytest.mark.parametrize('u,phi,Y_v,pi,r,expected_val', - test_data, ids=['Test 0']) +@pytest.mark.parametrize( + "u,phi,Y_v,pi,r,expected_val", test_data, ids=["Test 0"] +) def test_eq_coc_inventory(u, phi, Y_v, pi, r, expected_val): test_val = cf.eq_coc_inventory(u, phi, Y_v, pi, r) - assert (np.allclose(test_val, expected_val)) + assert 
np.allclose(test_val, expected_val) rho = np.array([0.075285714, 0.0388, 0.042, 0.0112, 0.114475829, 0.094]) delta = np.array([0.05, 0.06, 0.04, 0.03, 0.11, 0.12]) -expected_val = np.array([0.125285714, 0.0988, 0.082, 0.0412, - 0.224475829, 0.214]) +expected_val = np.array( + [0.125285714, 0.0988, 0.082, 0.0412, 0.224475829, 0.214] +) test_data = [(rho, delta, expected_val)] -@pytest.mark.parametrize('rho,delta,expected_val', test_data, - ids=['Test 0']) +@pytest.mark.parametrize("rho,delta,expected_val", test_data, ids=["Test 0"]) def test_eq_ucc(rho, delta, expected_val): test_val = cf.eq_ucc(rho, delta) - assert (np.allclose(test_val, expected_val)) + assert np.allclose(test_val, expected_val) rho = np.array([0.075285714, 0.0388, 0.042, 0.0112, 0.114475829, 0.094]) r_prime = np.array([0.05, 0.06, 0.04, 0.03, 0.11, 0.12]) pi = 0.02 -expected_val = np.array([0.601518027, -0.030927835, 0.523809524, - 0.107142857, 0.213807831, -0.063829787]) +expected_val = np.array( + [ + 0.601518027, + -0.030927835, + 0.523809524, + 0.107142857, + 0.213807831, + -0.063829787, + ] +) z2 = cf.econ(0.05, 0.0, 0.04, 0.02) -rho2 = cf.eq_coc( - 0.05, z2, 0.0, 0.35, 0.0, 0.02, 0.04) +rho2 = cf.eq_coc(0.05, z2, 0.0, 0.35, 0.0, 0.02, 0.04) expected_val2 = 0.35 -rho3 = cf.eq_coc( - 0.05, 1.0, 0.0, 0.35, 0.0, 0.02, 0.04) -test_data = [(rho, r_prime, pi, expected_val), - (rho2, 0.04, 0.02, expected_val2), - (rho3, 0.04, 0.02, 0.0)] - - -@pytest.mark.parametrize('rho,r_prime,pi,expected_val', test_data, - ids=['Test: vector', 'Test: statutory', - 'Test: 0 rate']) +rho3 = cf.eq_coc(0.05, 1.0, 0.0, 0.35, 0.0, 0.02, 0.04) +test_data = [ + (rho, r_prime, pi, expected_val), + (rho2, 0.04, 0.02, expected_val2), + (rho3, 0.04, 0.02, 0.0), +] + + +@pytest.mark.parametrize( + "rho,r_prime,pi,expected_val", + test_data, + ids=["Test: vector", "Test: statutory", "Test: 0 rate"], +) def test_eq_metr(rho, r_prime, pi, expected_val): test_val = cf.eq_metr(rho, r_prime, pi) - assert (np.allclose(test_val, expected_val)) + assert np.allclose(test_val, expected_val) rho = np.array([0.075285714, 0.0388, 0.042, 0.0112, 0.114475829, 0.094]) s = np.array([0.05, 0.06, 0.04, 0.03, 0.11, 0.12]) -expected_val = np.array([0.335863378, -0.546391753, 0.047619048, - -1.678571429, 0.03909846, -0.276595745]) +expected_val = np.array( + [ + 0.335863378, + -0.546391753, + 0.047619048, + -1.678571429, + 0.03909846, + -0.276595745, + ] +) test_data = [(rho, s, expected_val)] -@pytest.mark.parametrize('rho,s,expected_val', test_data, - ids=['Test 0']) +@pytest.mark.parametrize("rho,s,expected_val", test_data, ids=["Test 0"]) def test_eq_mettr(rho, s, expected_val): test_val = cf.eq_mettr(rho, s) - assert (np.allclose(test_val, expected_val)) + assert np.allclose(test_val, expected_val) rho = np.array([0.075285714, 0.0388, 0.042, 0.0112, 0.114475829, 0.094]) s = np.array([0.05, 0.06, 0.04, 0.03, 0.11, 0.12]) -expected_val = np.array([0.02528571, -0.0212, 0.002, -0.0188, - 0.00447583, -0.026]) +expected_val = np.array( + [0.02528571, -0.0212, 0.002, -0.0188, 0.00447583, -0.026] +) test_data = [(rho, s, expected_val)] -@pytest.mark.parametrize('rho,s,expected_val', test_data, - ids=['Test 0']) +@pytest.mark.parametrize("rho,s,expected_val", test_data, ids=["Test 0"]) def test_eq_tax_wedge(rho, s, expected_val): test_val = cf.eq_tax_wedge(rho, s) - assert (np.allclose(test_val, expected_val)) + assert np.allclose(test_val, expected_val) rho = np.array([0.075285714, 0.0388, 0.042, 0.0112, 0.114475829, 0.094]) -metr = np.array([0.601518027, -0.030927835, 
0.523809524, 0.107142857, - 0.213807831, -0.063829787]) +metr = np.array( + [ + 0.601518027, + -0.030927835, + 0.523809524, + 0.107142857, + 0.213807831, + -0.063829787, + ] +) profit_rate = np.array([0.1, 0.2, 0.3, 0.05, 0.5, 1]) u = np.array([0.35, 0.21, 0, 0.4, 1, 0.9]) -expected_val = np.array([0.539357143, 0.16326, 0.073333333, 0.3344, - 0.82, 0.8094]) -test_data = [(rho, metr, profit_rate, u, expected_val), - (rho[0], metr[0], rho[0], u[0], metr[0])] - - -@pytest.mark.parametrize('rho,metr,profit_rate,u,expected_val', test_data, - ids=['Test 0', 'Test: eatr=metr']) +expected_val = np.array( + [0.539357143, 0.16326, 0.073333333, 0.3344, 0.82, 0.8094] +) +test_data = [ + (rho, metr, profit_rate, u, expected_val), + (rho[0], metr[0], rho[0], u[0], metr[0]), +] + + +@pytest.mark.parametrize( + "rho,metr,profit_rate,u,expected_val", + test_data, + ids=["Test 0", "Test: eatr=metr"], +) def test_eq_eatr(rho, metr, profit_rate, u, expected_val): test_val = cf.eq_eatr(rho, metr, profit_rate, u) - assert (np.allclose(test_val, expected_val)) + assert np.allclose(test_val, expected_val) diff --git a/ccc/tests/test_calculator.py b/ccc/tests/test_calculator.py index 769ee74c..22d87448 100644 --- a/ccc/tests/test_calculator.py +++ b/ccc/tests/test_calculator.py @@ -5,15 +5,19 @@ from ccc.data import Assets from ccc.calculator import Calculator import os + CURRENT_PATH = os.path.abspath(os.path.dirname(__file__)) -setattr(DepreciationParams, "defaults", os.path.join( - CURRENT_PATH, "..", "tax_depreciation_rules.json")) +setattr( + DepreciationParams, + "defaults", + os.path.join(CURRENT_PATH, "..", "tax_depreciation_rules.json"), +) def test_Calculator_exception1(): - ''' + """ Raise exception for not passing parameters object - ''' + """ assets = Assets() dp = DepreciationParams() with pytest.raises(Exception): @@ -21,9 +25,9 @@ def test_Calculator_exception1(): def test_Calculator_exception2(): - ''' + """ Raise exception for not passing depreciation parameters object - ''' + """ p = Specification() assets = Assets() with pytest.raises(Exception): @@ -31,9 +35,9 @@ def test_Calculator_exception2(): def test_Calculator_exception3(): - ''' + """ Raise exception for not passing assets object - ''' + """ p = Specification() dp = DepreciationParams() with pytest.raises(Exception): @@ -41,119 +45,128 @@ def test_Calculator_exception3(): def test_calc_other(): - ''' + """ Test calc_other method - ''' + """ assets = Assets() p = Specification() dp = DepreciationParams() calc = Calculator(p, dp, assets) df = calc.calc_by_asset() calc_other_df = calc.calc_other(df) - assert ('ucc_mix' in calc_other_df.keys()) - assert ('metr_mix' in calc_other_df.keys()) - assert ('mettr_mix' in calc_other_df.keys()) - assert ('tax_wedge_mix' in calc_other_df.keys()) - assert ('eatr_mix' in calc_other_df.keys()) + assert "ucc_mix" in calc_other_df.keys() + assert "metr_mix" in calc_other_df.keys() + assert "mettr_mix" in calc_other_df.keys() + assert "tax_wedge_mix" in calc_other_df.keys() + assert "eatr_mix" in calc_other_df.keys() def test_calc_base(): - ''' + """ Test calc_base method - ''' + """ assets = Assets() p = Specification() dp = DepreciationParams() calc = Calculator(p, dp, assets) calc.calc_base() calc_base_df = calc._Calculator__assets.df - assert ('z_mix' in calc_base_df.keys()) - assert ('rho_mix' in calc_base_df.keys()) + assert "z_mix" in calc_base_df.keys() + assert "rho_mix" in calc_base_df.keys() def test_calc_all(): - ''' + """ Test calc_all method - ''' + """ assets = Assets() p = Specification() 
dp = DepreciationParams() calc = Calculator(p, dp, assets) calc.calc_all() calc_all_df = calc._Calculator__assets.df - assert ('z_mix' in calc_all_df.keys()) - assert ('rho_mix' in calc_all_df.keys()) - assert ('ucc_mix' in calc_all_df.keys()) - assert ('metr_mix' in calc_all_df.keys()) - assert ('mettr_mix' in calc_all_df.keys()) - assert ('tax_wedge_mix' in calc_all_df.keys()) - assert ('eatr_mix' in calc_all_df.keys()) - - -@pytest.mark.parametrize('include_land,include_inventories', - [(False, False), (True, True)], - ids=['No land or inv', 'Both land and inv']) + assert "z_mix" in calc_all_df.keys() + assert "rho_mix" in calc_all_df.keys() + assert "ucc_mix" in calc_all_df.keys() + assert "metr_mix" in calc_all_df.keys() + assert "mettr_mix" in calc_all_df.keys() + assert "tax_wedge_mix" in calc_all_df.keys() + assert "eatr_mix" in calc_all_df.keys() + + +@pytest.mark.parametrize( + "include_land,include_inventories", + [(False, False), (True, True)], + ids=["No land or inv", "Both land and inv"], +) def test_calc_by_asset(include_land, include_inventories): - ''' + """ Test calc_by_asset method - ''' + """ assets = Assets() p = Specification() dp = DepreciationParams() calc = Calculator(p, dp, assets) asset_df = calc.calc_by_asset( - include_land=include_land, - include_inventories=include_inventories + include_land=include_land, include_inventories=include_inventories ) - assert ('major_asset_group' in asset_df.keys()) + assert "major_asset_group" in asset_df.keys() -@pytest.mark.parametrize('include_land,include_inventories', - [(False, False), (True, True)], - ids=['No land or inv', 'Both land and inv']) +@pytest.mark.parametrize( + "include_land,include_inventories", + [(False, False), (True, True)], + ids=["No land or inv", "Both land and inv"], +) def test_calc_by_industry(include_land, include_inventories): - ''' + """ Test calc_by_industry method - ''' + """ assets = Assets() p = Specification() dp = DepreciationParams() calc = Calculator(p, dp, assets) ind_df = calc.calc_by_industry( - include_land=include_land, - include_inventories=include_inventories) - assert ('major_industry' in ind_df.keys()) + include_land=include_land, include_inventories=include_inventories + ) + assert "major_industry" in ind_df.keys() -@pytest.mark.parametrize('include_land,include_inventories', - [(False, False), (True, True)], - ids=['No land or inv', 'Both land and inv']) +@pytest.mark.parametrize( + "include_land,include_inventories", + [(False, False), (True, True)], + ids=["No land or inv", "Both land and inv"], +) def test_summary_table(include_land, include_inventories): - ''' + """ Test summary_table method. 
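The summary-table tests here all use one baseline-versus-reform recipe: build two Calculator objects that differ only in their Specification, then compare them through a table method. Condensed into a single runnable sketch that mirrors test_summary_table below (assumes the ccc package and its bundled asset data are installed):

from ccc.data import Assets
from ccc.parameters import Specification, DepreciationParams
from ccc.calculator import Calculator

assets = Assets()
dp = DepreciationParams()
p = Specification(year=2018)
calc1 = Calculator(p, dp, assets)        # baseline
p.update_specification({"CIT_rate": 0.38})
calc2 = Calculator(p, dp, assets)        # reform with a higher CIT rate
summary_df = calc1.summary_table(calc2)  # pandas DataFrame comparing the two
print(summary_df.head())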
- ''' + """ cyr = 2018 assets = Assets() p = Specification(year=cyr) dp = DepreciationParams() calc1 = Calculator(p, dp, assets) assert calc1.current_year == cyr - p.update_specification({'CIT_rate': 0.38}) + p.update_specification({"CIT_rate": 0.38}) calc2 = Calculator(p, dp, assets) assert calc2.current_year == cyr summary_df = calc1.summary_table( - calc2, include_land=include_land, - include_inventories=include_inventories) + calc2, + include_land=include_land, + include_inventories=include_inventories, + ) assert isinstance(summary_df, pd.DataFrame) -@pytest.mark.parametrize('include_land,include_inventories', - [(False, False), (True, True)], - ids=['No land or inv', 'Both land and inv']) +@pytest.mark.parametrize( + "include_land,include_inventories", + [(False, False), (True, True)], + ids=["No land or inv", "Both land and inv"], +) def test_asset_share_table(include_land, include_inventories): - ''' + """ Test asset_share_table method. - ''' + """ cyr = 2018 assets = Assets() p = Specification(year=cyr) @@ -161,124 +174,136 @@ def test_asset_share_table(include_land, include_inventories): calc1 = Calculator(p, dp, assets) assert calc1.current_year == cyr asset_df = calc1.asset_share_table( - include_land=include_land, - include_inventories=include_inventories) + include_land=include_land, include_inventories=include_inventories + ) assert isinstance(asset_df, pd.DataFrame) -@pytest.mark.parametrize('include_land,include_inventories', - [(False, False), (True, True)], - ids=['No land or inv', 'Both land and inv']) +@pytest.mark.parametrize( + "include_land,include_inventories", + [(False, False), (True, True)], + ids=["No land or inv", "Both land and inv"], +) def test_asset_summary_table(include_land, include_inventories): - ''' + """ Test asset_summary_table method. - ''' + """ cyr = 2018 assets = Assets() p = Specification(year=cyr) dp = DepreciationParams() calc1 = Calculator(p, dp, assets) assert calc1.current_year == cyr - p.update_specification({'CIT_rate': 0.38}) + p.update_specification({"CIT_rate": 0.38}) calc2 = Calculator(p, dp, assets) assert calc2.current_year == cyr asset_df = calc1.asset_summary_table( - calc2, include_land=include_land, - include_inventories=include_inventories) + calc2, + include_land=include_land, + include_inventories=include_inventories, + ) assert isinstance(asset_df, pd.DataFrame) -@pytest.mark.parametrize('include_land,include_inventories', - [(False, False), (True, True)], - ids=['No land or inv', 'Both land and inv']) +@pytest.mark.parametrize( + "include_land,include_inventories", + [(False, False), (True, True)], + ids=["No land or inv", "Both land and inv"], +) def test_industry_summary_table(include_land, include_inventories): - ''' + """ Test industry_summary_table method. 
- ''' + """ cyr = 2018 assets = Assets() p = Specification(year=cyr) dp = DepreciationParams() calc1 = Calculator(p, dp, assets) assert calc1.current_year == cyr - p.update_specification({'CIT_rate': 0.38}) + p.update_specification({"CIT_rate": 0.38}) calc2 = Calculator(p, dp, assets) assert calc2.current_year == cyr ind_df = calc1.industry_summary_table( - calc2, include_land=include_land, - include_inventories=include_inventories) + calc2, + include_land=include_land, + include_inventories=include_inventories, + ) assert isinstance(ind_df, pd.DataFrame) -@pytest.mark.parametrize('corporate', [True, False], - ids=['Corporate', 'Non-Corporate']) +@pytest.mark.parametrize( + "corporate", [True, False], ids=["Corporate", "Non-Corporate"] +) def test_range_plot(corporate): - ''' + """ Test range_plot method. - ''' + """ assets = Assets() p = Specification() dp = DepreciationParams() calc = Calculator(p, dp, assets) p2 = Specification(year=2026) - p2.update_specification({'CIT_rate': 0.25}) + p2.update_specification({"CIT_rate": 0.25}) calc2 = Calculator(p2, dp, assets) - fig = calc.range_plot(calc2, corporate=corporate, - include_title=True) + fig = calc.range_plot(calc2, corporate=corporate, include_title=True) assert fig - fig = calc.range_plot(calc2, output_variable='rho', - corporate=corporate, include_title=True) + fig = calc.range_plot( + calc2, output_variable="rho", corporate=corporate, include_title=True + ) assert fig -@pytest.mark.parametrize('corporate', [True, False], - ids=['Corporate', 'Non-Corporate']) +@pytest.mark.parametrize( + "corporate", [True, False], ids=["Corporate", "Non-Corporate"] +) def test_grouped_bar(corporate): - ''' + """ Test grouped_bar method. - ''' + """ assets = Assets() p = Specification() dp = DepreciationParams() calc = Calculator(p, dp, assets) p2 = Specification(year=2026) - p2.update_specification({'CIT_rate': 0.25}) + p2.update_specification({"CIT_rate": 0.25}) calc2 = Calculator(p2, dp, assets) fig = calc.grouped_bar(calc2, corporate=corporate) assert fig - fig = calc.grouped_bar(calc2, output_variable='rho', - corporate=corporate, group_by_asset=False) + fig = calc.grouped_bar( + calc2, output_variable="rho", corporate=corporate, group_by_asset=False + ) assert fig def test_asset_bubble(): - ''' + """ Test asset_bubble plot method. - ''' + """ assets = Assets() p = Specification() dp = DepreciationParams() calc = Calculator(p, dp, assets) p2 = Specification(year=2026) - p2.update_specification({'CIT_rate': 0.25}) + p2.update_specification({"CIT_rate": 0.25}) calc2 = Calculator(p2, dp, assets) fig = calc.asset_bubble(calc2, include_title=True) assert fig - fig = calc.asset_bubble(calc2, output_variable='rho_mix', - include_title=True) + fig = calc.asset_bubble( + calc2, output_variable="rho_mix", include_title=True + ) assert fig def test_bubble_widget(): - ''' + """ Test bubble_widget method. 
- ''' + """ assets = Assets() p = Specification() dp = DepreciationParams() calc = Calculator(p, dp, assets) p2 = Specification(year=2026) - p2.update_specification({'CIT_rate': 0.25}) + p2.update_specification({"CIT_rate": 0.25}) calc2 = Calculator(p2, dp, assets) fig = calc.bubble_widget(calc2) assert fig @@ -308,7 +333,7 @@ def test_p_param_return_value(): p = Specification() dp = DepreciationParams() calc1 = Calculator(p, dp, assets) - obj = calc1.p_param('tau_int') + obj = calc1.p_param("tau_int") assert np.allclose(obj, np.array([0.31391843])) @@ -318,7 +343,7 @@ def test_p_param_set_value(): dp = DepreciationParams() calc1 = Calculator(p, dp, assets) new_tau_int = np.array([0.396]) - calc1.p_param('tau_int', new_tau_int) + calc1.p_param("tau_int", new_tau_int) assert np.allclose(calc1._Calculator__p.tau_int, new_tau_int) diff --git a/ccc/tests/test_data.py b/ccc/tests/test_data.py index e287155a..85f66373 100644 --- a/ccc/tests/test_data.py +++ b/ccc/tests/test_data.py @@ -5,61 +5,59 @@ def test_data_year(): - ''' + """ Test of Assets.data_year() method - ''' + """ assets = Assets() assert assets.data_year == ASSET_DATA_CSV_YEAR def test_array_length(): - ''' + """ Test of Assets.array_length() method - ''' + """ assets = Assets() - df = read_egg_csv('ccc_asset_data.csv') + df = read_egg_csv("ccc_asset_data.csv") assert assets.array_length == df.shape[0] def test_read_var_info(): - ''' + """ Test of Assets.read_var_info() method - ''' + """ assets = Assets() - expected_dict = read_egg_json('records_variables.json') + expected_dict = read_egg_json("records_variables.json") test_dict = assets.read_var_info() for k, v in expected_dict.items(): - print('Item is ', k) + print("Item is ", k) assert v == test_dict[k] def test_read_data(): - ''' + """ Test of Assets._read_data() method - ''' + """ assets = Assets() - df = read_egg_csv('ccc_asset_data.csv') - assets = Assets(data='ccc_asset_data.csv') - assets._read_data('ccc_asset_data.csv') - pd.testing.assert_frame_equal( - df, assets.df) + df = read_egg_csv("ccc_asset_data.csv") + assets = Assets(data="ccc_asset_data.csv") + assets._read_data("ccc_asset_data.csv") + pd.testing.assert_frame_equal(df, assets.df) def test_read_data_df(): - ''' + """ Test of Assets._read_data() method - ''' - df = read_egg_csv('ccc_asset_data.csv') + """ + df = read_egg_csv("ccc_asset_data.csv") assets = Assets(data=df) - pd.testing.assert_frame_equal( - df, assets.df) + pd.testing.assert_frame_equal(df, assets.df) def test_read_data_exception(): - ''' + """ Test of Assets._read_data() method - ''' + """ with pytest.raises(Exception): assert Assets(data=3) diff --git a/ccc/tests/test_get_taxcalc_rates.py b/ccc/tests/test_get_taxcalc_rates.py index 188f7e51..c928328b 100644 --- a/ccc/tests/test_get_taxcalc_rates.py +++ b/ccc/tests/test_get_taxcalc_rates.py @@ -6,52 +6,55 @@ def test_get_calculator_cps(): - ''' + """ Test the get_calculator() function - ''' + """ calc1 = tc.get_calculator(True, 2019) assert calc1.current_year == 2019 @pytest.mark.needs_puf -@pytest.mark.parametrize('data', ['puf.csv', None], - ids=['data=PUF', 'data=None']) +@pytest.mark.parametrize( + "data", ["puf.csv", None], ids=["data=PUF", "data=None"] +) def test_get_calculator(data): - ''' + """ Test the get_calculator() function - ''' + """ calc1 = tc.get_calculator(True, 2019, data=data) assert calc1.current_year == 2019 def test_get_calculator_exception(): - ''' + """ Test the get_calculator() function - ''' + """ with pytest.raises(Exception): assert tc.get_calculator(True, 
TC_LAST_YEAR + 1) def test_get_rates(): - ''' + """ Test of the get_rates() function - ''' + """ p = Specification(year=2020) # has default tax rates, which should equal TC test_dict = tc.get_rates( - baseline=False, start_year=2020, reform={}, data='cps') + baseline=False, start_year=2020, reform={}, data="cps" + ) for k, v in test_dict.items(): - print('Tax rate = ', k) - assert (np.allclose(v, p.__dict__[k], atol=1e-4)) + print("Tax rate = ", k) + assert np.allclose(v, p.__dict__[k], atol=1e-4) -@pytest.mark.parametrize('reform,expected', - [({'key1': {'key2': 1.0}}, False), - ({'key1': 'string'}, True)], - ids=['assert False', 'assert True']) +@pytest.mark.parametrize( + "reform,expected", + [({"key1": {"key2": 1.0}}, False), ({"key1": "string"}, True)], + ids=["assert False", "assert True"], +) def test_is_paramtools_format(reform, expected): - ''' + """ Test get_taxcalc_rates.is_paramtools_format function. - ''' + """ returned_value = tc.is_paramtools_format(reform) - assert (expected == returned_value) + assert expected == returned_value diff --git a/ccc/tests/test_parameters.py b/ccc/tests/test_parameters.py index b014286f..31edb049 100644 --- a/ccc/tests/test_parameters.py +++ b/ccc/tests/test_parameters.py @@ -3,11 +3,12 @@ from ccc.parameters import DepreciationParams -test_data = [(27.5, '27_5'), (30, '30')] +test_data = [(27.5, "27_5"), (30, "30")] -@pytest.mark.parametrize('call_tc', [False, True], - ids=['Not use TC', 'Use TC']) +@pytest.mark.parametrize( + "call_tc", [False, True], ids=["Not use TC", "Use TC"] +) def test_create_specification_object(call_tc): spec = Specification(call_tc=call_tc) assert spec @@ -22,10 +23,7 @@ def test_default_parameters(): def test_update_specification_with_dict(): cyr = 2020 spec = Specification(year=cyr) - new_spec_dict = { - 'profit_rate': 0.4, - 'm': 0.5 - } + new_spec_dict = {"profit_rate": 0.4, "m": 0.5} spec.update_specification(new_spec_dict) assert spec.profit_rate == 0.4 assert spec.m == 0.5 @@ -36,7 +34,7 @@ def test_new_view(): cyr = 2020 spec = Specification(year=cyr) new_spec_dict = { - 'new_view': True, + "new_view": True, } spec.update_specification(new_spec_dict) assert spec.new_view @@ -46,13 +44,10 @@ def test_pt_tax(): cyr = 2020 spec = Specification(year=cyr) - new_spec_dict = { - 'pt_entity_tax_ind': True, - 'pt_entity_tax_rate': 0.44 - } + new_spec_dict = {"pt_entity_tax_ind": True, "pt_entity_tax_rate": 0.44} spec.update_specification(new_spec_dict) assert spec.pt_entity_tax_ind - assert spec.u['pt'] == 0.44 + assert spec.u["pt"] == 0.44 def test_update_specification_with_json(): @@ -83,31 +78,24 @@ def test_update_specification_with_json(): def test_update_bad_revision1(): spec = Specification() # profit rate has an upper bound at 1.0 - revs = { - 'profit_rate': [{'year': spec.year, 'value': 1.2}] - } + revs = {"profit_rate": [{"year": spec.year, "value": 1.2}]} spec.update_specification(revs, raise_errors=False) assert len(spec.errors) > 0 - first_line = spec.errors['profit_rate'][0] + first_line = spec.errors["profit_rate"][0] print(first_line) - expected_first_line =\ - 'profit_rate[year=2022] 1.2 > max 1.0 ' + expected_first_line = "profit_rate[year=2022] 1.2 > max 1.0 " assert first_line == expected_first_line def test_update_bad_revsions2(): spec = Specification() # Pick a parameter value that is out of bounds - revs = { - 'profit_rate': 0.5, - 'pt_entity_tax_rate': 1.2} + revs = {"profit_rate": 0.5, "pt_entity_tax_rate": 1.2} spec.update_specification(revs, raise_errors=False) 
assert len(spec.errors) > 0 - first_line = spec.errors['pt_entity_tax_rate'][0] - print('First line = ', first_line) - expected_first_line = ( - 'pt_entity_tax_rate 1.2 > max 1.0 ' - ) + first_line = spec.errors["pt_entity_tax_rate"][0] + print("First line = ", first_line) + expected_first_line = "pt_entity_tax_rate 1.2 > max 1.0 " assert first_line == expected_first_line @@ -128,9 +116,7 @@ def test_update_bad_revsions4(): def test_update_bad_revsions5(): spec = Specification() # profit rate has an upper bound at 1.0 - revs = { - 'profit_rate': [{'year': spec.year, 'value': 1.2}] - } + revs = {"profit_rate": [{"year": spec.year, "value": 1.2}]} with pytest.raises(Exception): assert spec.update_specification(revs, raise_errors=True) @@ -162,18 +148,17 @@ def test_read_json_revision(): def test_revision_warnings_errors(): - revs_dict_good = {'profit_rate': [{'year': 2020, 'value': 0.30}]} + revs_dict_good = {"profit_rate": [{"year": 2020, "value": 0.30}]} e_w = revision_warnings_errors(revs_dict_good) - assert len(e_w['warnings']) == 0 - assert len(e_w['errors']) == 0 - revs_dict_bad = {'profit_rate': [{'year': 2020, 'value': -0.10}]} + assert len(e_w["warnings"]) == 0 + assert len(e_w["errors"]) == 0 + revs_dict_bad = {"profit_rate": [{"year": 2020, "value": -0.10}]} e_w = revision_warnings_errors(revs_dict_bad) - assert len(e_w['warnings']) == 0 - assert len(e_w['errors']) > 0 + assert len(e_w["warnings"]) == 0 + assert len(e_w["errors"]) > 0 revs_dict_badder = 999 e_w = revision_warnings_errors(revs_dict_badder) - assert e_w['errors'] ==\ - 'ERROR: revision is not a dictionary or string' + assert e_w["errors"] == "ERROR: revision is not a dictionary or string" def test_create_depreciation_parameters_object(): @@ -182,30 +167,50 @@ def test_create_depreciation_parameters_object(): def test_update_depreciation_params_with_dict(): - expected_result = [{ - 'year': 2020, 'value': {'method': 'Expensing', 'life': 5}, - 'GDS_life': 3.0, 'ADS_life': 3.0, - 'major_asset_group': 'Equipment', - 'minor_asset_group': 'Computers and Software', 'system': 'GDS', - 'asset_name': 'Custom software', 'BEA_code': 'ENS2'}] + expected_result = [ + { + "year": 2020, + "value": {"method": "Expensing", "life": 5}, + "GDS_life": 3.0, + "ADS_life": 3.0, + "major_asset_group": "Equipment", + "minor_asset_group": "Computers and Software", + "system": "GDS", + "asset_name": "Custom software", + "BEA_code": "ENS2", + } + ] dp = DepreciationParams() - new_dp_dict = {"asset": [ - {"year": 2020, - "asset_name": "Custom software", - "value": {"life": 5, "method": "Expensing"}}]} + new_dp_dict = { + "asset": [ + { + "year": 2020, + "asset_name": "Custom software", + "value": {"life": 5, "method": "Expensing"}, + } + ] + } dp.adjust(new_dp_dict) test_result = dp.select_eq( - param="asset", strict=False, year=2020, BEA_code="ENS2") + param="asset", strict=False, year=2020, BEA_code="ENS2" + ) assert test_result == expected_result def test_update_depreciation_params_with_json(): - expected_result = [{ - 'year': 2020, 'value': {'method': 'Expensing', 'life': 5}, - 'GDS_life': 3.0, 'ADS_life': 3.0, - 'major_asset_group': 'Equipment', - 'minor_asset_group': 'Computers and Software', 'system': 'GDS', - 'asset_name': 'Custom software', 'BEA_code': 'ENS2'}] + expected_result = [ + { + "year": 2020, + "value": {"method": "Expensing", "life": 5}, + "GDS_life": 3.0, + "ADS_life": 3.0, + "major_asset_group": "Equipment", + "minor_asset_group": "Computers and Software", + "system": "GDS", + "asset_name": "Custom software", + "BEA_code": 
"ENS2", + } + ] dp = DepreciationParams() new_dp_json = """ {"asset": [ @@ -215,45 +220,64 @@ def test_update_depreciation_params_with_json(): """ dp.adjust(new_dp_json) test_result = dp.select_eq( - param="asset", strict=False, year=2020, BEA_code="ENS2") + param="asset", strict=False, year=2020, BEA_code="ENS2" + ) assert test_result == expected_result def test_update_depreciation_params_as_a_group(): dp = DepreciationParams() new_dp_dict = { - "asset": [{"major_asset_group": "Intellectual Property", - "value": {"life": 12, "method": "DB 200%"}}]} + "asset": [ + { + "major_asset_group": "Intellectual Property", + "value": {"life": 12, "method": "DB 200%"}, + } + ] + } dp.adjust(new_dp_dict) test_result = dp.select_eq( - param="asset", strict=False, year=2020, - major_asset_group="Intellectual Property") - assert test_result[0]['value']['life'] == 12 - assert test_result[1]['value']['life'] == 12 - assert test_result[2]['value']['life'] == 12 + param="asset", + strict=False, + year=2020, + major_asset_group="Intellectual Property", + ) + assert test_result[0]["value"]["life"] == 12 + assert test_result[1]["value"]["life"] == 12 + assert test_result[2]["value"]["life"] == 12 def test_update_depreciation_bad_revision(): - ''' + """ Check that parameter out of range raises exception - ''' + """ dp = DepreciationParams() - new_dp_dict = {"asset": [ - {"year": 2020, - "asset_name": "Custom software", - "value": {"life": 12, "method": "Expensing2"}}]} + new_dp_dict = { + "asset": [ + { + "year": 2020, + "asset_name": "Custom software", + "value": {"life": 12, "method": "Expensing2"}, + } + ] + } with pytest.raises(Exception): assert dp.adjust(new_dp_dict) def test_update_depreciation_bad_revision2(): - ''' + """ Check that parameter out of range raises exception - ''' + """ dp = DepreciationParams() - new_dp_dict = {"asset": [ - {"year": 2020, - "asset_name": "Custom software", - "value": {"life": 122.0, "method": "Expensing"}}]} + new_dp_dict = { + "asset": [ + { + "year": 2020, + "asset_name": "Custom software", + "value": {"life": 122.0, "method": "Expensing"}, + } + ] + } with pytest.raises(Exception): assert dp.adjust(new_dp_dict) diff --git a/ccc/tests/test_paramfunctions.py b/ccc/tests/test_paramfunctions.py index 1a83ef10..4184b1b6 100644 --- a/ccc/tests/test_paramfunctions.py +++ b/ccc/tests/test_paramfunctions.py @@ -5,149 +5,197 @@ def test_calc_sprime_c_td(): - ''' + """ Test of the paramfunctions.calc_sprime_c_td function - ''' + """ Y_td, tau_td, i, pi = 8, 0.2, 0.08, 0.02 test_value = pf.calc_sprime_c_td(Y_td, tau_td, i, pi) - assert (np.allclose(test_value, 0.047585769)) + assert np.allclose(test_value, 0.047585769) def test_calc_s_c_d_td(): - ''' + """ Test of the paramfunctions.calc_s_c_d_td function - ''' + """ sprime_c_td = 0.047585769 gamma, i, pi = 0.5, 0.08, 0.02 test_value = pf.calc_s_c_d_td(sprime_c_td, gamma, i, pi) - assert (np.allclose(test_value, 0.053792884)) + assert np.allclose(test_value, 0.053792884) def test_calc_s__d(): - ''' + """ Test of the paramfunctions.calc_s__d function - ''' + """ s_d_td = 0.053792884 alpha_d_ft, alpha_d_td, alpha_d_nt = 0.3, 0.5, 0.2 tau_int, tau_w, i, pi = 0.3, 0.02, 0.08, 0.02 - test_value = pf.calc_s__d(s_d_td, alpha_d_ft, alpha_d_td, - alpha_d_nt, tau_int, tau_w, i, pi) + test_value = pf.calc_s__d( + s_d_td, alpha_d_ft, alpha_d_td, alpha_d_nt, tau_int, tau_w, i, pi + ) - assert (np.allclose(test_value, 0.029696442)) + assert np.allclose(test_value, 0.029696442) def test_calc_g__g(): - ''' + """ Test of the 
paramfunctions.calc_g__g function - ''' + """ Y_g, tau_cg, m, E_c, pi = 2.0, 0.35, 0.4, 0.09, 0.02 test_value = pf.calc_g__g(Y_g, tau_cg, m, E_c, pi) - assert (np.allclose(test_value, 0.017105186)) + assert np.allclose(test_value, 0.017105186) def test_calc_g(): - ''' + """ Test of the paramfunctions.calc_g function - ''' + """ g_scg, g_lcg = 0.017105186, 0.030329578 omega_scg, omega_lcg, omega_xcg = 0.7, 0.1, 0.2 m, E_c = 0.4, 0.09 g_xcg = m * E_c test_value = pf.calc_g( - g_scg, g_lcg, g_xcg, omega_scg, omega_lcg, omega_xcg, m, E_c) + g_scg, g_lcg, g_xcg, omega_scg, omega_lcg, omega_xcg, m, E_c + ) - assert (np.allclose(test_value, 0.022206588)) + assert np.allclose(test_value, 0.022206588) def test_calc_s_c_e_td(): - ''' + """ Test of the paramfunctions.calc_s_c_e_td function - ''' + """ Y_td, tau_td, i, pi, E_c = 8, 0.2, 0.08, 0.02, 0.09 test_value = pf.calc_s_c_e_td(Y_td, tau_td, i, pi, E_c) - assert (np.allclose(test_value, 0.074440094)) + assert np.allclose(test_value, 0.074440094) def test_calc_s_c_e(): - ''' + """ Test of the paramfunctions.calc_s_c_e function - ''' + """ s_c_e_ft, s_c_e_td = 0.062706588, 0.074440094 alpha_c_e_ft, alpha_c_e_td, alpha_c_e_nt = 0.6, 0.3, 0.1 tau_w, E_c = 0.02, 0.09 test_value = pf.calc_s_c_e( - s_c_e_ft, s_c_e_td, alpha_c_e_ft, alpha_c_e_td, alpha_c_e_nt, - tau_w, E_c) + s_c_e_ft, + s_c_e_td, + alpha_c_e_ft, + alpha_c_e_td, + alpha_c_e_nt, + tau_w, + E_c, + ) - assert (np.allclose(test_value, 0.048955981)) + assert np.allclose(test_value, 0.048955981) p = Specification() -revisions = {'Y_td': 8, 'Y_scg': 2, 'Y_lcg': 7, 'tau_td': 0.2, - 'tau_int': 0.3, 'tau_scg': 0.35, 'tau_lcg': 0.12, - 'tau_div': 0.25, 'tau_w': 0.02, 'gamma': 0.5, 'm': 0.4, - 'E_c': 0.09, 'nominal_interest_rate': 0.08, - 'inflation_rate': 0.02, 'alpha_c_d_ft': 0.3, - 'alpha_c_d_td': 0.5, 'alpha_c_d_nt': 0.2, - 'alpha_pt_d_ft': 0.5, 'alpha_pt_d_td': 0.4, - 'alpha_pt_d_nt': 0.1, 'alpha_c_e_ft': 0.6, - 'alpha_c_e_td': 0.3, 'alpha_c_e_nt': 0.1, 'omega_scg': 0.7, - 'omega_lcg': 0.1, 'omega_xcg': 0.2, 'f_c': 0.32, - 'f_pt': 0.42} +revisions = { + "Y_td": 8, + "Y_scg": 2, + "Y_lcg": 7, + "tau_td": 0.2, + "tau_int": 0.3, + "tau_scg": 0.35, + "tau_lcg": 0.12, + "tau_div": 0.25, + "tau_w": 0.02, + "gamma": 0.5, + "m": 0.4, + "E_c": 0.09, + "nominal_interest_rate": 0.08, + "inflation_rate": 0.02, + "alpha_c_d_ft": 0.3, + "alpha_c_d_td": 0.5, + "alpha_c_d_nt": 0.2, + "alpha_pt_d_ft": 0.5, + "alpha_pt_d_td": 0.4, + "alpha_pt_d_nt": 0.1, + "alpha_c_e_ft": 0.6, + "alpha_c_e_td": 0.3, + "alpha_c_e_nt": 0.1, + "omega_scg": 0.7, + "omega_lcg": 0.1, + "omega_xcg": 0.2, + "f_c": 0.32, + "f_pt": 0.42, +} p.update_specification(revisions) expected_dict = { - 'c': {'mix': 0.042792929, 'd': 0.029696442, 'e': 0.048955981}, - 'pt': {'mix': 0.027511674, 'd': 0.025517154, 'e': 0.028955981}} + "c": {"mix": 0.042792929, "d": 0.029696442, "e": 0.048955981}, + "pt": {"mix": 0.027511674, "d": 0.025517154, "e": 0.028955981}, +} -@pytest.mark.parametrize('entity_type,p,expected_dict', - [('c', p, expected_dict), - ('pt', p, expected_dict)], - ids=['Corporate', 'Pass-Throughs']) +@pytest.mark.parametrize( + "entity_type,p,expected_dict", + [("c", p, expected_dict), ("pt", p, expected_dict)], + ids=["Corporate", "Pass-Throughs"], +) def test_calc_s(entity_type, p, expected_dict): - ''' + """ Test of the paramfunctions.calc_s function - ''' + """ test_dict, test_E_pt = pf.calc_s(p) for k, v in test_dict[entity_type].items(): - assert (np.allclose(v, expected_dict[entity_type][k])) + assert np.allclose(v, 
expected_dict[entity_type][k]) - assert (np.allclose(test_E_pt, 0.048955981)) + assert np.allclose(test_E_pt, 0.048955981) p = Specification() revisions = { - 'f_c': 0.4, 'f_pt': 0.2, 'interest_deduct_haircut_c': 0.0, - 'interest_deduct_haircut_pt': 0.0, 'ace_c': 0.0, 'ace_pt': 0.0, - 'CIT_rate': 0.25, 'tau_pt': 0.22, 'nominal_interest_rate': 0.05, - 'inflation_rate': 0.02, 'E_c': 0.08} + "f_c": 0.4, + "f_pt": 0.2, + "interest_deduct_haircut_c": 0.0, + "interest_deduct_haircut_pt": 0.0, + "ace_c": 0.0, + "ace_pt": 0.0, + "CIT_rate": 0.25, + "tau_pt": 0.22, + "nominal_interest_rate": 0.05, + "inflation_rate": 0.02, + "E_c": 0.08, +} p.update_specification(revisions) expected_r_dict = { - 'c': {'mix': np.array([0.075]), 'd': np.array([0.0375]), - 'e': np.array([0.1])}, - 'pt': {'mix': np.array([0.0798]), 'd': np.array([0.039]), - 'e': np.array([0.09])}} - - -@pytest.mark.parametrize('p,expected_dict', - [(p, expected_r_dict)], - ids=['Test 1']) + "c": { + "mix": np.array([0.075]), + "d": np.array([0.0375]), + "e": np.array([0.1]), + }, + "pt": { + "mix": np.array([0.0798]), + "d": np.array([0.039]), + "e": np.array([0.09]), + }, +} + + +@pytest.mark.parametrize( + "p,expected_dict", [(p, expected_r_dict)], ids=["Test 1"] +) def test_calc_r(p, expected_dict): - ''' + """ Test of the calculation of the discount rate function - ''' - f_dict = {'c': {'mix': p.f_c, 'd': 1.0, 'e': 0.0}, - 'pt': {'mix': p.f_pt, 'd': 1.0, 'e': 0.0}} - int_haircut_dict = {'c': p.interest_deduct_haircut_c, - 'pt': p.interest_deduct_haircut_pt} - E_dict = {'c': p.E_c, 'pt': 0.07} - print('E dict = ', E_dict) - ace_dict = {'c': p.ace_c, 'pt': p.ace_pt} + """ + f_dict = { + "c": {"mix": p.f_c, "d": 1.0, "e": 0.0}, + "pt": {"mix": p.f_pt, "d": 1.0, "e": 0.0}, + } + int_haircut_dict = { + "c": p.interest_deduct_haircut_c, + "pt": p.interest_deduct_haircut_pt, + } + E_dict = {"c": p.E_c, "pt": 0.07} + print("E dict = ", E_dict) + ace_dict = {"c": p.ace_c, "pt": p.ace_pt} test_dict = {} for t in p.entity_list: test_dict[t] = {} @@ -160,31 +208,40 @@ def test_calc_r(p, expected_dict): f_dict[t][f], int_haircut_dict[t], E_dict[t], - ace_dict[t] - ) + ace_dict[t], + ) for k, v in test_dict.items(): for k2, v2 in v.items(): - assert (np.allclose(v2, expected_dict[k][k2])) + assert np.allclose(v2, expected_dict[k][k2]) expected_rprime_dict = { - 'c': {'mix': np.array([0.08]), 'd': np.array([0.05]), - 'e': np.array([0.1])}, - 'pt': {'mix': np.array([0.082]), 'd': np.array([0.05]), - 'e': np.array([0.09])}} - - -@pytest.mark.parametrize('p,expected_dict', - [(p, expected_rprime_dict)], - ids=['Test 1']) + "c": { + "mix": np.array([0.08]), + "d": np.array([0.05]), + "e": np.array([0.1]), + }, + "pt": { + "mix": np.array([0.082]), + "d": np.array([0.05]), + "e": np.array([0.09]), + }, +} + + +@pytest.mark.parametrize( + "p,expected_dict", [(p, expected_rprime_dict)], ids=["Test 1"] +) def test_calc_rprime(p, expected_dict): - ''' + """ Test of the calculation of the after-tax rate of return function - ''' - f_dict = {'c': {'mix': p.f_c, 'd': 1.0, 'e': 0.0}, - 'pt': {'mix': p.f_pt, 'd': 1.0, 'e': 0.0}} - E_dict = {'c': p.E_c, 'pt': 0.07} + """ + f_dict = { + "c": {"mix": p.f_c, "d": 1.0, "e": 0.0}, + "pt": {"mix": p.f_pt, "d": 1.0, "e": 0.0}, + } + E_dict = {"c": p.E_c, "pt": 0.07} test_dict = {} for t in p.entity_list: test_dict[t] = {} @@ -193,9 +250,9 @@ def test_calc_rprime(p, expected_dict): p.nominal_interest_rate, p.inflation_rate, f_dict[t][f], - E_dict[t] + E_dict[t], ) for k, v in test_dict.items(): for k2, v2 in v.items(): 
- assert (np.allclose(v2, expected_dict[k][k2])) + assert np.allclose(v2, expected_dict[k][k2]) diff --git a/ccc/tests/test_run_ccc.py b/ccc/tests/test_run_ccc.py index 646b2139..14940cdc 100644 --- a/ccc/tests/test_run_ccc.py +++ b/ccc/tests/test_run_ccc.py @@ -27,14 +27,16 @@ def test_calc_by_methods(): actual_by_industry = calc.calc_by_industry() # load expected results from the calc_by_ methods expect_by_asset = pd.read_json( - os.path.join(TDIR, 'run_ccc_asset_output.json') + os.path.join(TDIR, "run_ccc_asset_output.json") ) expect_by_industry = pd.read_json( - os.path.join(TDIR, 'run_ccc_industry_output.json') + os.path.join(TDIR, "run_ccc_industry_output.json") ) # compare the actual and expect DataFrames - for actual_df, expect_df in zip([actual_by_asset, actual_by_industry], - [expect_by_asset, expect_by_industry]): + for actual_df, expect_df in zip( + [actual_by_asset, actual_by_industry], + [expect_by_asset, expect_by_industry], + ): actual_df.sort_index(inplace=True) actual_df.reset_index(inplace=True) expect_df.sort_index(inplace=True) @@ -45,8 +47,9 @@ def test_calc_by_methods(): example = getattr(actual_df, col).iloc[0] can_diff = isinstance(example, numbers.Number) if can_diff: - assert np.allclose(actual_df[col].values, - expect_df[col].values, atol=1e-5) + assert np.allclose( + actual_df[col].values, expect_df[col].values, atol=1e-5 + ) else: pass except AttributeError: @@ -66,9 +69,14 @@ def test_example_output(): calc1 = Calculator(baseline_parameters, dp, assets) reform_parameters = Specification(year=cyr) business_tax_adjustments = { - 'CIT_rate': 0.35, 'BonusDeprec_3yr': 0.50, 'BonusDeprec_5yr': 0.50, - 'BonusDeprec_7yr': 0.50, 'BonusDeprec_10yr': 0.50, - 'BonusDeprec_15yr': 0.50, 'BonusDeprec_20yr': 0.50} + "CIT_rate": 0.35, + "BonusDeprec_3yr": 0.50, + "BonusDeprec_5yr": 0.50, + "BonusDeprec_7yr": 0.50, + "BonusDeprec_10yr": 0.50, + "BonusDeprec_15yr": 0.50, + "BonusDeprec_20yr": 0.50, + } reform_parameters.update_specification(business_tax_adjustments) calc2 = Calculator(reform_parameters, dp, assets) # ... calculation by asset and by industry @@ -76,38 +84,51 @@ def test_example_output(): reform_assets_df = calc2.calc_by_asset() baseline_industry_df = calc1.calc_by_industry() reform_industry_df = calc2.calc_by_industry() - diff_assets_df = ccc.utils.diff_two_tables(reform_assets_df, - baseline_assets_df) - diff_industry_df = ccc.utils.diff_two_tables(reform_industry_df, - baseline_industry_df) + diff_assets_df = ccc.utils.diff_two_tables( + reform_assets_df, baseline_assets_df + ) + diff_industry_df = ccc.utils.diff_two_tables( + reform_industry_df, baseline_industry_df + ) # ... 
save calculated results as csv files in ccc/test directory - baseline_industry_df.to_csv(os.path.join(TDIR, 'baseline_byindustry.csv'), - float_format='%.5f') - reform_industry_df.to_csv(os.path.join(TDIR, 'reform_byindustry.csv'), - float_format='%.5f') - baseline_assets_df.to_csv(os.path.join(TDIR, 'baseline_byasset.csv'), - float_format='%.5f') - reform_assets_df.to_csv(os.path.join(TDIR, 'reform_byasset.csv'), - float_format='%.5f') - diff_industry_df.to_csv(os.path.join(TDIR, 'changed_byindustry.csv'), - float_format='%.5f') - diff_assets_df.to_csv(os.path.join(TDIR, 'changed_byasset.csv'), - float_format='%.5f') + baseline_industry_df.to_csv( + os.path.join(TDIR, "baseline_byindustry.csv"), float_format="%.5f" + ) + reform_industry_df.to_csv( + os.path.join(TDIR, "reform_byindustry.csv"), float_format="%.5f" + ) + baseline_assets_df.to_csv( + os.path.join(TDIR, "baseline_byasset.csv"), float_format="%.5f" + ) + reform_assets_df.to_csv( + os.path.join(TDIR, "reform_byasset.csv"), float_format="%.5f" + ) + diff_industry_df.to_csv( + os.path.join(TDIR, "changed_byindustry.csv"), float_format="%.5f" + ) + diff_assets_df.to_csv( + os.path.join(TDIR, "changed_byasset.csv"), float_format="%.5f" + ) # compare actual calculated results to expected results - failmsg = '' - expect_output_dir = os.path.join(TDIR, '..', '..', 'example_output') - for fname in ['baseline_byasset', 'baseline_byindustry', - 'reform_byasset', 'reform_byindustry', - 'changed_byasset', 'changed_byindustry']: - actual_path = os.path.join(TDIR, fname + '.csv') + failmsg = "" + expect_output_dir = os.path.join(TDIR, "..", "..", "example_output") + for fname in [ + "baseline_byasset", + "baseline_byindustry", + "reform_byasset", + "reform_byindustry", + "changed_byasset", + "changed_byindustry", + ]: + actual_path = os.path.join(TDIR, fname + ".csv") actual_df = pd.read_csv(actual_path) - expect_path = os.path.join(expect_output_dir, fname + '_expected.csv') + expect_path = os.path.join(expect_output_dir, fname + "_expected.csv") expect_df = pd.read_csv(expect_path) try: assert_frame_equal(actual_df, expect_df) # cleanup actual results if it has same contents as expected file os.remove(actual_path) except AssertionError: - failmsg += 'ACTUAL-vs-EXPECT DIFFERENCES FOR {}\n'.format(fname) + failmsg += "ACTUAL-vs-EXPECT DIFFERENCES FOR {}\n".format(fname) if failmsg: - raise AssertionError('\n' + failmsg) + raise AssertionError("\n" + failmsg) diff --git a/ccc/tests/test_start_years.py b/ccc/tests/test_start_years.py index 31a6a063..a1eab1af 100644 --- a/ccc/tests/test_start_years.py +++ b/ccc/tests/test_start_years.py @@ -5,29 +5,33 @@ @pytest.mark.parametrize( - 'year', - [2014, 2015, 2017, 2027], - ids=['2014', '2015', '2017', '2027']) + "year", [2014, 2015, 2017, 2027], ids=["2014", "2015", "2017", "2027"] +) def test_tc_start_year(year): - ''' + """ Test that different start years work in functions calling Tax-Calculator - ''' + """ get_rates(True, year) @pytest.mark.parametrize( - 'year,expected_values', - [(2014, [0.35, 0.5, 0.5]), (2015, [0.35, 0.5, 0.5]), - (2017, [0.35, 0.5, 0.5]), (2026, [0.21, 0.2, 0.5]), - (2027, [0.21, 0.0, 0.5])], - ids=['2014', '2015', '2017', '2026', '2027']) + "year,expected_values", + [ + (2014, [0.35, 0.5, 0.5]), + (2015, [0.35, 0.5, 0.5]), + (2017, [0.35, 0.5, 0.5]), + (2026, [0.21, 0.2, 0.5]), + (2027, [0.21, 0.0, 0.5]), + ], + ids=["2014", "2015", "2017", "2026", "2027"], +) def test_params_start_year(year, expected_values): - ''' + """ Test that different start years return the expected 
parameter values as specified in the default_parameters.json file. - ''' + """ p = Specification(year=year) - assert (np.allclose(p.u['c'], expected_values[0])) - assert (np.allclose(p.bonus_deprec['3'], expected_values[1])) - assert (np.allclose(p.phi, expected_values[2])) + assert np.allclose(p.u["c"], expected_values[0]) + assert np.allclose(p.bonus_deprec["3"], expected_values[1]) + assert np.allclose(p.phi, expected_values[2]) diff --git a/ccc/tests/test_utils.py b/ccc/tests/test_utils.py index 25382ce1..d4823224 100644 --- a/ccc/tests/test_utils.py +++ b/ccc/tests/test_utils.py @@ -6,104 +6,107 @@ def test_to_str(): - ''' + """ Test of the to_str() function - ''' - number = '3' + """ + number = "3" test_str = utils.to_str(number) assert isinstance(test_str, str) def test_to_str_decode(): - ''' + """ Test of the to_str() function - ''' - number = '3'.encode() + """ + number = "3".encode() test_str = utils.to_str(number) assert isinstance(test_str, str) -test_data = [(27.5, '27_5'), (30, '30')] +test_data = [(27.5, "27_5"), (30, "30")] -@pytest.mark.parametrize('number,expected', test_data, - ids=['Decimal', 'Integer']) +@pytest.mark.parametrize( + "number,expected", test_data, ids=["Decimal", "Integer"] +) def test_str_modified(number, expected): - ''' + """ Test of the str_modified() function - ''' - number = '3' + """ test_str = utils.str_modified(number) - assert (number == test_str) + assert test_str == expected def test_diff_two_tables(): - ''' + """ Test of the diff_two_tables() function - ''' - dict1 = {'var1': [1, 2, 3, 4, 5], 'var2': [2, 4, 6, 8, 10]} - dict2 = {'var1': [1, 2, 3, 4, 5], 'var2': [2, 4, 6, 8, 10]} + """ + dict1 = {"var1": [1, 2, 3, 4, 5], "var2": [2, 4, 6, 8, 10]} + dict2 = {"var1": [1, 2, 3, 4, 5], "var2": [2, 4, 6, 8, 10]} df1 = pd.DataFrame.from_dict(dict1) df2 = pd.DataFrame.from_dict(dict2) - expected_dict = {'var1': [0, 0, 0, 0, 0], 'var2': [0, 0, 0, 0, 0]} + expected_dict = {"var1": [0, 0, 0, 0, 0], "var2": [0, 0, 0, 0, 0]} expected_df = pd.DataFrame.from_dict(expected_dict) test_df = utils.diff_two_tables(df1, df2) pd.testing.assert_frame_equal(test_df, expected_df) def test_wavg(): - ''' + """ Test of utils.wavg() function - ''' - dict1 = {'id': ['a', 'a', 'a'], - 'var1': [1, 2, 3], - 'var2': [2, 4, 6], - 'wgt_var': [0.25, 0.5, 0.25]} + """ + dict1 = { + "id": ["a", "a", "a"], + "var1": [1, 2, 3], + "var2": [2, 4, 6], + "wgt_var": [0.25, 0.5, 0.25], + } df1 = pd.DataFrame.from_dict(dict1) expected_val = 2.0 - test_val = utils.wavg(df1, 'var1', 'wgt_var') + test_val = utils.wavg(df1, "var1", "wgt_var") assert np.allclose(test_val, expected_val) def test_read_egg_csv(): - ''' + """ Test of utils.read_egg_csv() function - ''' - test_df = utils.read_egg_csv('ccc_asset_data.csv') + """ + test_df = utils.read_egg_csv("ccc_asset_data.csv") assert isinstance(test_df, pd.DataFrame) def test_read_egg_csv_exception(): - ''' + """ Test of utils.read_egg_csv() function - ''' + """ with pytest.raises(Exception): - assert utils.read_egg_csv('ccc_asset_data2.csv') + assert utils.read_egg_csv("ccc_asset_data2.csv") def test_read_egg_json(): - ''' + """ Test of utils.read_egg_json() function - ''' - test_dict = utils.read_egg_json('records_variables.json') + """ + test_dict = utils.read_egg_json("records_variables.json") assert isinstance(test_dict, dict) def test_read_egg_json_exception(): - ''' + """ Test of utils.read_egg_json() function - ''' + """ with pytest.raises(Exception): - assert utils.read_egg_json('records_variables2.json') + assert 
utils.read_egg_json("records_variables2.json") def test_json_to_dict(): - ''' + """ Test of utils.json_to_dict() function - ''' + """ json_string = """{ "read": { "asset_name": { @@ -118,13 +121,13 @@ }""" test_dict = utils.json_to_dict(json_string) - assert test_dict['read']['asset_name']['type'] == 'string' + assert test_dict["read"]["asset_name"]["type"] == "string" def test_json_to_dict_exception(): - ''' + """ Test of utils.json_to_dict() function - ''' + """ json_string = """{ "CIT_rate" } @@ -133,13 +136,14 @@ assert utils.json_to_dict(json_string) -dict1 = {'var1': [1, 2, 3, 4, 5], 'var2': [2, 4, 6, 8, 10]} +dict1 = {"var1": [1, 2, 3, 4, 5], "var2": [2, 4, 6, 8, 10]} df1 = pd.DataFrame.from_dict(dict1) -test_data = [(df1, 'tex', 0), (df1, 'json', 2), (df1, 'html', 3)] +test_data = [(df1, "tex", 0), (df1, "json", 2), (df1, "html", 3)] -@pytest.mark.parametrize('df,output_type,precision', test_data, - ids=['tex', 'json', 'html']) +@pytest.mark.parametrize( + "df,output_type,precision", test_data, ids=["tex", "json", "html"] +) def test_save_return_table(df, output_type, precision): test_str = utils.save_return_table(df, output_type, None, precision) @@ -147,49 +151,56 @@ def test_save_return_table_df(): - ''' + """ Test that a DataFrame can be returned from utils.save_return_table - ''' - dict1 = {'var1': [1, 2, 3, 4, 5], 'var2': [2, 4, 6, 8, 10]} + """ + dict1 = {"var1": [1, 2, 3, 4, 5], "var2": [2, 4, 6, 8, 10]} df1 = pd.DataFrame.from_dict(dict1) test_df = utils.save_return_table(df1) assert isinstance(test_df, pd.DataFrame) -path1 = 'output.tex' -path2 = 'output.csv' -path3 = 'output.json' -path4 = 'output.xlsx' +path1 = "output.tex" +path2 = "output.csv" +path3 = "output.json" +path4 = "output.xlsx" # # writetoafile(file.strpath) # or use str(file) # assert file.read() == 'Hello\n' -test_data = [(df1, 'tex', path1), (df1, 'csv', path2), - (df1, 'json', path3), (df1, 'excel', path4)] +test_data = [ + (df1, "tex", path1), + (df1, "csv", path2), + (df1, "json", path3), + (df1, "excel", path4), +] -@pytest.mark.parametrize('df,output_type,path', test_data, - ids=['tex', 'csv', 'json', 'excel']) +@pytest.mark.parametrize( + "df,output_type,path", test_data, ids=["tex", "csv", "json", "excel"] +) def test_save_return_table_write(df, output_type, path): - ''' + """ Test of the utils.save_return_table function for the case when writing to disk - ''' + """ utils.save_return_table(df, output_type, path=path) filehandle = open(path) try: assert filehandle.read() is not None except UnicodeDecodeError: from openpyxl import load_workbook + wb = load_workbook(filename=path) assert wb is not None filehandle.close() def test_save_return_table_exception(): - ''' + """ Test that utils.save_return_table raises an exception for an invalid output format - ''' - dict1 = {'var1': [1, 2, 3, 4, 5], 'var2': [2, 4, 6, 8, 10]} + """ + dict1 = {"var1": [1, 2, 3, 4, 5], "var2": [2, 4, 6, 8, 10]} df1 = pd.DataFrame.from_dict(dict1) with pytest.raises(Exception): - assert utils.save_return_table(df1, output_type='xls', - path='filename.tex') + assert utils.save_return_table( + df1, output_type="xls", path="filename.tex" + ) diff --git a/ccc/utils.py b/ccc/utils.py index 6c1fb60b..3e5dc95f 100644 --- a/ccc/utils.py +++ b/ccc/utils.py @@ -6,8 +6,8 @@ import json import pandas as pd -PACKAGE_NAME = 'ccc' -PYPI_PACKAGE_NAME = 'cost-of-capital-calculator' +PACKAGE_NAME = "ccc" +PYPI_PACKAGE_NAME = "cost-of-capital-calculator" # 
Default year for model runs DEFAULT_START_YEAR = 2022 @@ -23,7 +23,7 @@ def to_str(x): - ''' + """ Function to decode string. Args: @@ -32,14 +32,14 @@ def to_str(x): Returns: x (string): decoded string - ''' - if hasattr(x, 'decode'): + """ + if hasattr(x, "decode"): return x.decode() return x def str_modified(i): - ''' + """ Function to deal with conversion of a decimal number to a string. Args: @@ -48,16 +48,16 @@ def str_modified(i): Returns: str_i (string): number converted to a string - ''' + """ if i == 27.5: - str_i = '27_5' + str_i = "27_5" else: str_i = str(int(i)) return str_i def diff_two_tables(df1, df2): - ''' + """ Create the difference between two dataframes. Args: @@ -68,7 +68,7 @@ def diff_two_tables(df1, df2): diff_df (Pandas DataFrame): DataFrame with differences between two DataFrames - ''' + """ assert tuple(df1.columns) == tuple(df2.columns) diffs = OrderedDict() for c in df1.columns: @@ -86,7 +86,7 @@ def diff_two_tables(df1, df2): def wavg(group, avg_name, weight_name): - ''' + """ Computes a weighted average. Args: @@ -97,8 +97,8 @@ def wavg(group, avg_name, weight_name): Returns: d (scalar): weighted avg for the group - ''' - warnings.filterwarnings('error') + """ + warnings.filterwarnings("error") d = group[avg_name] w = group[weight_name] try: @@ -108,7 +108,7 @@ def wavg(group, avg_name, weight_name): def read_egg_csv(fname, index_col=None): - ''' + """ Read from egg the file named fname that contains CSV data and return pandas DataFrame containing the data. @@ -118,24 +118,24 @@ def read_egg_csv(fname, index_col=None): Returns: vdf (Pandas DataFrame): data from csv file - ''' + """ # try: path_in_egg = os.path.join(PACKAGE_NAME, fname) try: vdf = pd.read_csv( pkg_resources.resource_stream( - pkg_resources.Requirement.parse(PYPI_PACKAGE_NAME), - path_in_egg), - index_col=index_col + pkg_resources.Requirement.parse(PYPI_PACKAGE_NAME), path_in_egg + ), + index_col=index_col, ) except Exception: - raise ValueError('could not read {} data from egg'.format(fname)) + raise ValueError("could not read {} data from egg".format(fname)) # cannot call read_egg_ function in unit tests return vdf # pragma: no cover def read_egg_json(fname): - ''' + """ Read from egg the file named fname that contains JSON data and return dictionary containing the data. @@ -145,23 +145,25 @@ def read_egg_json(fname): Returns: pdict (dict): data from JSON file - ''' + """ try: path_in_egg = os.path.join(PACKAGE_NAME, fname) pdict = json.loads( pkg_resources.resource_stream( - pkg_resources.Requirement.parse(PYPI_PACKAGE_NAME), - path_in_egg).read().decode('utf-8'), - object_pairs_hook=OrderedDict + pkg_resources.Requirement.parse(PYPI_PACKAGE_NAME), path_in_egg + ) + .read() + .decode("utf-8"), + object_pairs_hook=OrderedDict, ) except Exception: - raise ValueError('could not read {} data from egg'.format(fname)) + raise ValueError("could not read {} data from egg".format(fname)) # cannot call read_egg_ function in unit tests return pdict # pragma: no cover def json_to_dict(json_text): - ''' + """ Convert specified JSON text into an ordered Python dictionary. Args: @@ -174,31 +176,31 @@ def json_to_dict(json_text): ordered_dict (collections.OrderedDict): JSON data expressed as an ordered Python dictionary. 
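Example (an illustrative doctest; the result follows directly from the json.loads call below, which uses object_pairs_hook=OrderedDict):

    >>> json_to_dict('{"CIT_rate": 0.21}')
    OrderedDict([('CIT_rate', 0.21)])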
- ''' + """ try: - ordered_dict = json.loads(json_text, - object_pairs_hook=OrderedDict) + ordered_dict = json.loads(json_text, object_pairs_hook=OrderedDict) except ValueError as valerr: - text_lines = json_text.split('\n') - msg = 'Text below contains invalid JSON:\n' - msg += str(valerr) + '\n' - msg += 'Above location of the first error may be approximate.\n' - msg += 'The invalid JSON text is between the lines:\n' - bline = ('XXXX----.----1----.----2----.----3----.----4' - '----.----5----.----6----.----7') - msg += bline + '\n' + text_lines = json_text.split("\n") + msg = "Text below contains invalid JSON:\n" + msg += str(valerr) + "\n" + msg += "Above location of the first error may be approximate.\n" + msg += "The invalid JSON text is between the lines:\n" + bline = ( + "XXXX----.----1----.----2----.----3----.----4" + "----.----5----.----6----.----7" + ) + msg += bline + "\n" linenum = 0 for line in text_lines: linenum += 1 - msg += '{:04d}{}'.format(linenum, line) + '\n' - msg += bline + '\n' + msg += "{:04d}{}".format(linenum, line) + "\n" + msg += bline + "\n" raise ValueError(msg) return ordered_dict -def save_return_table(table_df, output_type=None, path=None, - precision=0): - ''' +def save_return_table(table_df, output_type=None, path=None, precision=0): + """ Function to save or return a table of data. Args: @@ -212,46 +214,58 @@ Returns: table_df (Pandas DataFrame): table - ''' + """ if path is None: - if output_type == 'tex': + if output_type == "tex": tab_str = table_df.to_latex( - buf=path, index=False, na_rep='', - float_format=lambda x: '%.' + str(precision) + '0f' % x) + buf=path, + index=False, + na_rep="", + float_format=lambda x: ("%." + str(precision) + "f") % x, + ) return tab_str - elif output_type == 'json': - tab_str = table_df.to_json( - path_or_buf=path, double_precision=0) + elif output_type == "json": + tab_str = table_df.to_json(path_or_buf=path, double_precision=0) return tab_str - elif output_type == 'html': - with pd.option_context('display.precision', precision): + elif output_type == "html": + with pd.option_context("display.precision", precision): tab_html = table_df.to_html( - index=False, - float_format=lambda x: '%10.0f' % x, - classes="table table-striped table-hover") + index=False, + float_format=lambda x: "%10.0f" % x, + classes="table table-striped table-hover", + ) return tab_html else: return table_df else: condition = ( - (path.split('.')[-1] == output_type) or - (path.split('.')[-1] == 'xlsx' and output_type == 'excel') or - (path.split('.')[-1] == 'xls' and output_type == 'excel')) + (path.split(".")[-1] == output_type) + or (path.split(".")[-1] == "xlsx" and output_type == "excel") + or (path.split(".")[-1] == "xls" and output_type == "excel") + ) if condition: - if output_type == 'tex': - table_df.to_latex(buf=path, index=False, na_rep='', - float_format=lambda x: '%.' + - str(precision) + '0f' % x) - elif output_type == 'csv': - table_df.to_csv(path_or_buf=path, index=False, - na_rep='', float_format='%.' + - str(precision) + '0f') - elif output_type == 'json': - table_df.to_json(path_or_buf=path, - double_precision=precision) - elif output_type == 'excel': - table_df.to_excel(excel_writer=path, index=False, - na_rep='', float_format='%.' + - str(precision) + '0f') + if output_type == "tex": + table_df.to_latex( + buf=path, + index=False, + na_rep="", + float_format=lambda x: ("%." + str(precision) + "f") % x, + ) + elif output_type == "csv": + table_df.to_csv( + path_or_buf=path, + index=False, + na_rep="", + float_format="%." + str(precision) + "f", + ) + elif output_type == "json": + table_df.to_json(path_or_buf=path, double_precision=precision) + elif output_type == "excel": + table_df.to_excel( + excel_writer=path, + index=False, + na_rep="", + float_format="%." + str(precision) + "f", + ) else: - raise ValueError('Please enter a valid output format') + raise ValueError("Please enter a valid output format") diff --git a/cs-config/cs_config/functions.py b/cs-config/cs_config/functions.py index 0df12c5d..b396fe2c 100644 --- a/cs-config/cs_config/functions.py +++ b/cs-config/cs_config/functions.py @@ -16,10 +16,11 @@ class MetaParams(paramtools.Parameters): - ''' + """ Meta parameters class for COMP. These parameters will be in a drop down menu on COMP. - ''' + """ + array_first = True defaults = { "year": { @@ -28,21 +29,21 @@ "type": "int", "value": DEFAULT_START_YEAR, "validators": { - "when": { - "param": "data_source", - "is": "CPS", - "then": {"range": {"min": 2014, "max": TC_LAST_YEAR}}, - "otherwise": {"range": {"min": 2013, "max": TC_LAST_YEAR}} - } - }, + "when": { + "param": "data_source", + "is": "CPS", + "then": {"range": {"min": 2014, "max": TC_LAST_YEAR}}, + "otherwise": {"range": {"min": 2013, "max": TC_LAST_YEAR}}, + } + }, }, "data_source": { "title": "Data source", "description": "Data source for Tax-Calculator to use", "type": "str", "value": "CPS", - "validators": {"choice": {"choices": ["PUF", "CPS"]}} - } + "validators": {"choice": {"choices": ["PUF", "CPS"]}}, + }, } def dump(self, *args, **kwargs): @@ -54,23 +55,19 @@ data = super().dump(*args, **kwargs) if self.data_source == "CPS": data["year"]["validators"] = { - "choice": { - "choices": list(range(2014, TC_LAST_YEAR)) - } + "choice": {"choices": list(range(2014, TC_LAST_YEAR))} } else: data["year"]["validators"] = { - "choice": { - "choices": list(range(2013, TC_LAST_YEAR)) - } + "choice": {"choices": list(range(2013, TC_LAST_YEAR))} } return data def get_inputs(meta_params_dict): - ''' + """ Function to get user input parameters from COMP - ''' + """ # Get meta-params from web app meta_params = MetaParams() with meta_params.transaction(defer_validation=True): @@ -85,12 +82,33 @@ # filter out parameters that can be changed with Tax-Calc params or # that users are unlikely to use (to reduce clutter on screen) filter_list = [ - 'tau_div', 'tau_nc', 'tau_int', 'tau_scg', 'tau_lcg', 'tau_td', - 'tau_h', 'alpha_c_e_ft', 'alpha_c_e_td', 'alpha_c_e_nt', - 'alpha_c_d_ft', 'alpha_c_d_td', 'alpha_c_d_nt', 'alpha_nc_d_ft', - 'alpha_nc_d_td', 'alpha_nc_d_nt', 'alpha_h_d_ft', 'alpha_h_d_td', - 'alpha_h_d_nt', 'Y_td', 'Y_scg', 'Y_lcg', 'Y_xcg', 'Y_v', 'gamma', - 'phi'] + "tau_div", + "tau_nc", + "tau_int", + "tau_scg", + "tau_lcg", + "tau_td", + "tau_h", + "alpha_c_e_ft", + "alpha_c_e_td", + "alpha_c_e_nt", + "alpha_c_d_ft", + "alpha_c_d_td", + "alpha_c_d_nt", + "alpha_nc_d_ft", + "alpha_nc_d_td", + "alpha_nc_d_nt", + "alpha_h_d_ft", + "alpha_h_d_td", + "alpha_h_d_nt", + "Y_td", + "Y_scg", + "Y_lcg", + "Y_xcg", + "Y_v", + "gamma", + "phi", + ] for k, v in ccc_params.dump().items(): if k not in filter_list: filtered_ccc_params[k] = v @@ -100,42 +118,44 @@ iit_params.set_state(year=meta_params.year.tolist()) filtered_iit_params = cs2tc.convert_policy_defaults( 
meta_params, iit_params) + meta_params, iit_params + ) default_params = { "Business Tax Parameters": filtered_ccc_params, - "Individual and Payroll Tax Parameters": filtered_iit_params + "Individual and Payroll Tax Parameters": filtered_iit_params, } return { - "meta_parameters": meta_params.dump(), - "model_parameters": default_params - } + "meta_parameters": meta_params.dump(), + "model_parameters": default_params, + } def validate_inputs(meta_param_dict, adjustment, errors_warnings): - ''' + """ Validates user inputs for parameters - ''' + """ # Validate meta parameter inputs meta_params = MetaParams() meta_params.adjust(meta_param_dict, raise_errors=False) errors_warnings["Business Tax Parameters"]["errors"].update( - meta_params.errors) + meta_params.errors + ) # Validate CCC parameter inputs params = Specification() - params.adjust(adjustment["Business Tax Parameters"], - raise_errors=False) - errors_warnings["Business Tax Parameters"]["errors"].update( - params.errors) + params.adjust(adjustment["Business Tax Parameters"], raise_errors=False) + errors_warnings["Business Tax Parameters"]["errors"].update(params.errors) # Validate TC parameter inputs iit_adj = cs2tc.convert_policy_adjustment( - adjustment["Individual and Payroll Tax Parameters"]) + adjustment["Individual and Payroll Tax Parameters"] + ) iit_params = Policy() iit_params.adjust(iit_adj, raise_errors=False, ignore_warnings=True) - errors_warnings["Individual and Payroll Tax Parameters"][ - "errors"].update(iit_params.errors) + errors_warnings["Individual and Payroll Tax Parameters"]["errors"].update( + iit_params.errors + ) return {"errors_warnings": errors_warnings} @@ -145,10 +165,10 @@ def get_version(): def run_model(meta_param_dict, adjustment): - ''' + """ Initializes classes from CCC that compute the model under different policies. Then calls the function that creates the output objects. 
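A minimal sketch of a call (the year and rate values here are hypothetical; the dictionary structure matches the adjustments used in the tests):

    comp_dict = run_model(
        {"year": 2022, "data_source": "CPS"},
        {
            "Business Tax Parameters": {"CIT_rate": 0.25},
            "Individual and Payroll Tax Parameters": {},
        },
    )
    # comp_dict["renderable"] holds the table and plot payloads and
    # comp_dict["downloadable"] holds the CSV payloads.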
- ''' + """ # update MetaParams meta_params = MetaParams() meta_params.adjust(meta_param_dict) @@ -158,33 +178,53 @@ else: data = "cps" # Get TC params adjustments - iit_mods = cs2tc.convert_policy_adjustment(adjustment[ - "Individual and Payroll Tax Parameters"]) + iit_mods = cs2tc.convert_policy_adjustment( + adjustment["Individual and Payroll Tax Parameters"] + ) filtered_ccc_params = {} # filter out CCC params that will not change between baseline and # reform runs (These are the Household Savings Behavior and # Economic Assumptions) constant_param_list = [ - 'omega_scg', 'omega_lcg', 'omega_xcg', 'alpha_c_e_ft', - 'alpha_c_e_td', 'alpha_c_e_nt', 'alpha_c_d_ft', 'alpha_c_d_td', - 'alpha_c_d_nt', 'alpha_nc_d_ft', 'alpha_nc_d_td', - 'alpha_nc_d_nt', 'alpha_h_d_ft', 'alpha_h_d_td', 'alpha_h_d_nt', - 'Y_td', 'Y_scg', 'Y_lcg', 'gamma', 'E_c', 'inflation_rate', - 'nominal_interest_rate'] + "omega_scg", + "omega_lcg", + "omega_xcg", + "alpha_c_e_ft", + "alpha_c_e_td", + "alpha_c_e_nt", + "alpha_c_d_ft", + "alpha_c_d_td", + "alpha_c_d_nt", + "alpha_nc_d_ft", + "alpha_nc_d_td", + "alpha_nc_d_nt", + "alpha_h_d_ft", + "alpha_h_d_td", + "alpha_h_d_nt", + "Y_td", + "Y_scg", + "Y_lcg", + "gamma", + "E_c", + "inflation_rate", + "nominal_interest_rate", + ] filtered_ccc_params = OrderedDict() - for k, v in adjustment['Business Tax Parameters'].items(): + for k, v in adjustment["Business Tax Parameters"].items(): if k in constant_param_list: filtered_ccc_params[k] = v # Baseline CCC calculator - params = Specification(year=meta_params.year, call_tc=False, - iit_reform={}, data=data) + params = Specification( + year=meta_params.year, call_tc=False, iit_reform={}, data=data + ) params.update_specification(filtered_ccc_params) assets = Assets() dp = DepreciationParams() calc1 = Calculator(params, dp, assets) # Reform CCC calculator - includes TC adjustments - params2 = Specification(year=meta_params.year, call_tc=True, - iit_reform=iit_mods, data=data) + params2 = Specification( + year=meta_params.year, call_tc=True, iit_reform=iit_mods, data=data + ) params2.update_specification(adjustment["Business Tax Parameters"]) calc2 = Calculator(params2, dp, assets) comp_dict = comp_output(calc1, calc2) @@ -192,71 +232,75 @@ return comp_dict -def comp_output(calc1, calc2, out_var='mettr'): - ''' +def comp_output(calc1, calc2, out_var="mettr"): + """ Function to create output for the COMP platform - ''' + """ baseln_assets_df = calc1.calc_by_asset() reform_assets_df = calc2.calc_by_asset() baseln_industry_df = calc1.calc_by_industry() reform_industry_df = calc2.calc_by_industry() - html_table = calc1.summary_table(calc2, output_variable=out_var, - output_type='html') - plt1 = calc1.grouped_bar(calc2, output_variable=out_var, - include_title=True) + html_table = calc1.summary_table( + calc2, output_variable=out_var, output_type="html" + ) + plt1 = calc1.grouped_bar( + calc2, output_variable=out_var, include_title=True + ) plot_data1 = json_item(plt1) - plt2 = calc1.grouped_bar(calc2, output_variable=out_var, - group_by_asset=False, - include_title=True) + plt2 = calc1.grouped_bar( + calc2, + output_variable=out_var, + group_by_asset=False, + include_title=True, + ) plot_data2 = json_item(plt2) - plt3 = calc1.range_plot(calc2, output_variable='mettr', - include_title=True) + plt3 = calc1.range_plot(calc2, output_variable="mettr", include_title=True) plot_data3 = json_item(plt3) comp_dict = { "renderable": [ { - "media_type": 
"table", - "title": out_var.upper() + " Summary Table", - "data": html_table + "media_type": "table", + "title": out_var.upper() + " Summary Table", + "data": html_table, }, { - "media_type": "bokeh", - "title": plt1.title._property_values['text'], - "data": plot_data1 + "media_type": "bokeh", + "title": plt1.title._property_values["text"], + "data": plot_data1, }, { - "media_type": "bokeh", - "title": plt2.title._property_values['text'], - "data": plot_data2 + "media_type": "bokeh", + "title": plt2.title._property_values["text"], + "data": plot_data2, }, { - "media_type": "bokeh", - "title": "Marginal Effective Total Tax Rates by Method of Financing", - "data": plot_data3 - } - ], + "media_type": "bokeh", + "title": "Marginal Effective Total Tax Rates by Method of Financing", + "data": plot_data3, + }, + ], "downloadable": [ { - "media_type": "CSV", - "title": "Baseline Results by Asset", - "data": baseln_assets_df.to_csv(float_format='%.5f') + "media_type": "CSV", + "title": "Baseline Results by Asset", + "data": baseln_assets_df.to_csv(float_format="%.5f"), }, { - "media_type": "CSV", - "title": "Reform Results by Asset", - "data": reform_assets_df.to_csv(float_format='%.5f') + "media_type": "CSV", + "title": "Reform Results by Asset", + "data": reform_assets_df.to_csv(float_format="%.5f"), }, { - "media_type": "CSV", - "title": "Baseline Results by Industry", - "data": baseln_industry_df.to_csv(float_format='%.5f') + "media_type": "CSV", + "title": "Baseline Results by Industry", + "data": baseln_industry_df.to_csv(float_format="%.5f"), }, { - "media_type": "CSV", - "title": "Reform Results by Industry", - "data": reform_industry_df.to_csv(float_format='%.5f') - } - ] - } + "media_type": "CSV", + "title": "Reform Results by Industry", + "data": reform_industry_df.to_csv(float_format="%.5f"), + }, + ], + } return comp_dict diff --git a/cs-config/cs_config/helpers.py b/cs-config/cs_config/helpers.py index 58630e53..dac7d0cd 100644 --- a/cs-config/cs_config/helpers.py +++ b/cs-config/cs_config/helpers.py @@ -1,9 +1,11 @@ """ Functions used to help CCC configure to COMP """ + import os from pathlib import Path import warnings + try: from s3fs import S3FileSystem except ImportError: @@ -22,45 +24,63 @@ "type": "int", "validators": { "choice": { - "choices": [ - yr for yr in range(2013, TC_LAST_YEAR + 1) - ] + "choices": [yr for yr in range(2013, TC_LAST_YEAR + 1)] } - } + }, }, "MARS": { "type": "str", - "validators": {"choice": {"choices": ["single", "mjoint", - "mseparate", "headhh", - "widow"]}} + "validators": { + "choice": { + "choices": [ + "single", + "mjoint", + "mseparate", + "headhh", + "widow", + ] + } + }, }, "idedtype": { "type": "str", - "validators": {"choice": {"choices": ["med", "sltx", "retx", "cas", - "misc", "int", "char"]}} + "validators": { + "choice": { + "choices": [ + "med", + "sltx", + "retx", + "cas", + "misc", + "int", + "char", + ] + } + }, }, "EIC": { "type": "str", - "validators": {"choice": {"choices": ["0kids", "1kid", - "2kids", "3+kids"]}} + "validators": { + "choice": {"choices": ["0kids", "1kid", "2kids", "3+kids"]} + }, }, "data_source": { "type": "str", - "validators": {"choice": {"choices": ["PUF", "CPS", "other"]}} - } + "validators": {"choice": {"choices": ["PUF", "CPS", "other"]}}, + }, }, "additional_members": { "section_1": {"type": "str"}, "section_2": {"type": "str"}, "start_year": {"type": "int"}, - "checkbox": {"type": "bool"} - } + "checkbox": {"type": "bool"}, + }, } def retrieve_puf( aws_access_key_id=AWS_ACCESS_KEY_ID, - 
aws_secret_access_key=AWS_SECRET_ACCESS_KEY + aws_secret_access_key=AWS_SECRET_ACCESS_KEY, ): """ Function for retrieving the PUF from the OSPC S3 bucket @@ -71,8 +91,10 @@ def retrieve_puf( ) if has_credentials and s3_reader_installed: print("Reading puf from S3 bucket.") - fs = S3FileSystem(key=AWS_ACCESS_KEY_ID, - secret=AWS_SECRET_ACCESS_KEY,) + fs = S3FileSystem( + key=AWS_ACCESS_KEY_ID, + secret=AWS_SECRET_ACCESS_KEY, + ) with fs.open(f"s3://ospc-data-files/{PUF_S3_FILE_NAME}") as f: # Skips over header from top of file. puf_df = pd.read_csv(f, compression="gzip") diff --git a/cs-config/cs_config/tests/test_functions.py b/cs-config/cs_config/tests/test_functions.py index 20c403ca..d04219bf 100644 --- a/cs-config/cs_config/tests/test_functions.py +++ b/cs-config/cs_config/tests/test_functions.py @@ -11,26 +11,33 @@ def test_start_year_with_data_source(): """ data = functions.get_inputs({"data_source": "PUF"}) assert ( - data["meta_parameters"]["year"]["validators"]["choice"] - ["choices"][0] == 2013) + data["meta_parameters"]["year"]["validators"]["choice"]["choices"][0] + == 2013 + ) data = functions.get_inputs({"data_source": "CPS"}) assert ( - data["meta_parameters"]["year"]["validators"]["choice"] - ["choices"][0] == 2014) + data["meta_parameters"]["year"]["validators"]["choice"]["choices"][0] + == 2014 + ) ew = { "Business Tax Parameters": {"errors": {}, "warnings": {}}, - "Individual and Payroll Tax Parameters": - {"errors": {}, "warnings": {}} + "Individual and Payroll Tax Parameters": { + "errors": {}, + "warnings": {}, + }, } res = functions.validate_inputs( {"data_source": "CPS", "year": 2013}, - {"Business Tax Parameters": {}, - "Individual and Payroll Tax Parameters": {}}, ew + { + "Business Tax Parameters": {}, + "Individual and Payroll Tax Parameters": {}, + }, + ew, + ) + assert res["errors_warnings"]["Business Tax Parameters"]["errors"].get( + "year" ) - assert ( - res["errors_warnings"]["Business Tax Parameters"] - ["errors"].get("year")) class TestFunctions1(CoreTestFunctions): @@ -40,30 +47,24 @@ class TestFunctions1(CoreTestFunctions): run_model = functions.run_model ok_adjustment = { "Business Tax Parameters": { - "CIT_rate": [ - { - "year": 2022, - "value": 0.25 - } - ] + "CIT_rate": [{"year": 2022, "value": 0.25}] }, "Individual and Payroll Tax Parameters": { - "FICA_ss_trt": [ - { - "year": 2022, - "value": 0.14 - } - ] - } + "FICA_ss_trt": [{"year": 2022, "value": 0.14}] + }, + } + bad_adjustment = { + "Business Tax Parameters": {"CIT_rate": -0.1}, + "Individual and Payroll Tax Parameters": {"STD": -1}, } - bad_adjustment = {"Business Tax Parameters": {"CIT_rate": -0.1}, - "Individual and Payroll Tax Parameters": {"STD": -1}} def test_param_effect(): - adjustment = {"Business Tax Parameters": {"CIT_rate": 0.35}, - "Individual and Payroll Tax Parameters": {}} + adjustment = { + "Business Tax Parameters": {"CIT_rate": 0.35}, + "Individual and Payroll Tax Parameters": {}, + } comp_dict = functions.run_model({}, adjustment) - df1 = pd.read_csv(io.StringIO(comp_dict['downloadable'][0]['data'])) - df2 = pd.read_csv(io.StringIO(comp_dict['downloadable'][1]['data'])) - assert max(np.absolute(df1['rho_mix']-df2['rho_mix'])) > 0 + df1 = pd.read_csv(io.StringIO(comp_dict["downloadable"][0]["data"])) + df2 = pd.read_csv(io.StringIO(comp_dict["downloadable"][1]["data"])) + assert max(np.absolute(df1["rho_mix"] - df2["rho_mix"])) > 0 diff --git a/data/create_asset_data.py b/data/create_asset_data.py index 07af57ed..d9f2ab35 100644 --- a/data/create_asset_data.py +++ 
b/data/create_asset_data.py @@ -12,9 +12,9 @@ import read_bea from soi_processing import pull_soi_data import pull_depreciation -from ccc.constants import (MAJOR_ASSET_GROUPS, MINOR_ASSET_GROUPS, - BEA_CODE_DICT) +from ccc.constants import MAJOR_ASSET_GROUPS, MINOR_ASSET_GROUPS, BEA_CODE_DICT from data_paths import get_paths + globals().update(get_paths()) @@ -30,48 +30,63 @@ # and industry # this function also takes care of residential fixed assets # and the owner-occupied housing sector -land, res_assets, owner_occ_dict = read_bea.land( - soi_data, fixed_assets) +land, res_assets, owner_occ_dict = read_bea.land(soi_data, fixed_assets) # put all asset data together asset_data = read_bea.combine( - fixed_assets, inventories, land, res_assets, owner_occ_dict) + fixed_assets, inventories, land, res_assets, owner_occ_dict +) # collapse over different entity types and just get the sum of corporate # and non-corporate by industry and asset type -asset_data_by_tax_treat = pd.DataFrame(asset_data.groupby( - ['tax_treat', 'Asset Type', 'assets', 'bea_asset_code', - 'bea_ind_code', 'Industry', 'minor_code_alt']).sum()).reset_index() -asset_data_by_tax_treat.drop(columns=['level_0', 'index'], inplace=True) +asset_data_by_tax_treat = pd.DataFrame( + asset_data.groupby( + [ + "tax_treat", + "Asset Type", + "assets", + "bea_asset_code", + "bea_ind_code", + "Industry", + "minor_code_alt", + ] + ).sum() +).reset_index() +asset_data_by_tax_treat.drop(columns=["level_0", "index"], inplace=True) # Merge in major industry and asset grouping names... # Add major asset group -asset_data_by_tax_treat['major_asset_group'] =\ - asset_data_by_tax_treat['Asset Type'] -asset_data_by_tax_treat['major_asset_group'].replace(MAJOR_ASSET_GROUPS, - inplace=True) +asset_data_by_tax_treat["major_asset_group"] = asset_data_by_tax_treat[ + "Asset Type" +] +asset_data_by_tax_treat["major_asset_group"].replace( + MAJOR_ASSET_GROUPS, inplace=True +) # Add minor asset group -asset_data_by_tax_treat['minor_asset_group'] =\ - asset_data_by_tax_treat['Asset Type'] -asset_data_by_tax_treat['minor_asset_group'].replace(MINOR_ASSET_GROUPS, - inplace=True) +asset_data_by_tax_treat["minor_asset_group"] = asset_data_by_tax_treat[ + "Asset Type" +] +asset_data_by_tax_treat["minor_asset_group"].replace( + MINOR_ASSET_GROUPS, inplace=True +) # Add major industry groupings -asset_data_by_tax_treat['Industry'] =\ - asset_data_by_tax_treat['Industry'].str.strip() -asset_data_by_tax_treat['major_industry'] =\ - asset_data_by_tax_treat['bea_ind_code'] -asset_data_by_tax_treat['major_industry'].replace(BEA_CODE_DICT, - inplace=True) +asset_data_by_tax_treat["Industry"] = asset_data_by_tax_treat[ + "Industry" +].str.strip() +asset_data_by_tax_treat["major_industry"] = asset_data_by_tax_treat[ + "bea_ind_code" +] +asset_data_by_tax_treat["major_industry"].replace(BEA_CODE_DICT, inplace=True) # Merge in economic depreciation rates and tax depreciation systems deprec_info = pull_depreciation.get_depr() asset_data_by_tax_treat = asset_data_by_tax_treat.merge( - deprec_info, on='bea_asset_code', how='left', copy=True) + deprec_info, on="bea_asset_code", how="left", copy=True +) # Give land and inventories depreciation info? 
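# The grouping columns built above all follow one pattern: copy a detailed
# code column, then collapse it with Series.replace and a mapping dict such
# as MAJOR_ASSET_GROUPS or BEA_CODE_DICT. A minimal sketch of the pattern
# with made-up labels (not values from the real crosswalks):
import pandas as pd

toy_map = {"Autos": "Equipment", "Office buildings": "Structures"}
toy = pd.DataFrame({"Asset Type": ["Autos", "Office buildings", "Autos"]})
toy["major_asset_group"] = toy["Asset Type"].replace(toy_map)
# toy["major_asset_group"] -> ["Equipment", "Structures", "Equipment"]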
# clean up -asset_data_by_tax_treat.drop(columns=['Asset Type_x', 'Asset Type_y'], - inplace=True) -asset_data_by_tax_treat.rename( - columns={"Asset": "asset_name"}, inplace=True) +asset_data_by_tax_treat.drop( + columns=["Asset Type_x", "Asset Type_y"], inplace=True +) +asset_data_by_tax_treat.rename(columns={"Asset": "asset_name"}, inplace=True) # save result to csv -asset_data_by_tax_treat.to_csv(os.path.join(_CUR_DIR, - 'ccc_asset_data.csv')) +asset_data_by_tax_treat.to_csv(os.path.join(_CUR_DIR, "ccc_asset_data.csv")) diff --git a/data/data_paths.py b/data/data_paths.py index 424b4404..b242cb46 100644 --- a/data/data_paths.py +++ b/data/data_paths.py @@ -15,49 +15,56 @@ def get_paths(): paths = {} _CUR_DIR = _MAIN_DIR = os.path.dirname(os.path.abspath(__file__)) _DATA_DIR = _MAIN_DIR - paths['_CUR_DIR'] = _CUR_DIR - paths['_MAIN_DIR'] = paths['_DATA_DIR'] = _MAIN_DIR - paths['_RATE_DIR'] = os.path.join(_DATA_DIR, 'depreciation_rates') - paths['_REF_DIR'] = os.path.join(_DATA_DIR, 'reference_data') - paths['_RAW_DIR'] = _RAW_DIR = os.path.join(_DATA_DIR, 'raw_data') - paths['_DEPR_DIR'] = _DEPR_DIR = os.path.join(_DATA_DIR, - 'depreciation_rates') - paths['_BEA_DIR'] = _BEA_DIR = os.path.join(_RAW_DIR, 'BEA') - paths['_FIN_ACCT_DIR'] = _FIN_ACCT_DIR =\ - os.path.join(_RAW_DIR, 'national_accounts') - paths['_BEA_ASSET_PATH'] = os.path.join(_BEA_DIR, - "detailnonres_stk1.xlsx") - paths['_SOI_BEA_CROSS'] = os.path.join(_BEA_DIR, - 'soi_bea_industry_codes.csv') - paths['_BEA_INV'] = os.path.join(_BEA_DIR, 'NIPA_5.8.5B.xls') - paths['_BEA_RES'] = os.path.join(_BEA_DIR, - 'BEA_StdFixedAsset_Table5.1.xls') - paths['_LAND_PATH'] = os.path.join(_FIN_ACCT_DIR, '') - paths['_B101_PATH'] = os.path.join(_FIN_ACCT_DIR, 'b101.csv') - paths['_ECON_DEPR_IN_PATH'] = os.path.join( - _DEPR_DIR, 'Economic Depreciation Rates.csv') - paths['_TAX_DEPR'] = os.path.join(_DEPR_DIR, - 'tax_depreciation_rates.csv') - paths['_SOI_DIR'] = _SOI_DIR = os.path.join(_RAW_DIR, 'soi') - paths['_CORP_DIR'] = _CORP_DIR = os.path.join(_SOI_DIR, 'soi_corporate') - paths['_TOT_CORP_IN_PATH'] = os.path.join(_CORP_DIR, '2013sb1.csv') - paths['_S_CORP_IN_PATH'] = os.path.join(_CORP_DIR, '2013sb3.csv') - paths['_PRT_DIR'] = _PRT_DIR = os.path.join(_SOI_DIR, 'soi_partner') - paths['_DETAIL_PART_CROSS_PATH'] = os.path.join( - _PRT_DIR, 'partner_crosswalk_detailed_industries.csv') - paths['_INC_FILE'] = os.path.join(_PRT_DIR, '13pa01.xls') - paths['_AST_FILE'] = os.path.join(_PRT_DIR, '13pa03.xls') - paths['_TYP_IN_CROSS_PATH'] = os.path.join(_PRT_DIR, - '13pa05_Crosswalk.csv') - paths['_TYP_FILE'] = os.path.join(_PRT_DIR, '13pa05.xls') - paths['_PROP_DIR'] = _PROP_DIR = os.path.join(_SOI_DIR, - 'soi_proprietorship') - paths['_PRT_DIR'] = _PRT_DIR = os.path.join(_SOI_DIR, 'soi_partner') - paths['_NFARM_PATH'] = os.path.join(_PROP_DIR, '13sp01br.xls') - paths['_NFARM_INV'] = os.path.join(_PROP_DIR, '13sp02is.xls') - paths['_FARM_IN_PATH'] = os.path.join(_PROP_DIR, 'farm_data.csv') - paths['_DETAIL_SOLE_PROP_CROSS_PATH'] = os.path.join( - _PROP_DIR, 'detail_sole_prop_crosswalk.csv') - paths['_SOI_CODES'] = os.path.join(_SOI_DIR, 'SOI_codes.csv') + paths["_CUR_DIR"] = _CUR_DIR + paths["_MAIN_DIR"] = paths["_DATA_DIR"] = _MAIN_DIR + paths["_RATE_DIR"] = os.path.join(_DATA_DIR, "depreciation_rates") + paths["_REF_DIR"] = os.path.join(_DATA_DIR, "reference_data") + paths["_RAW_DIR"] = _RAW_DIR = os.path.join(_DATA_DIR, "raw_data") + paths["_DEPR_DIR"] = _DEPR_DIR = os.path.join( + _DATA_DIR, "depreciation_rates" + ) + paths["_BEA_DIR"] = 
_BEA_DIR = os.path.join(_RAW_DIR, "BEA") + paths["_FIN_ACCT_DIR"] = _FIN_ACCT_DIR = os.path.join( + _RAW_DIR, "national_accounts" + ) + paths["_BEA_ASSET_PATH"] = os.path.join(_BEA_DIR, "detailnonres_stk1.xlsx") + paths["_SOI_BEA_CROSS"] = os.path.join( + _BEA_DIR, "soi_bea_industry_codes.csv" + ) + paths["_BEA_INV"] = os.path.join(_BEA_DIR, "NIPA_5.8.5B.xls") + paths["_BEA_RES"] = os.path.join( + _BEA_DIR, "BEA_StdFixedAsset_Table5.1.xls" + ) + paths["_LAND_PATH"] = os.path.join(_FIN_ACCT_DIR, "") + paths["_B101_PATH"] = os.path.join(_FIN_ACCT_DIR, "b101.csv") + paths["_ECON_DEPR_IN_PATH"] = os.path.join( + _DEPR_DIR, "Economic Depreciation Rates.csv" + ) + paths["_TAX_DEPR"] = os.path.join(_DEPR_DIR, "tax_depreciation_rates.csv") + paths["_SOI_DIR"] = _SOI_DIR = os.path.join(_RAW_DIR, "soi") + paths["_CORP_DIR"] = _CORP_DIR = os.path.join(_SOI_DIR, "soi_corporate") + paths["_TOT_CORP_IN_PATH"] = os.path.join(_CORP_DIR, "2013sb1.csv") + paths["_S_CORP_IN_PATH"] = os.path.join(_CORP_DIR, "2013sb3.csv") + paths["_PRT_DIR"] = _PRT_DIR = os.path.join(_SOI_DIR, "soi_partner") + paths["_DETAIL_PART_CROSS_PATH"] = os.path.join( + _PRT_DIR, "partner_crosswalk_detailed_industries.csv" + ) + paths["_INC_FILE"] = os.path.join(_PRT_DIR, "13pa01.xls") + paths["_AST_FILE"] = os.path.join(_PRT_DIR, "13pa03.xls") + paths["_TYP_IN_CROSS_PATH"] = os.path.join( + _PRT_DIR, "13pa05_Crosswalk.csv" + ) + paths["_TYP_FILE"] = os.path.join(_PRT_DIR, "13pa05.xls") + paths["_PROP_DIR"] = _PROP_DIR = os.path.join( + _SOI_DIR, "soi_proprietorship" + ) + paths["_PRT_DIR"] = _PRT_DIR = os.path.join(_SOI_DIR, "soi_partner") + paths["_NFARM_PATH"] = os.path.join(_PROP_DIR, "13sp01br.xls") + paths["_NFARM_INV"] = os.path.join(_PROP_DIR, "13sp02is.xls") + paths["_FARM_IN_PATH"] = os.path.join(_PROP_DIR, "farm_data.csv") + paths["_DETAIL_SOLE_PROP_CROSS_PATH"] = os.path.join( + _PROP_DIR, "detail_sole_prop_crosswalk.csv" + ) + paths["_SOI_CODES"] = os.path.join(_SOI_DIR, "SOI_codes.csv") return paths diff --git a/data/pull_soi_corp.py b/data/pull_soi_corp.py index 14397dcd..75fe69ad 100644 --- a/data/pull_soi_corp.py +++ b/data/pull_soi_corp.py @@ -8,26 +8,34 @@ between the two, the c corporation data can be allocated to all the industries. 
""" + # Packages: import pandas as pd + # Directory names: from data_paths import get_paths + globals().update(get_paths()) -_DFLT_S_CORP_COLS_DICT = DFLT_S_CORP_COLS_DICT =\ - dict([('depreciable_assets', 'DPRCBL_ASSTS'), - ('accumulated_depreciation', 'ACCUM_DPR'), ('land', 'LAND'), - ('inventories', 'INVNTRY'), ('interest_paid', 'INTRST_PD'), - ('Capital_stock', 'CAP_STCK'), - ('additional_paid-in_capital', 'PD_CAP_SRPLS'), - ('earnings_(rtnd_appr.)', ''), - ('earnings_(rtnd_unappr.)', 'COMP_RTND_ERNGS_UNAPPR'), - ('cost_of_treasury_stock', 'CST_TRSRY_STCK'), - ('depreciation', 'NET_DPR')]) -_CORP_FILE_FCTR = 10 ** 3 -_NAICS_COL_NM = 'INDY_CD' -_CODE_RANGE = ['32', '33', '45', '49'] -_PARENTS = {'32': '31', '33': '31', '45': '44', '49': '48'} +_DFLT_S_CORP_COLS_DICT = DFLT_S_CORP_COLS_DICT = dict( + [ + ("depreciable_assets", "DPRCBL_ASSTS"), + ("accumulated_depreciation", "ACCUM_DPR"), + ("land", "LAND"), + ("inventories", "INVNTRY"), + ("interest_paid", "INTRST_PD"), + ("Capital_stock", "CAP_STCK"), + ("additional_paid-in_capital", "PD_CAP_SRPLS"), + ("earnings_(rtnd_appr.)", ""), + ("earnings_(rtnd_unappr.)", "COMP_RTND_ERNGS_UNAPPR"), + ("cost_of_treasury_stock", "CST_TRSRY_STCK"), + ("depreciation", "NET_DPR"), + ] +) +_CORP_FILE_FCTR = 10**3 +_NAICS_COL_NM = "INDY_CD" +_CODE_RANGE = ["32", "33", "45", "49"] +_PARENTS = {"32": "31", "33": "31", "45": "44", "49": "48"} def load_corp_data(): @@ -45,13 +53,13 @@ def load_corp_data(): cols_dict = _DFLT_S_CORP_COLS_DICT # Dataframe column names columns = list(str(x) for x in cols_dict.values()) - columns.remove('') + columns.remove("") # Opening the soi S-corporate data file: try: s_corp = pd.read_csv(_S_CORP_IN_PATH).fillna(0) - s_corp = s_corp.drop(s_corp[s_corp['AC'] > 1.].index) + s_corp = s_corp.drop(s_corp[s_corp["AC"] > 1.0].index) # drop total across all industries - s_corp = s_corp.drop(s_corp[s_corp['INDY_CD'] == 1.].index) + s_corp = s_corp.drop(s_corp[s_corp["INDY_CD"] == 1.0].index) # put in dollars (data in 1000s) s_corp[columns] = s_corp[columns] * _CORP_FILE_FCTR except IOError: @@ -60,10 +68,10 @@ def load_corp_data(): # Opening the soi Total-corporate data file: try: tot_corp = pd.read_csv(_TOT_CORP_IN_PATH).fillna(0) - tot_corp = tot_corp.drop(tot_corp[tot_corp['AC'] > 1.].index) + tot_corp = tot_corp.drop(tot_corp[tot_corp["AC"] > 1.0].index) # drop total across all industries - tot_corp = tot_corp.drop(tot_corp[tot_corp['INDY_CD'] == 1.].index) - tot_corp = tot_corp[['INDY_CD'] + columns].copy() + tot_corp = tot_corp.drop(tot_corp[tot_corp["INDY_CD"] == 1.0].index) + tot_corp = tot_corp[["INDY_CD"] + columns].copy() # put in dollars (data in 1000s) tot_corp[columns] = tot_corp[columns] * _CORP_FILE_FCTR except IOError: @@ -71,20 +79,25 @@ def load_corp_data(): raise # read in crosswalk for bea and soi industry codes - soi_bea_ind_codes = pd.read_csv(_SOI_BEA_CROSS, - dtype={'bea_ind_code': str}) - soi_bea_ind_codes.drop('notes', axis=1, inplace=True) + soi_bea_ind_codes = pd.read_csv( + _SOI_BEA_CROSS, dtype={"bea_ind_code": str} + ) + soi_bea_ind_codes.drop("notes", axis=1, inplace=True) # drop one repeated minor ind code in crosswalk - soi_bea_ind_codes.drop_duplicates(subset=['minor_code_alt'], - inplace=True) + soi_bea_ind_codes.drop_duplicates(subset=["minor_code_alt"], inplace=True) # merge codes to total corp data # inner join means that we keep only rows that match in both datasets # this should keep only unique soi minor industries # in total corp data - note that s corp data already unique by sector 
tot_corp = tot_corp.merge( - soi_bea_ind_codes, how='inner', left_on=['INDY_CD'], - right_on=['minor_code_alt'], suffixes=('_x', '_y'), copy=True) + soi_bea_ind_codes, + how="inner", + left_on=["INDY_CD"], + right_on=["minor_code_alt"], + suffixes=("_x", "_y"), + copy=True, + ) # apportion s corp data across industries within sectors so it has the same # level of industry detail as total corp data @@ -92,16 +105,16 @@ def load_corp_data(): # merge s corp and total corp to find c corp only c_corp = tot_corp.merge( - s_corp, how='inner', on=['INDY_CD'], suffixes=('_x', '_y'), - copy=True) + s_corp, how="inner", on=["INDY_CD"], suffixes=("_x", "_y"), copy=True + ) # calculate c corp values by minor industry as total corp less s corp for var in columns: - c_corp[var] = c_corp[var + '_x'] - c_corp[var + '_y'] + c_corp[var] = c_corp[var + "_x"] - c_corp[var + "_y"] # clean up data by dropping and renaming columns - c_corp.drop(list(x + '_x' for x in columns), axis=1, inplace=True) - c_corp.drop(list(x + '_y' for x in columns), axis=1, inplace=True) + c_corp.drop(list(x + "_x" for x in columns), axis=1, inplace=True) + c_corp.drop(list(x + "_y" for x in columns), axis=1, inplace=True) # NOTE: # totals in s_corp match totals in SOI data @@ -110,21 +123,38 @@ # not return value for "all industries". It's within 1%, but # difference can't be accounted for (sum over industry > totals for # all industries) - s_corp.rename(columns={"LAND": "Land", "INVNTRY": "Inventories", - "DPRCBL_ASSTS": "Fixed Assets", - "NET_DPR": "Depreciation", - "INDY_CD": "minor_code_alt"}, inplace=True) - c_corp.rename(columns={"LAND": "Land", "INVNTRY": "Inventories", - "DPRCBL_ASSTS": "Fixed Assets", - "NET_DPR": "Depreciation"}, - inplace=True) - tot_corp.rename(columns={"LAND": "Land", "INVNTRY": "Inventories", - "DPRCBL_ASSTS": "Fixed Assets", - "NET_DPR": "Depreciation", - "INDY_CD": "minor_code_alt"}, inplace=True) + s_corp.rename( + columns={ + "LAND": "Land", + "INVNTRY": "Inventories", + "DPRCBL_ASSTS": "Fixed Assets", + "NET_DPR": "Depreciation", + "INDY_CD": "minor_code_alt", + }, + inplace=True, + ) + c_corp.rename( + columns={ + "LAND": "Land", + "INVNTRY": "Inventories", + "DPRCBL_ASSTS": "Fixed Assets", + "NET_DPR": "Depreciation", + }, + inplace=True, + ) + tot_corp.rename( + columns={ + "LAND": "Land", + "INVNTRY": "Inventories", + "DPRCBL_ASSTS": "Fixed Assets", + "NET_DPR": "Depreciation", + "INDY_CD": "minor_code_alt", + }, + inplace=True, + ) # Creates a dictionary of a sector : dataframe - corp_data = {'tot_corp': tot_corp, 'c_corp': c_corp, 's_corp': s_corp} + corp_data = {"tot_corp": tot_corp, "c_corp": c_corp, "s_corp": s_corp} for k, v in corp_data.items(): v.rename({c: str(c) for c in v.columns}) return corp_data @@ -146,11 +176,11 @@ def calc_proportions(tot_corp, s_corp, columns): """ # find ratio of variable in minor industry to variable in sector # in total corp data - corp_ratios = tot_corp[['INDY_CD', 'sector_code'] + columns].copy() + corp_ratios = tot_corp[["INDY_CD", "sector_code"] + columns].copy() for var in columns: - corp_ratios[var + '_ratio'] =\ - tot_corp.groupby(['sector_code'])[var].apply(lambda x: x / - float(x.sum())) + corp_ratios[var + "_ratio"] = tot_corp.groupby(["sector_code"])[ + var + ].apply(lambda x: x / float(x.sum())) corp_ratios.drop(columns, axis=1, inplace=True) @@ -159,19 +189,25 @@ def calc_proportions(tot_corp, s_corp, columns): # first just keep s corp columns we want # merge ratios to s corp data s_corp = corp_ratios.merge( - s_corp, how='inner',
left_on=['sector_code'], - right_on=['INDY_CD'], suffixes=('_x', '_y'), copy=True, - indicator=True) + s_corp, + how="inner", + left_on=["sector_code"], + right_on=["INDY_CD"], + suffixes=("_x", "_y"), + copy=True, + indicator=True, + ) # calculate s corp values by minor industry using ratios for var in columns: - s_corp[var + '_final'] = s_corp[var] * s_corp[var + '_ratio'] + s_corp[var + "_final"] = s_corp[var] * s_corp[var + "_ratio"] # clean up data by dropping and renaming columns - s_corp.drop(['INDY_CD_y', '_merge', 'sector_code'] + columns, axis=1, - inplace=True) - s_corp.drop(list(x + '_ratio' for x in columns), axis=1, inplace=True) + s_corp.drop( + ["INDY_CD_y", "_merge", "sector_code"] + columns, axis=1, inplace=True + ) + s_corp.drop(list(x + "_ratio" for x in columns), axis=1, inplace=True) s_corp.rename(columns={"INDY_CD_x": "INDY_CD"}, inplace=True) - s_corp.columns = s_corp.columns.str.replace('_final', '') + s_corp.columns = s_corp.columns.str.replace("_final", "") return s_corp diff --git a/data/pull_soi_partner.py b/data/pull_soi_partner.py index 4cb518f4..c0cb59d4 100644 --- a/data/pull_soi_partner.py +++ b/data/pull_soi_partner.py @@ -15,13 +15,14 @@ import pandas as pd from ccc.utils import to_str from data_paths import get_paths + globals().update(get_paths()) # Constants -_AST_FILE_FCTR = 10 ** 3 +_AST_FILE_FCTR = 10**3 _SHAPE = (131, 4) -_CODE_RANGE = ['32', '33', '45', '49'] -_PARENTS = {'32': '31', '33': '31', '45': '44', '49': '48'} +_CODE_RANGE = ["32", "33", "45", "49"] +_PARENTS = {"32": "31", "33": "31", "45": "44", "49": "48"} def load_partner_data(entity_dfs): @@ -71,13 +72,13 @@ def load_partner_data(entity_dfs): # and land for partnerships xwalk = pd.read_csv(_DETAIL_PART_CROSS_PATH) xwalk.rename({k: str(k) for k in xwalk.columns}, inplace=True) - xwalk['Industry:'] =\ - xwalk['Industry:'].apply(lambda x: re.sub(r'[\s+]', '', x)) + xwalk["Industry:"] = xwalk["Industry:"].apply( + lambda x: re.sub(r"[\s+]", "", x) + ) # keep only codes that help to identify complete industries - xwalk = xwalk[xwalk['complete'] == 1] + xwalk = xwalk[xwalk["complete"] == 1] # read in partner data - partner assets - df = format_excel( - pd.read_excel(_AST_FILE, skiprows=2, skipfooter=6)) + df = format_excel(pd.read_excel(_AST_FILE, skiprows=2, skipfooter=6)) # Cuts off the repeated columns so only the data for all # partnerships remains df.index = [to_str(x) for x in df.index] @@ -88,20 +89,18 @@ # Keep only variables of interest df03.columns = [to_str(c) for c in df03.columns] try: - df03['Fixed Assets'] =\ - (df03['Depreciable assets'] - - df03['Less: Accumulated depreciation']) + df03["Fixed Assets"] = ( + df03["Depreciable assets"] - df03["Less: Accumulated depreciation"] + ) except Exception: print(df03.columns) raise - df03 = df03[['Item', 'Fixed Assets', 'Inventories', - 'Land']] - df03['Item'] =\ - df03['Item'].apply(lambda x: re.sub(r'[\s+]', '', x)) + df03 = df03[["Item", "Fixed Assets", "Inventories", "Land"]] + df03["Item"] = df03["Item"].apply(lambda x: re.sub(r"[\s+]", "", x)) # partner data - income - df01 = format_excel( - pd.read_excel(_INC_FILE, skiprows=2, skipfooter=6)) + df01 = format_excel(pd.read_excel(_INC_FILE, skiprows=2, skipfooter=6)) # Cuts off the repeated columns so only the data for all # partnerships remains df01 = df01.T.groupby(sort=False, level=0).first().T @@ -109,88 +108,125 @@ # Fixing the index labels of the new dataframe df01.reset_index(inplace=True, drop=True)
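# The transpose/groupby/first/transpose idiom used above ("Cuts off the
# repeated columns") dedupes duplicated column labels, keeping the first
# non-null value for each label. A toy illustration:
import pandas as pd

dup = pd.DataFrame([[1, 2, 3]], columns=["a", "b", "a"])
deduped = dup.T.groupby(sort=False, level=0).first().T
# deduped.columns -> ["a", "b"]; the second "a" column is dropped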
# Keep only variables of interest - df01 = df01[['Item', 'Depreciation']] - df01['Item old'] = df01['Item'].str.strip() - df01['Item'] =\ - df01['Item'].apply(lambda x: re.sub(r'[\s+]', '', x)) + df01 = df01[["Item", "Depreciation"]] + df01["Item old"] = df01["Item"].str.strip() + df01["Item"] = df01["Item"].apply(lambda x: re.sub(r"[\s+]", "", x)) # merge two partner data sources together so that all variables are together - df03 = df03.merge(df01, how='inner', on=['Item'], copy=True) + df03 = df03.merge(df01, how="inner", on=["Item"], copy=True) # merge industry codes to partner data - df03 = df03.merge(xwalk, how='inner', left_on=['Item'], - right_on=['Industry:'], copy=True) - df03.drop(['Item', 'Industry:', 'Codes:', 'Notes:', - 'complete'], axis=1, inplace=True) + df03 = df03.merge( + xwalk, how="inner", left_on=["Item"], right_on=["Industry:"], copy=True + ) + df03.drop( + ["Item", "Industry:", "Codes:", "Notes:", "complete"], + axis=1, + inplace=True, + ) # Sums together the repeated codes into one industry - df03 = df03.groupby('INDY_CD', sort=False).sum() + df03 = df03.groupby("INDY_CD", sort=False).sum() df03.reset_index(inplace=True) # create ratios for minor industry assets using corporate data # read in crosswalk for bea and soi industry codes soi_bea_ind_codes = pd.read_csv( - _SOI_BEA_CROSS, dtype={'bea_ind_code': str}) - soi_bea_ind_codes.columns =\ - [to_str(c) for c in soi_bea_ind_codes.columns] - soi_bea_ind_codes.drop('notes', axis=1, inplace=True) + _SOI_BEA_CROSS, dtype={"bea_ind_code": str} + ) + soi_bea_ind_codes.columns = [to_str(c) for c in soi_bea_ind_codes.columns] + soi_bea_ind_codes.drop("notes", axis=1, inplace=True) # drop one repeated minor ind code in crosswalk - soi_bea_ind_codes.drop_duplicates( - subset=['minor_code_alt'], inplace=True) + soi_bea_ind_codes.drop_duplicates(subset=["minor_code_alt"], inplace=True) # merge codes to partner data # likely better way to do this...
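# SOI codes encode their level of aggregation in their digit count: 2-digit
# codes are sectors, 3-digit codes are major industries, and 6-digit codes
# are minor industries. The numeric-range masks below pick out each level;
# e.g. 9 < code < 100 keeps exactly the 2-digit codes. The same test written
# out as a small helper (an illustration, not code used by this script):
def soi_code_level(code):
    # classify an integer SOI industry code by its digit count
    if 9 < code < 100:
        return "sector"
    if 99 < code < 1000:
        return "major"
    if 99999 < code < 1000000:
        return "minor"
    return "other"

assert soi_code_level(31) == "sector"
assert soi_code_level(531115) == "minor"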
- df03_sector = df03[(df03['INDY_CD'] > 9) & - (df03['INDY_CD'] < 100)] - df03_major = df03[(df03['INDY_CD'] > 99) & - (df03['INDY_CD'] < 1000)] - df03_minor = df03[(df03['INDY_CD'] > 99999) & - (df03['INDY_CD'] < 1000000)] + df03_sector = df03[(df03["INDY_CD"] > 9) & (df03["INDY_CD"] < 100)] + df03_major = df03[(df03["INDY_CD"] > 99) & (df03["INDY_CD"] < 1000)] + df03_minor = df03[(df03["INDY_CD"] > 99999) & (df03["INDY_CD"] < 1000000)] sector_df = df03_sector.merge( - soi_bea_ind_codes, how='inner', left_on=['INDY_CD'], - right_on=['sector_code'], copy=True, indicator=True) + soi_bea_ind_codes, + how="inner", + left_on=["INDY_CD"], + right_on=["sector_code"], + copy=True, + indicator=True, + ) major_df = df03_major.merge( - soi_bea_ind_codes, how='inner', left_on=['INDY_CD'], - right_on=['major_code'], copy=True, indicator=True) + soi_bea_ind_codes, + how="inner", + left_on=["INDY_CD"], + right_on=["major_code"], + copy=True, + indicator=True, + ) minor_df = df03_minor.merge( - soi_bea_ind_codes, how='inner', left_on=['INDY_CD'], - right_on=['minor_code'], copy=True, indicator=True) - part_data = sector_df.append( - [major_df, minor_df], sort=True, - ignore_index=True).copy().reset_index() - part_data.drop(['bea_inv_name', 'bea_code', '_merge'], axis=1, - inplace=True) + soi_bea_ind_codes, + how="inner", + left_on=["INDY_CD"], + right_on=["minor_code"], + copy=True, + indicator=True, + ) + part_data = ( + sector_df.append([major_df, minor_df], sort=True, ignore_index=True) + .copy() + .reset_index() + ) + part_data.drop( + ["bea_inv_name", "bea_code", "_merge"], axis=1, inplace=True + ) # merge codes to total corp data # inner join means that we keep only rows that match in both datasets # this should keep only unique soi minor industries # in total corp data - note that s corp data already unique by sector - columns = ['Fixed Assets', 'Inventories', 'Land', 'Depreciation'] - s_corp = entity_dfs['s_corp'][['minor_code_alt']+columns] + columns = ["Fixed Assets", "Inventories", "Land", "Depreciation"] + s_corp = entity_dfs["s_corp"][["minor_code_alt"] + columns] corp = s_corp.merge( - soi_bea_ind_codes, how='inner', on=['minor_code_alt'], - suffixes=('_x', '_y'), copy=True) + soi_bea_ind_codes, + how="inner", + on=["minor_code_alt"], + suffixes=("_x", "_y"), + copy=True, + ) for var in columns: - corp[var+'_ratio'] =\ - corp.groupby(['major_code'])[var].apply( - lambda x: x / float(x.sum())) + corp[var + "_ratio"] = corp.groupby(["major_code"])[var].apply( + lambda x: x / float(x.sum()) + ) - corp.drop(['bea_inv_name', 'bea_code', 'sector_code', - 'minor_code'] + columns, axis=1, inplace=True) + corp.drop( + ["bea_inv_name", "bea_code", "sector_code", "minor_code"] + columns, + axis=1, + inplace=True, + ) # merge these ratios to the partner data part_data = part_data.merge( - corp, how='right', on=['minor_code_alt'], suffixes=('_x', '_y'), - copy=True) + corp, + how="right", + on=["minor_code_alt"], + suffixes=("_x", "_y"), + copy=True, + ) # allocate capital based on ratios for var in columns: - part_data[var] = part_data[var] * part_data[var + '_ratio'] - - part_data.drop(list(x + '_ratio' for x in columns), axis=1, - inplace=True) - part_data.drop(['index', 'sector_code', 'major_code_x', 'minor_code', - 'INDY_CD', 'major_code_y'], axis=1, inplace=True) + part_data[var] = part_data[var] * part_data[var + "_ratio"] + + part_data.drop(list(x + "_ratio" for x in columns), axis=1, inplace=True) + part_data.drop( + [ + "index", + "sector_code", + "major_code_x", + "minor_code", + "INDY_CD", + 
"major_code_y", + ], + axis=1, + inplace=True, + ) # !!! Partner data has right industry breakouts, and ratio sum to # 1 in ind, but totals not adding up to SOI controls. Not quite @@ -201,111 +237,154 @@ def load_partner_data(entity_dfs): # Attribute by partner type # Read in data by partner type (gives income allocation by partner type) - df05 = format_excel( - pd.read_excel(_TYP_FILE, skiprows=1, skipfooter=5)) + df05 = format_excel(pd.read_excel(_TYP_FILE, skiprows=1, skipfooter=5)) df05.columns = [to_str(c) for c in df05.columns] - df05 = df05[['Item', 'All partners', 'Corporate general partners', - 'Corporate limited partners', - 'Individual general partners', - 'Individual limited partners', - 'Partnership general partners', - 'Partnership limited partners', - 'Tax-exempt organization general partners', - 'Tax-exempt organization limited partners', - 'Nominee and other general partners', - 'Nominee and other limited partners']] + df05 = df05[ + [ + "Item", + "All partners", + "Corporate general partners", + "Corporate limited partners", + "Individual general partners", + "Individual limited partners", + "Partnership general partners", + "Partnership limited partners", + "Tax-exempt organization general partners", + "Tax-exempt organization limited partners", + "Nominee and other general partners", + "Nominee and other limited partners", + ] + ] # # create dictionary with shorter part type names - part_types = {'Corporate general partners': 'corp_gen', - 'Corporate limited partners': 'corp_lim', - 'Individual general partners': 'indv_gen', - 'Individual limited partners': 'indv_lim', - 'Partnership general partners': 'prt_gen', - 'Partnership limited partners': 'prt_lim', - 'Tax-exempt organization general partners': 'tax_gen', - 'Tax-exempt organization limited partners': 'tax_lim', - 'Nominee and other general partners': 'nom_gen', - 'Nominee and other limited partners': 'nom_lim'} + part_types = { + "Corporate general partners": "corp_gen", + "Corporate limited partners": "corp_lim", + "Individual general partners": "indv_gen", + "Individual limited partners": "indv_lim", + "Partnership general partners": "prt_gen", + "Partnership limited partners": "prt_lim", + "Tax-exempt organization general partners": "tax_gen", + "Tax-exempt organization limited partners": "tax_lim", + "Nominee and other general partners": "nom_gen", + "Nominee and other limited partners": "nom_lim", + } # reshape data - df05 = pd.melt(df05, id_vars=['Item'], - value_vars=['Corporate general partners', - 'Corporate limited partners', - 'Individual general partners', - 'Individual limited partners', - 'Partnership general partners', - 'Partnership limited partners', - 'Tax-exempt organization general partners', - 'Tax-exempt organization limited partners', - 'Nominee and other general partners', - 'Nominee and other limited partners'], - var_name='part_type', value_name='net_inc') + df05 = pd.melt( + df05, + id_vars=["Item"], + value_vars=[ + "Corporate general partners", + "Corporate limited partners", + "Individual general partners", + "Individual limited partners", + "Partnership general partners", + "Partnership limited partners", + "Tax-exempt organization general partners", + "Tax-exempt organization limited partners", + "Nominee and other general partners", + "Nominee and other limited partners", + ], + var_name="part_type", + value_name="net_inc", + ) # merge in codes typ_cross = pd.read_csv(_TYP_IN_CROSS_PATH) typ_cross.columns = [to_str(c) for c in typ_cross.columns] - typ_cross['Industry:'] = 
typ_cross['Industry:'].str.strip() - df05['Item'] = df05['Item'].str.strip() + typ_cross["Industry:"] = typ_cross["Industry:"].str.strip() + df05["Item"] = df05["Item"].str.strip() df05 = df05.merge( - typ_cross, how='inner', left_on=['Item'], - right_on=['Industry:'], copy=True) + typ_cross, + how="inner", + left_on=["Item"], + right_on=["Industry:"], + copy=True, + ) # # create sums by group - grouped = pd.DataFrame({'sum': df05.groupby(['Codes:']). - apply(abs_sum, 'net_inc')}).reset_index() + grouped = pd.DataFrame( + {"sum": df05.groupby(["Codes:"]).apply(abs_sum, "net_inc")} + ).reset_index() # merge grouped data back to original df # One could make this more efficient - one line of code - with appropriate # pandas methods using groupby and apply above - df05 = df05.merge(grouped, how='left', on=['Codes:'], copy=True) - df05['inc_ratio'] = (df05['net_inc'].astype(float).abs() / - df05['sum'].replace({0: np.nan})).replace({np.nan: 0}) - df05 = df05[['Codes:', 'part_type', 'net_inc', 'inc_ratio']] + df05 = df05.merge(grouped, how="left", on=["Codes:"], copy=True) + df05["inc_ratio"] = ( + df05["net_inc"].astype(float).abs() / df05["sum"].replace({0: np.nan}) + ).replace({np.nan: 0}) + df05 = df05[["Codes:", "part_type", "net_inc", "inc_ratio"]] # manufacturing is missing data for 2013, so use overall partnership splits for key in part_types: - df05.loc[(df05['Codes:'] == 31) & (df05['part_type'] == key), - 'inc_ratio'] = \ - df05.loc[(df05['Codes:'] == 1) & - (df05['part_type'] == key), 'inc_ratio'].values + df05.loc[ + (df05["Codes:"] == 31) & (df05["part_type"] == key), "inc_ratio" + ] = df05.loc[ + (df05["Codes:"] == 1) & (df05["part_type"] == key), "inc_ratio" + ].values # add other sector codes for manufacturing - manu = df05[df05['Codes:'] == 31] + manu = df05[df05["Codes:"] == 31] df_manu = (manu.append(manu, sort=True)).reset_index(drop=True) - df_manu.loc[:len(part_types), 'Codes:'] = 32 - df_manu.loc[len(part_types):, 'Codes:'] = 33 - df05 = df05.append(df_manu, sort=True, - ignore_index=True).reset_index(drop=True).copy() + df_manu.loc[: len(part_types), "Codes:"] = 32 + df_manu.loc[len(part_types) :, "Codes:"] = 33 + df05 = ( + df05.append(df_manu, sort=True, ignore_index=True) + .reset_index(drop=True) + .copy() + ) # # Merge SOI codes to BEA data - df05_sector = df05[(df05['Codes:'] > 9) & (df05['Codes:'] < 100)] - df05_major = df05[(df05['Codes:'] > 99) & (df05['Codes:'] < 1000)] - df05_minor = df05[(df05['Codes:'] > 99999) & (df05['Codes:'] < 1000000)] + df05_sector = df05[(df05["Codes:"] > 9) & (df05["Codes:"] < 100)] + df05_major = df05[(df05["Codes:"] > 99) & (df05["Codes:"] < 1000)] + df05_minor = df05[(df05["Codes:"] > 99999) & (df05["Codes:"] < 1000000)] sector_df = df05_sector.merge( - soi_bea_ind_codes, how='inner', left_on=['Codes:'], - right_on=['sector_code'], copy=True, indicator=True) + soi_bea_ind_codes, + how="inner", + left_on=["Codes:"], + right_on=["sector_code"], + copy=True, + indicator=True, + ) major_df = df05_major.merge( - soi_bea_ind_codes, how='inner', left_on=['Codes:'], - right_on=['major_code'], copy=True, indicator=True) + soi_bea_ind_codes, + how="inner", + left_on=["Codes:"], + right_on=["major_code"], + copy=True, + indicator=True, + ) minor_df = df05_minor.merge( - soi_bea_ind_codes, how='inner', left_on=['Codes:'], - right_on=['minor_code'], copy=True, indicator=True) - df05 = sector_df.append([major_df, minor_df], sort=True, - ignore_index=True).copy().reset_index() - df05.drop(['bea_inv_name', 'bea_code', '_merge'], axis=1, - 
inplace=True) + soi_bea_ind_codes, + how="inner", + left_on=["Codes:"], + right_on=["minor_code"], + copy=True, + indicator=True, + ) + df05 = ( + sector_df.append([major_df, minor_df], sort=True, ignore_index=True) + .copy() + .reset_index() + ) + df05.drop(["bea_inv_name", "bea_code", "_merge"], axis=1, inplace=True) # # merge partner type ratios with partner asset data part_assets = df05.merge( - part_data, how='left', on=['minor_code_alt'], copy=True, - indicator=True) - part_assets.drop(['Codes:', '_merge'], axis=1, inplace=True) + part_data, how="left", on=["minor_code_alt"], copy=True, indicator=True + ) + part_assets.drop(["Codes:", "_merge"], axis=1, inplace=True) # allocate across partner type - part_assets['Fixed Assets'] = (part_assets['Fixed Assets'] * - part_assets['inc_ratio']) - part_assets['Inventories'] = (part_assets['Inventories'] * - part_assets['inc_ratio']) - part_assets['Land'] = part_assets['Land'] * part_assets['inc_ratio'] - part_assets['Depreciation'] = (part_assets['Depreciation'] * - part_assets['inc_ratio']) - part_data = {'part_data': part_assets} + part_assets["Fixed Assets"] = ( + part_assets["Fixed Assets"] * part_assets["inc_ratio"] + ) + part_assets["Inventories"] = ( + part_assets["Inventories"] * part_assets["inc_ratio"] + ) + part_assets["Land"] = part_assets["Land"] * part_assets["inc_ratio"] + part_assets["Depreciation"] = ( + part_assets["Depreciation"] * part_assets["inc_ratio"] + ) + part_data = {"part_data": part_assets} return part_data @@ -343,18 +422,19 @@ def format_excel(df): while isinstance(element, float): element = df.iloc[j, :][i] j += 1 - df.iloc[0, :][i] = element.replace('\n', ' ').replace(' ', ' ') + df.iloc[0, :][i] = element.replace("\n", " ").replace(" ", " ") df.dropna(inplace=True) df = df.T column_names = df.iloc[0, :].tolist() - column_names = [x.encode('ascii', 'ignore').lstrip().rstrip() for x - in column_names] + column_names = [ + x.encode("ascii", "ignore").lstrip().rstrip() for x in column_names + ] df.columns = column_names - df = df.drop(df.index[[0, len(df)-1]]) + df = df.drop(df.index[[0, len(df) - 1]]) df = df.fillna(0) - df = df.replace('[d]', 0) - df = df.replace('[d] ', 0) - df = df.replace('[2] ', 0) + df = df.replace("[d]", 0) + df = df.replace("[d] ", 0) + df = df.replace("[2] ", 0) df.reset_index(inplace=True, drop=True) df.iloc[:, 1:] = df.iloc[:, 1:] * _AST_FILE_FCTR diff --git a/data/pull_soi_proprietorship.py b/data/pull_soi_proprietorship.py index 90c0aa7f..b93a06d3 100644 --- a/data/pull_soi_proprietorship.py +++ b/data/pull_soi_proprietorship.py @@ -15,6 +15,7 @@ from ccc.utils import to_str import pull_soi_partner as prt from data_paths import get_paths + globals().update(get_paths()) _DDCT_FILE_FCTR = 10**3 @@ -37,212 +38,310 @@ def load_proprietorship_data(entity_dfs): # Opening data on depreciable fixed assets, inventories, and land # for non-farm sole props nonfarm_df = format_dataframe( - pd.read_excel(_NFARM_PATH, skiprows=2, skipfooter=8)) + pd.read_excel(_NFARM_PATH, skiprows=2, skipfooter=8) + ) # Cuts off the repeated columns so only the data for all sole props # remains nonfarm_df = nonfarm_df.T.groupby(sort=False, level=0).first().T # Fixing the index labels of the new dataframe nonfarm_df.reset_index(inplace=True, drop=True) # Keep only variables of interest - nonfarm_df = nonfarm_df[['Industry', 'Depreciation deduction [1,2]']] - nonfarm_df['Industry'] =\ - nonfarm_df['Industry'].apply(lambda x: re.sub(r'[\s+]', '', x)) + nonfarm_df = nonfarm_df[["Industry", "Depreciation deduction 
[1,2]"]] + nonfarm_df["Industry"] = nonfarm_df["Industry"].apply( + lambda x: re.sub(r"[\s+]", "", x) + ) nonfarm_df.rename( - columns={"Industry": "Item", - "Depreciation deduction [1,2]": "Depreciation"}, - inplace=True) + columns={ + "Industry": "Item", + "Depreciation deduction [1,2]": "Depreciation", + }, + inplace=True, + ) # Opens the nonfarm inventory data nonfarm_inv = prt.format_excel( - pd.read_excel(_NFARM_INV, skiprows=1, skipfooter=8)) + pd.read_excel(_NFARM_INV, skiprows=1, skipfooter=8) + ) # Cuts off the repeated columns so only the data for all sole props remains nonfarm_inv = nonfarm_inv.T.groupby(sort=False, level=0).first().T nonfarm_inv.columns = [to_str(c) for c in nonfarm_inv.columns] # Fixing the index labels of the new dataframe nonfarm_inv.reset_index(inplace=True, drop=True) # Keep only variables of interest - nonfarm_inv = nonfarm_inv[['Net income status, item', - 'Inventory, end of year']] - nonfarm_inv['Net income status, item'] =\ - nonfarm_inv['Net income status, item'].str.strip() - nonfarm_inv.rename(columns={"Net income status, item": "Item", - "Inventory, end of year": "Inventories"}, - inplace=True) - nonfarm_inv['Item'] =\ - nonfarm_inv['Item'].apply(lambda x: re.sub(r'[\s+]', '', x)) + nonfarm_inv = nonfarm_inv[ + ["Net income status, item", "Inventory, end of year"] + ] + nonfarm_inv["Net income status, item"] = nonfarm_inv[ + "Net income status, item" + ].str.strip() + nonfarm_inv.rename( + columns={ + "Net income status, item": "Item", + "Inventory, end of year": "Inventories", + }, + inplace=True, + ) + nonfarm_inv["Item"] = nonfarm_inv["Item"].apply( + lambda x: re.sub(r"[\s+]", "", x) + ) # merge together two sole prop data sources # have to manually fix a couple names to be compatible - nonfarm_df.loc[nonfarm_df['Item'] == - "Otherambulatoryhealthcareservices(includingambulanceservices,bloodandorganbanks)", - 'Item'] = 'Otherambulatoryhealthcareservices(includingambulanceservices,blood,organbanks)' - nonfarm_df.loc[nonfarm_df['Item'] == - "Officesofrealestateagents,brokers,propertymanagers,andappraisers", - 'Item'] = 'Officesofrealestateagents,brokers,propertymanagersandappraisers' - nonfarm_df.loc[nonfarm_df['Item'] == - "OOtherautorepairandmaintenance(includingoilchange,lubrication,andcarwashes)", - 'Item'] = 'Otherautorepairandmaintenance(includingoilchange,lube,andcarwashes)' - nonfarm_df = nonfarm_df.merge(nonfarm_inv, how='inner', on=['Item'], - copy=True) + nonfarm_df.loc[ + nonfarm_df["Item"] + == "Otherambulatoryhealthcareservices(includingambulanceservices,bloodandorganbanks)", + "Item", + ] = "Otherambulatoryhealthcareservices(includingambulanceservices,blood,organbanks)" + nonfarm_df.loc[ + nonfarm_df["Item"] + == "Officesofrealestateagents,brokers,propertymanagers,andappraisers", + "Item", + ] = "Officesofrealestateagents,brokers,propertymanagersandappraisers" + nonfarm_df.loc[ + nonfarm_df["Item"] + == "OOtherautorepairandmaintenance(includingoilchange,lubrication,andcarwashes)", + "Item", + ] = "Otherautorepairandmaintenance(includingoilchange,lube,andcarwashes)" + nonfarm_df = nonfarm_df.merge( + nonfarm_inv, how="inner", on=["Item"], copy=True + ) # read in crosswalk for these data xwalk = pd.read_csv(_DETAIL_SOLE_PROP_CROSS_PATH) # keep only codes that help to identify complete industries - xwalk = xwalk[xwalk['complete'] == 1] - xwalk = xwalk[['Industry', 'INDY_CD']] - xwalk['Industry'] =\ - xwalk['Industry'].apply(lambda x: re.sub(r'[\s+]', '', x)) + xwalk = xwalk[xwalk["complete"] == 1] + xwalk = xwalk[["Industry", 
"INDY_CD"]] + xwalk["Industry"] = xwalk["Industry"].apply( + lambda x: re.sub(r"[\s+]", "", x) + ) # merge industry codes to sole prop data - nonfarm_df = nonfarm_df.merge(xwalk, how='inner', left_on=['Item'], - right_on=['Industry'], copy=True) - nonfarm_df.drop(['Item', 'Industry'], axis=1, inplace=True) + nonfarm_df = nonfarm_df.merge( + xwalk, how="inner", left_on=["Item"], right_on=["Industry"], copy=True + ) + nonfarm_df.drop(["Item", "Industry"], axis=1, inplace=True) # Sums together the repeated codes into one industry - nonfarm_df = nonfarm_df.groupby('INDY_CD', sort=False).sum() + nonfarm_df = nonfarm_df.groupby("INDY_CD", sort=False).sum() nonfarm_df.reset_index(inplace=True) # add some rows for industry codes not in the sole prop data because # they are zero - df = pd.DataFrame(columns=('INDY_CD', 'Depreciation', 'Inventories')) - missing_code_list = [312, 517, 519, 524140, 524142, 524143, 524156, - 524159, 55, 521, 525, 531115] + df = pd.DataFrame(columns=("INDY_CD", "Depreciation", "Inventories")) + missing_code_list = [ + 312, + 517, + 519, + 524140, + 524142, + 524143, + 524156, + 524159, + 55, + 521, + 525, + 531115, + ] for i in range(len(missing_code_list)): - df.loc[i] = [int(missing_code_list[i]), 0., 0.] - nonfarm_df = nonfarm_df.append( - df, sort=True, ignore_index=True).copy().reset_index() + df.loc[i] = [int(missing_code_list[i]), 0.0, 0.0] + nonfarm_df = ( + nonfarm_df.append(df, sort=True, ignore_index=True) + .copy() + .reset_index() + ) # attribute over a minor industry only idenfified in w/ other minor # ind in SOI - nonfarm_df.loc[nonfarm_df['INDY_CD'] == 531115, 'Depreciation'] =\ - (nonfarm_df.loc[nonfarm_df['INDY_CD'] == 531135, - 'Depreciation'].values * 0.5) - nonfarm_df.loc[nonfarm_df['INDY_CD'] == 531115, 'Inventories'] =\ - (nonfarm_df.loc[nonfarm_df['INDY_CD'] == 531135, - 'Inventories'].values * 0.5) - nonfarm_df.loc[nonfarm_df['INDY_CD'] == 531135, 'Depreciation'] =\ - (nonfarm_df.loc[nonfarm_df['INDY_CD'] == 531135, - 'Depreciation'].values * 0.5) - nonfarm_df.loc[nonfarm_df['INDY_CD'] == 531135, 'Inventories'] =\ - (nonfarm_df.loc[nonfarm_df['INDY_CD'] == 531135, - 'Inventories'].values * 0.5) + nonfarm_df.loc[nonfarm_df["INDY_CD"] == 531115, "Depreciation"] = ( + nonfarm_df.loc[nonfarm_df["INDY_CD"] == 531135, "Depreciation"].values + * 0.5 + ) + nonfarm_df.loc[nonfarm_df["INDY_CD"] == 531115, "Inventories"] = ( + nonfarm_df.loc[nonfarm_df["INDY_CD"] == 531135, "Inventories"].values + * 0.5 + ) + nonfarm_df.loc[nonfarm_df["INDY_CD"] == 531135, "Depreciation"] = ( + nonfarm_df.loc[nonfarm_df["INDY_CD"] == 531135, "Depreciation"].values + * 0.5 + ) + nonfarm_df.loc[nonfarm_df["INDY_CD"] == 531135, "Inventories"] = ( + nonfarm_df.loc[nonfarm_df["INDY_CD"] == 531135, "Inventories"].values + * 0.5 + ) # create ratios for minor industry assets using corporate data # read in crosswalk for bea and soi industry codes soi_bea_ind_codes = pd.read_csv( - _SOI_BEA_CROSS, dtype={'bea_ind_code': str}) - soi_bea_ind_codes.drop('notes', axis=1, inplace=True) + _SOI_BEA_CROSS, dtype={"bea_ind_code": str} + ) + soi_bea_ind_codes.drop("notes", axis=1, inplace=True) # drop one repeated minor ind code in crosswalk - soi_bea_ind_codes.drop_duplicates(subset=['minor_code_alt'], inplace=True) + soi_bea_ind_codes.drop_duplicates(subset=["minor_code_alt"], inplace=True) # merge codes to sole prop data # likely better way to do this... 
- nonfarm_sector = nonfarm_df[(nonfarm_df['INDY_CD'] > 9) & - (nonfarm_df['INDY_CD'] < 100)] - nonfarm_major = nonfarm_df[(nonfarm_df['INDY_CD'] > 99) & - (nonfarm_df['INDY_CD'] < 1000)] - nonfarm_minor = nonfarm_df[(nonfarm_df['INDY_CD'] > 99999) & - (nonfarm_df['INDY_CD'] < 1000000)] + nonfarm_sector = nonfarm_df[ + (nonfarm_df["INDY_CD"] > 9) & (nonfarm_df["INDY_CD"] < 100) + ] + nonfarm_major = nonfarm_df[ + (nonfarm_df["INDY_CD"] > 99) & (nonfarm_df["INDY_CD"] < 1000) + ] + nonfarm_minor = nonfarm_df[ + (nonfarm_df["INDY_CD"] > 99999) & (nonfarm_df["INDY_CD"] < 1000000) + ] sector_df = nonfarm_sector.merge( - soi_bea_ind_codes, how='inner', left_on=['INDY_CD'], - right_on=['sector_code'], copy=True, indicator=True) + soi_bea_ind_codes, + how="inner", + left_on=["INDY_CD"], + right_on=["sector_code"], + copy=True, + indicator=True, + ) major_df = nonfarm_major.merge( - soi_bea_ind_codes, how='inner', left_on=['INDY_CD'], - right_on=['major_code'], copy=True, indicator=True) + soi_bea_ind_codes, + how="inner", + left_on=["INDY_CD"], + right_on=["major_code"], + copy=True, + indicator=True, + ) minor_df = nonfarm_minor.merge( - soi_bea_ind_codes, how='inner', left_on=['INDY_CD'], - right_on=['minor_code'], copy=True, indicator=True) - nonfarm_data = sector_df.append( - [major_df, minor_df], sort=True, - ignore_index=True).copy().reset_index() - nonfarm_data.drop(['bea_inv_name', 'bea_code', '_merge'], axis=1, - inplace=True) + soi_bea_ind_codes, + how="inner", + left_on=["INDY_CD"], + right_on=["minor_code"], + copy=True, + indicator=True, + ) + nonfarm_data = ( + sector_df.append([major_df, minor_df], sort=True, ignore_index=True) + .copy() + .reset_index() + ) + nonfarm_data.drop( + ["bea_inv_name", "bea_code", "_merge"], axis=1, inplace=True + ) # merge codes to total part data # inner join means that we keep only rows that match in both datasets # this should keep only unique soi minor industries - columns = ['Inventories', 'Depreciation'] - part_data = entity_dfs['part_data'][['minor_code_alt', 'part_type'] - + columns + - ['Land', 'Fixed Assets']].copy() + columns = ["Inventories", "Depreciation"] + part_data = entity_dfs["part_data"][ + ["minor_code_alt", "part_type"] + columns + ["Land", "Fixed Assets"] + ].copy() # sum at industry-partner type level - part_data = part_data.groupby(['minor_code_alt']).sum().reset_index() - part2 = part_data[['minor_code_alt']+columns].copy() + part_data = part_data.groupby(["minor_code_alt"]).sum().reset_index() + part2 = part_data[["minor_code_alt"] + columns].copy() partner = part2.merge( - soi_bea_ind_codes, how='inner', on=['minor_code_alt'], - suffixes=('_x', '_y'), copy=True) + soi_bea_ind_codes, + how="inner", + on=["minor_code_alt"], + suffixes=("_x", "_y"), + copy=True, + ) for var in columns: - partner[var+'_ratio'] =\ - partner.groupby(['major_code'])[var].apply( - lambda x: x / float(x.sum())) - - partner.drop(['bea_inv_name', 'bea_code', 'sector_code', - 'minor_code'] + columns, axis=1, inplace=True) + partner[var + "_ratio"] = partner.groupby(["major_code"])[var].apply( + lambda x: x / float(x.sum()) + ) + + partner.drop( + ["bea_inv_name", "bea_code", "sector_code", "minor_code"] + columns, + axis=1, + inplace=True, + ) # merge these ratios to the sole prop data nonfarm = nonfarm_data.merge( - partner, how='right', on=['minor_code_alt'], - suffixes=('_x', '_y'), copy=True, indicator=True) + partner, + how="right", + on=["minor_code_alt"], + suffixes=("_x", "_y"), + copy=True, + indicator=True, + ) # filling in missing values. 
This works ok now but need to be # careful as the ratio value could cause problems - nonfarm['Inventories'].fillna(value=0., axis=0, inplace=True) - nonfarm['Inventories_ratio'].fillna(value=1., axis=0, inplace=True) + nonfarm["Inventories"].fillna(value=0.0, axis=0, inplace=True) + nonfarm["Inventories_ratio"].fillna(value=1.0, axis=0, inplace=True) # allocate capital based on ratios for var in columns: - nonfarm.loc[nonfarm['INDY_CD'] > 99999, var + '_ratio'] = 1. - nonfarm[var] = nonfarm[var] * nonfarm[var + '_ratio'] - - nonfarm.drop(list(x + '_ratio' for x in columns), axis=1, inplace=True) - nonfarm.drop(['index', 'sector_code', 'major_code_x', 'minor_code', - 'major_code_y', '_merge'], axis=1, inplace=True) + nonfarm.loc[nonfarm["INDY_CD"] > 99999, var + "_ratio"] = 1.0 + nonfarm[var] = nonfarm[var] * nonfarm[var + "_ratio"] + + nonfarm.drop(list(x + "_ratio" for x in columns), axis=1, inplace=True) + nonfarm.drop( + [ + "index", + "sector_code", + "major_code_x", + "minor_code", + "major_code_y", + "_merge", + ], + axis=1, + inplace=True, + ) # data here totals out for allocable industries (so doesn't hit # SOI totals for all industries bc some not allocated to an industry) # merge in partner data to get ratios need to impute FA's and land - part_ratios = part_data[['minor_code_alt', 'Fixed Assets', - 'Depreciation', 'Land']].copy() - part_ratios['FA_ratio'] = (part_ratios['Fixed Assets'] / - part_ratios['Depreciation']) - part_ratios['Land_ratio'] = (part_ratios['Land'] / - part_ratios['Fixed Assets']) - part_ratios = part_ratios[['minor_code_alt', 'FA_ratio', 'Land_ratio']] + part_ratios = part_data[ + ["minor_code_alt", "Fixed Assets", "Depreciation", "Land"] + ].copy() + part_ratios["FA_ratio"] = ( + part_ratios["Fixed Assets"] / part_ratios["Depreciation"] + ) + part_ratios["Land_ratio"] = ( + part_ratios["Land"] / part_ratios["Fixed Assets"] + ) + part_ratios = part_ratios[["minor_code_alt", "FA_ratio", "Land_ratio"]] nonfarm = nonfarm.merge( - part_ratios, how='inner', on=['minor_code_alt'], - suffixes=('_x', '_y'), copy=True, indicator=False) + part_ratios, + how="inner", + on=["minor_code_alt"], + suffixes=("_x", "_y"), + copy=True, + indicator=False, + ) # need to find ratio of assets from BEA to SOI - bea_ratio = 1. - nonfarm['Fixed Assets'] = (nonfarm['FA_ratio'] * - nonfarm['Depreciation'] * bea_ratio) - nonfarm['Land'] = nonfarm['Land_ratio'] * nonfarm['Fixed Assets'] - nonfarm.drop(['Land_ratio', 'FA_ratio'], axis=1, inplace=True) + bea_ratio = 1.0 + nonfarm["Fixed Assets"] = ( + nonfarm["FA_ratio"] * nonfarm["Depreciation"] * bea_ratio + ) + nonfarm["Land"] = nonfarm["Land_ratio"] * nonfarm["Fixed Assets"] + nonfarm.drop(["Land_ratio", "FA_ratio"], axis=1, inplace=True) # Calculates the FA and Land for Farm sole proprietorships. # Note: we should update so read in raw Census of Agriculture # What about inventories for farm sole props? Worry about?? 
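# In words, the farm imputation below chains a few shares (reading the R_*
# and Q_* columns of farm_data.csv as asset values and the A_* columns as
# acreage, which is an assumption about that file's contents):
#
#   land_share    = Land_111 / (Fixed Assets_111 + Land_111)  # partnership data, industry 111
#   part_land     = land_share * (R_p + Q_p)
#   sp_farm_land  = A_sp * part_land / A_p                    # scale partner land to sole props
#   sp_farm_assts = R_sp + Q_sp - sp_farm_land                # remainder treated as fixed assets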
farm_df = pd.read_csv(_FARM_IN_PATH) - asst_land = farm_df['R_p'][0] + farm_df['Q_p'][0] - land_ratio =\ - np.array((part_data.loc[part_data['minor_code_alt'] == 111, - 'Land'] / - (part_data.loc[part_data['minor_code_alt'] == 111, - 'Fixed Assets'] + - part_data.loc[part_data['minor_code_alt'] == 111, - 'Land']))) + asst_land = farm_df["R_p"][0] + farm_df["Q_p"][0] + land_ratio = np.array( + ( + part_data.loc[part_data["minor_code_alt"] == 111, "Land"] + / ( + part_data.loc[ + part_data["minor_code_alt"] == 111, "Fixed Assets" + ] + + part_data.loc[part_data["minor_code_alt"] == 111, "Land"] + ) + ) + ) part_land = land_ratio * asst_land - sp_farm_land = farm_df['A_sp'][0] * part_land / farm_df['A_p'][0] - sp_farm_assts = farm_df['R_sp'][0] + farm_df['Q_sp'][0] - sp_farm_land + sp_farm_land = farm_df["A_sp"][0] * part_land / farm_df["A_p"][0] + sp_farm_assts = farm_df["R_sp"][0] + farm_df["Q_sp"][0] - sp_farm_land sp_farm_cstock = np.array([sp_farm_assts, 0, sp_farm_land]) # Adds farm data to industry 111 - nonfarm.loc[nonfarm['INDY_CD'] == 111, 'Fixed Assets'] += sp_farm_cstock[0] - nonfarm.loc[nonfarm['INDY_CD'] == 111, 'Land'] += sp_farm_cstock[2] + nonfarm.loc[nonfarm["INDY_CD"] == 111, "Fixed Assets"] += sp_farm_cstock[0] + nonfarm.loc[nonfarm["INDY_CD"] == 111, "Land"] += sp_farm_cstock[2] # Creates the dictionary of sector : dataframe that is returned and # used to update entity_dfs - data = {'sole_prop_data': nonfarm} + data = {"sole_prop_data": nonfarm} return data @@ -261,10 +360,10 @@ def format_columns(nonfarm_df): columns = nonfarm_df.columns.tolist() for i in range(0, len(columns)): column = columns[i] - if '.1' in column: + if ".1" in column: column = column[:-2] - if '\n' in column: - column = column.replace('\n', ' ').replace('\r', '') + if "\n" in column: + column = column.replace("\n", " ").replace("\r", "") column = column.rstrip() columns[i] = column nonfarm_df.columns = columns @@ -287,10 +386,9 @@ def format_dataframe(nonfarm_df): # Creates a list from the first row of the dataframe columns = nonfarm_df.iloc[0].tolist() # Replaces the first item in the list with a new label - columns[0] = 'Industry' + columns[0] = "Industry" # Sets the values of the columns on the dataframes - nonfarm_df.columns = list( - to_str(x).replace('\n', ' ') for x in columns) + nonfarm_df.columns = list(to_str(x).replace("\n", " ") for x in columns) # Drops the first couple of rows and last row in the dataframe nonfarm_df.dropna(inplace=True) # Multiplies each value in the dataframe by a factor of 1000 diff --git a/data/read_bea.py b/data/read_bea.py index 38e21d82..56cbb8b6 100644 --- a/data/read_bea.py +++ b/data/read_bea.py @@ -9,6 +9,7 @@ data. The majority of the script takes these crosswalks and creates dictionaries to map the codes. 
""" + # Packages: import numpy as np import pandas as pd @@ -18,9 +19,9 @@ globals().update(get_paths()) # Constant factors: -_BEA_IN_FILE_FCTR = 10 ** 6 -_BEA_INV_RES_FCTR = 10 ** 9 -_FIN_ACCT_FILE_FCTR = 10 ** 9 +_BEA_IN_FILE_FCTR = 10**6 +_BEA_INV_RES_FCTR = 10**9 +_FIN_ACCT_FILE_FCTR = 10**9 _START_POS = 8 _SKIP1 = 47 _SKIP2 = 80 @@ -41,70 +42,103 @@ def fixed_assets(soi_data): """ # Read in BEA fixed asset table bea_all = pd.read_excel(_BEA_ASSET_PATH, sheet_name="Datasets") - bea_FA = bea_all[['Unnamed: 0', '2013']].copy() - bea_FA.rename(columns={'Unnamed: 0': 'long_code'}, inplace=True) - bea_FA.dropna(subset=['long_code'], inplace=True) + bea_FA = bea_all[["Unnamed: 0", "2013"]].copy() + bea_FA.rename(columns={"Unnamed: 0": "long_code"}, inplace=True) + bea_FA.dropna(subset=["long_code"], inplace=True) bea_FA.reset_index(drop=True, inplace=True) bea_FA.rename(columns={"2013": "assets"}, inplace=True) - bea_FA['assets'] = bea_FA['assets'] * _BEA_IN_FILE_FCTR - bea_FA['bea_asset_code'] = bea_FA.long_code.str[-6:-2] - bea_FA['bea_ind_code'] = bea_FA.long_code.str[3:7] - bea_FA['bea_asset_code'] = bea_FA['bea_asset_code'].str.strip() + bea_FA["assets"] = bea_FA["assets"] * _BEA_IN_FILE_FCTR + bea_FA["bea_asset_code"] = bea_FA.long_code.str[-6:-2] + bea_FA["bea_ind_code"] = bea_FA.long_code.str[3:7] + bea_FA["bea_asset_code"] = bea_FA["bea_asset_code"].str.strip() # Read in BEA asset names bea_asset_names = pd.read_excel( - _BEA_ASSET_PATH, sheet_name="110C", header=5, - converters={'Asset Codes': str}) - bea_asset_names = bea_asset_names[['Asset Codes', 'NIPA Asset Types']] - bea_asset_names.dropna(subset=['Asset Codes'], inplace=True) - bea_asset_names.rename(columns={"Asset Codes": "bea_asset_code", - "NIPA Asset Types": "Asset Type"}, - inplace=True) - bea_asset_names['bea_asset_code'] =\ - bea_asset_names['bea_asset_code'].str.strip() - bea_asset_names['Asset Type'] =\ - bea_asset_names['Asset Type'].str.strip() + _BEA_ASSET_PATH, + sheet_name="110C", + header=5, + converters={"Asset Codes": str}, + ) + bea_asset_names = bea_asset_names[["Asset Codes", "NIPA Asset Types"]] + bea_asset_names.dropna(subset=["Asset Codes"], inplace=True) + bea_asset_names.rename( + columns={ + "Asset Codes": "bea_asset_code", + "NIPA Asset Types": "Asset Type", + }, + inplace=True, + ) + bea_asset_names["bea_asset_code"] = bea_asset_names[ + "bea_asset_code" + ].str.strip() + bea_asset_names["Asset Type"] = bea_asset_names["Asset Type"].str.strip() # Merge asset names to asset data - bea_FA = bea_FA.merge(bea_asset_names, how='inner', - on=['bea_asset_code'], copy=True) + bea_FA = bea_FA.merge( + bea_asset_names, how="inner", on=["bea_asset_code"], copy=True + ) # Read in BEA industry names bea_ind_names = pd.read_excel( - _BEA_ASSET_PATH, sheet_name="readme", - converters={'BEA CODE': str}, header=14) - bea_ind_names = bea_ind_names[['INDUSTRY TITLE ', 'BEA CODE']] - bea_ind_names.dropna(subset=['BEA CODE'], inplace=True) - bea_ind_names.rename(columns={"INDUSTRY TITLE ": "Industry", - "BEA CODE": "bea_ind_code"}, - inplace=True) + _BEA_ASSET_PATH, + sheet_name="readme", + converters={"BEA CODE": str}, + header=14, + ) + bea_ind_names = bea_ind_names[["INDUSTRY TITLE ", "BEA CODE"]] + bea_ind_names.dropna(subset=["BEA CODE"], inplace=True) + bea_ind_names.rename( + columns={"INDUSTRY TITLE ": "Industry", "BEA CODE": "bea_ind_code"}, + inplace=True, + ) # Merge industry names to asset data - bea_FA = bea_FA.merge(bea_ind_names, how='inner', - on=['bea_ind_code'], copy=True) + bea_FA = bea_FA.merge( + 
bea_ind_names, how="inner", on=["bea_ind_code"], copy=True + ) # Read in cross-walk between IRS and BEA Industries soi_bea_ind_codes = pd.read_csv( - _SOI_BEA_CROSS, dtype={'bea_ind_code': str}, encoding='utf-8') - soi_bea_ind_codes.drop('notes', axis=1, inplace=True) + _SOI_BEA_CROSS, dtype={"bea_ind_code": str}, encoding="utf-8" + ) + soi_bea_ind_codes.drop("notes", axis=1, inplace=True) # Merge SOI codes to BEA data bea_FA = bea_FA.merge( - soi_bea_ind_codes, how='left', left_on=['bea_ind_code'], - right_on=['bea_code'], copy=True) + soi_bea_ind_codes, + how="left", + left_on=["bea_ind_code"], + right_on=["bea_code"], + copy=True, + ) # Merge SOI data to BEA data - bea_FA = bea_FA[['assets', 'bea_asset_code', 'bea_ind_code', - 'Asset Type', 'minor_code_alt']].copy() - soi_data = soi_data[['minor_code_alt', 'Fixed Assets', 'Land', - 'entity_type', 'tax_treat', 'part_type']].copy() - bea_FA = bea_FA.merge(soi_data, how='right', on=['minor_code_alt'], - copy=True) - bea_FA['FA_ratio'] =\ - bea_FA.groupby(['bea_ind_code', - 'bea_asset_code'])['Fixed Assets'].apply( - lambda x: x / float(x.sum())) - bea_FA['assets'] = bea_FA['FA_ratio'] * bea_FA['assets'] + bea_FA = bea_FA[ + [ + "assets", + "bea_asset_code", + "bea_ind_code", + "Asset Type", + "minor_code_alt", + ] + ].copy() + soi_data = soi_data[ + [ + "minor_code_alt", + "Fixed Assets", + "Land", + "entity_type", + "tax_treat", + "part_type", + ] + ].copy() + bea_FA = bea_FA.merge( + soi_data, how="right", on=["minor_code_alt"], copy=True + ) + bea_FA["FA_ratio"] = bea_FA.groupby(["bea_ind_code", "bea_asset_code"])[ + "Fixed Assets" + ].apply(lambda x: x / float(x.sum())) + bea_FA["assets"] = bea_FA["FA_ratio"] * bea_FA["assets"] # Totals match up w/in rounding error of BEA if exclude Fed banks # (who are not in tax data, so we want to exclude), BEA industry @@ -128,27 +162,33 @@ def inventories(soi_data): # manufacturing and wholesale trade. Not sure how to read those # are unique names otherwise. 
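# fixed_assets above and the inventories allocation below lean on the same
# share pattern: within a group, divide each value by the group total so the
# shares sum to one, then multiply a group-level aggregate by those shares.
# A toy sketch with invented numbers (transform computes the same shares as
# the groupby/apply spelling used in this file):
import pandas as pd

toy = pd.DataFrame({"grp": ["A", "A", "B"], "x": [1.0, 3.0, 2.0]})
toy["ratio"] = toy["x"] / toy.groupby("grp")["x"].transform("sum")
# toy["ratio"] -> [0.25, 0.75, 1.0]; each group's shares sum to one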
bea_inventories = pd.read_excel( - _BEA_INV, sheet_name="Sheet0", skiprows=6, skipfooter=4) + _BEA_INV, sheet_name="Sheet0", skiprows=6, skipfooter=4 + ) bea_inventories.reset_index() - bea_inventories = bea_inventories[['Unnamed: 1', 'IV.1']].copy() - bea_inventories.rename(columns={"Unnamed: 1": "bea_inv_name", - "IV.1": "BEA Inventories"}, - inplace=True) - bea_inventories['bea_inv_name'] =\ - bea_inventories['bea_inv_name'].str.strip() - bea_inventories['BEA Inventories'] =\ - bea_inventories['BEA Inventories'] * _BEA_INV_RES_FCTR + bea_inventories = bea_inventories[["Unnamed: 1", "IV.1"]].copy() + bea_inventories.rename( + columns={"Unnamed: 1": "bea_inv_name", "IV.1": "BEA Inventories"}, + inplace=True, + ) + bea_inventories["bea_inv_name"] = bea_inventories[ + "bea_inv_name" + ].str.strip() + bea_inventories["BEA Inventories"] = ( + bea_inventories["BEA Inventories"] * _BEA_INV_RES_FCTR + ) # Merge inventories data to SOI data bea_inventories = bea_inventories.merge( - soi_data, how='right', on=['bea_inv_name'], copy=True) + soi_data, how="right", on=["bea_inv_name"], copy=True + ) # attribute BEA inventories across SOI minor industries - bea_inventories['bea_ratio'] =\ - bea_inventories.groupby(['bea_inv_name'])['Inventories'].apply( - lambda x: x / float(x.sum())) - bea_inventories['BEA Inventories'] =\ - bea_inventories['bea_ratio'] * bea_inventories['BEA Inventories'] + bea_inventories["bea_ratio"] = bea_inventories.groupby(["bea_inv_name"])[ + "Inventories" + ].apply(lambda x: x / float(x.sum())) + bea_inventories["BEA Inventories"] = ( + bea_inventories["bea_ratio"] * bea_inventories["BEA Inventories"] + ) # the above hit the BEA control totals return bea_inventories @@ -177,117 +217,159 @@ def land(soi_data, bea_FA): # read in BEA data on residential fixed assets bea_residential = pd.read_excel( - _BEA_RES, sheet_name="Sheet0", skiprows=5, skipfooter=2) + _BEA_RES, sheet_name="Sheet0", skiprows=5, skipfooter=2 + ) bea_residential.reset_index() - bea_residential = bea_residential[[u'\xa0', '2013']].copy() - bea_residential.rename(columns={u"\xa0": "entity_type", - "2013": "Fixed Assets"}, - inplace=True) - bea_residential['Fixed Assets'] *= _BEA_INV_RES_FCTR - bea_residential['entity_type'] =\ - bea_residential['entity_type'].str.strip() - owner_occ_house_FA =\ - np.array(bea_residential.loc[bea_residential['entity_type'] == - 'Households', 'Fixed Assets']) - corp_res_FA =\ - np.array(bea_residential.loc[bea_residential['entity_type'] == - 'Corporate', 'Fixed Assets']) - noncorp_res_FA =\ - np.array(bea_residential.loc[bea_residential['entity_type'] == - 'Sole proprietorships and partnerships', - 'Fixed Assets']) + bea_residential = bea_residential[["\xa0", "2013"]].copy() + bea_residential.rename( + columns={"\xa0": "entity_type", "2013": "Fixed Assets"}, inplace=True + ) + bea_residential["Fixed Assets"] *= _BEA_INV_RES_FCTR + bea_residential["entity_type"] = bea_residential["entity_type"].str.strip() + owner_occ_house_FA = np.array( + bea_residential.loc[ + bea_residential["entity_type"] == "Households", "Fixed Assets" + ] + ) + corp_res_FA = np.array( + bea_residential.loc[ + bea_residential["entity_type"] == "Corporate", "Fixed Assets" + ] + ) + noncorp_res_FA = np.array( + bea_residential.loc[ + bea_residential["entity_type"] + == "Sole proprietorships and partnerships", + "Fixed Assets", + ] + ) # read in Financial Accounts data on total value of real estate in # owner occ sector (includes land and structures) - b101 = pd.read_csv(_B101_PATH, header=5, encoding='utf-8') 
+ b101 = pd.read_csv(_B101_PATH, header=5, encoding="utf-8") b101.reset_index() - b101 = b101[['Unnamed: 0', '2013']].copy() - b101.rename(columns={"Unnamed: 0": "Variable", "2013": "Value"}, - inplace=True) - b101['Value'] *= _FIN_ACCT_FILE_FCTR - b101['Variable'] = b101['Variable'].str.strip() - owner_occ_house_RE =\ - np.array(b101.loc[b101['Variable'] == - 'Households; owner-occupied real estate ' + - 'including vacant land and mobile homes at ' + - 'market value', - 'Value']) + b101 = b101[["Unnamed: 0", "2013"]].copy() + b101.rename( + columns={"Unnamed: 0": "Variable", "2013": "Value"}, inplace=True + ) + b101["Value"] *= _FIN_ACCT_FILE_FCTR + b101["Variable"] = b101["Variable"].str.strip() + owner_occ_house_RE = np.array( + b101.loc[ + b101["Variable"] + == "Households; owner-occupied real estate " + + "including vacant land and mobile homes at " + + "market value", + "Value", + ] + ) # compute value of land for owner occupied housing sector owner_occ_house_land = owner_occ_house_RE - owner_occ_house_FA # create dictionary for owner-occupied housing to be appended to # final dataset with all assets - owner_occ_dict = {'minor_code_alt': [531115, 531115], - 'entity_type': ['owner_occupied_housing', - 'owner_occupied_housing'], - 'tax_treat': ['owner_occupied_housing', - 'owner_occupied_housing'], - 'assets': [np.asscalar(owner_occ_house_FA), - np.asscalar(owner_occ_house_land)], - 'Asset Type': ['Residential', 'Land'], - 'bea_ind_code': [5310, 5310], - 'bea_asset_code': ['RES', 'LAND']} + owner_occ_dict = { + "minor_code_alt": [531115, 531115], + "entity_type": ["owner_occupied_housing", "owner_occupied_housing"], + "tax_treat": ["owner_occupied_housing", "owner_occupied_housing"], + "assets": [ + np.asscalar(owner_occ_house_FA), + np.asscalar(owner_occ_house_land), + ], + "Asset Type": ["Residential", "Land"], + "bea_ind_code": [5310, 5310], + "bea_asset_code": ["RES", "LAND"], + } # update amout of land for non-corporate sector noncorp_land -= owner_occ_house_land # attribute land across tax treatment and industry using SOI data bea_land = soi_data.copy() - bea_land.loc[:, 'BEA Land'] = noncorp_land - bea_land.loc[bea_land['entity_type'] == 's_corp', 'BEA Land'] =\ - corp_land - bea_land.loc[bea_land['entity_type'] == 'c_corp', 'BEA Land'] =\ - corp_land - bea_land['BEA Corp'] = False - bea_land.loc[bea_land['entity_type'] == 's_corp', 'BEA Corp'] = True - bea_land.loc[bea_land['entity_type'] == 'c_corp', 'BEA Corp'] = True - bea_land['land_ratio'] =\ - bea_land.groupby(['BEA Corp'])['Land'].apply( - lambda x: x / float(x.sum())) - bea_land['BEA Land'] = bea_land['land_ratio'] * bea_land['BEA Land'] - bea_land = bea_land[['BEA Land', 'entity_type', 'tax_treat', - 'bea_code', 'minor_code_alt', - 'part_type']].copy() + bea_land.loc[:, "BEA Land"] = noncorp_land + bea_land.loc[bea_land["entity_type"] == "s_corp", "BEA Land"] = corp_land + bea_land.loc[bea_land["entity_type"] == "c_corp", "BEA Land"] = corp_land + bea_land["BEA Corp"] = False + bea_land.loc[bea_land["entity_type"] == "s_corp", "BEA Corp"] = True + bea_land.loc[bea_land["entity_type"] == "c_corp", "BEA Corp"] = True + bea_land["land_ratio"] = bea_land.groupby(["BEA Corp"])["Land"].apply( + lambda x: x / float(x.sum()) + ) + bea_land["BEA Land"] = bea_land["land_ratio"] * bea_land["BEA Land"] + bea_land = bea_land[ + [ + "BEA Land", + "entity_type", + "tax_treat", + "bea_code", + "minor_code_alt", + "part_type", + ] + ].copy() # total land attributed above matches Fin Accts totals for non-owner # occ housing attribute 
residential fixed assets across tax # treatment (they all go to one specific production sector) # attribute residential structures across entity types in proportion # to land - bea_res_assets = bea_FA[bea_FA['minor_code_alt'] == 531115].copy() - bea_res_assets.drop_duplicates(subset=['minor_code_alt', - 'entity_type', 'part_type', - 'tax_treat', 'bea_ind_code'], - inplace=True) + bea_res_assets = bea_FA[bea_FA["minor_code_alt"] == 531115].copy() + bea_res_assets.drop_duplicates( + subset=[ + "minor_code_alt", + "entity_type", + "part_type", + "tax_treat", + "bea_ind_code", + ], + inplace=True, + ) bea_res_assets = pd.DataFrame( - bea_res_assets.groupby(['minor_code_alt', 'entity_type', - 'part_type', 'tax_treat', - 'bea_ind_code'])['Land'].sum() - ).reset_index() - bea_res_assets.loc[:, 'BEA Res Assets'] = noncorp_res_FA - bea_res_assets.loc[bea_res_assets['entity_type'] == 's_corp', - 'BEA Res Assets'] = corp_res_FA - bea_res_assets.loc[bea_res_assets['entity_type'] == 'c_corp', - 'BEA Res Assets'] = corp_res_FA - bea_res_assets['BEA Corp'] = False - bea_res_assets.loc[bea_res_assets['entity_type'] == 's_corp', - 'BEA Corp'] = True - bea_res_assets.loc[bea_res_assets['entity_type'] == 'c_corp', - 'BEA Corp'] = True - bea_res_assets['res_FA_ratio'] =\ - bea_res_assets.groupby(['BEA Corp', - 'minor_code_alt'])['Land'].apply( - lambda x: x / float(x.sum())) - bea_res_assets['assets'] = (bea_res_assets['res_FA_ratio'] * - bea_res_assets['BEA Res Assets']) + bea_res_assets.groupby( + [ + "minor_code_alt", + "entity_type", + "part_type", + "tax_treat", + "bea_ind_code", + ] + )["Land"].sum() + ).reset_index() + bea_res_assets.loc[:, "BEA Res Assets"] = noncorp_res_FA + bea_res_assets.loc[ + bea_res_assets["entity_type"] == "s_corp", "BEA Res Assets" + ] = corp_res_FA + bea_res_assets.loc[ + bea_res_assets["entity_type"] == "c_corp", "BEA Res Assets" + ] = corp_res_FA + bea_res_assets["BEA Corp"] = False + bea_res_assets.loc[ + bea_res_assets["entity_type"] == "s_corp", "BEA Corp" + ] = True + bea_res_assets.loc[ + bea_res_assets["entity_type"] == "c_corp", "BEA Corp" + ] = True + bea_res_assets["res_FA_ratio"] = bea_res_assets.groupby( + ["BEA Corp", "minor_code_alt"] + )["Land"].apply(lambda x: x / float(x.sum())) + bea_res_assets["assets"] = ( + bea_res_assets["res_FA_ratio"] * bea_res_assets["BEA Res Assets"] + ) # create new asset category for residential structures - bea_res_assets['Asset Type'] = 'Residential' - bea_res_assets['bea_asset_code'] = 'RES' - bea_res_assets = bea_res_assets[['Asset Type', 'bea_asset_code', - 'bea_ind_code', 'minor_code_alt', - 'entity_type', 'tax_treat', - 'part_type', 'assets']].copy() + bea_res_assets["Asset Type"] = "Residential" + bea_res_assets["bea_asset_code"] = "RES" + bea_res_assets = bea_res_assets[ + [ + "Asset Type", + "bea_asset_code", + "bea_ind_code", + "minor_code_alt", + "entity_type", + "tax_treat", + "part_type", + "assets", + ] + ].copy() return bea_land, bea_res_assets, owner_occ_dict @@ -309,46 +391,84 @@ def combine(fixed_assets, inventories, land, res_assets, owner_occ_dict): land, residential, owner occupied housing) by industry and entity type """ - fixed_assets = fixed_assets[['assets', 'bea_asset_code', - 'bea_ind_code', 'Asset Type', - 'minor_code_alt', 'entity_type', - 'part_type', 'tax_treat']].copy() - inventories = inventories[['BEA Inventories', 'minor_code_alt', - 'entity_type', 'part_type', 'tax_treat', - 'bea_code']].copy() + fixed_assets = fixed_assets[ + [ + "assets", + "bea_asset_code", + "bea_ind_code", + "Asset Type", 
+ "minor_code_alt", + "entity_type", + "part_type", + "tax_treat", + ] + ].copy() + inventories = inventories[ + [ + "BEA Inventories", + "minor_code_alt", + "entity_type", + "part_type", + "tax_treat", + "bea_code", + ] + ].copy() inventories.rename( - columns={"BEA Inventories": "assets", - "bea_code": "bea_ind_code"}, inplace=True) - inventories['Asset Type'] = 'Inventories' - inventories['bea_asset_code'] = 'INV' - land = land[['BEA Land', 'entity_type', 'part_type', 'tax_treat', - 'bea_code', 'minor_code_alt']].copy() - land.rename(columns={"BEA Land": "assets", - "bea_code": "bea_ind_code"}, inplace=True) - land['Asset Type'] = 'Land' - land['bea_asset_code'] = 'LAND' + columns={"BEA Inventories": "assets", "bea_code": "bea_ind_code"}, + inplace=True, + ) + inventories["Asset Type"] = "Inventories" + inventories["bea_asset_code"] = "INV" + land = land[ + [ + "BEA Land", + "entity_type", + "part_type", + "tax_treat", + "bea_code", + "minor_code_alt", + ] + ].copy() + land.rename( + columns={"BEA Land": "assets", "bea_code": "bea_ind_code"}, + inplace=True, + ) + land["Asset Type"] = "Land" + land["bea_asset_code"] = "LAND" # append dataframes to each other - asset_data = fixed_assets.append( - [inventories, land, res_assets], sort=True, - ignore_index=True).copy().reset_index() + asset_data = ( + fixed_assets.append( + [inventories, land, res_assets], sort=True, ignore_index=True + ) + .copy() + .reset_index() + ) # add owner occupied housing by appending dictionary owner_occ = pd.DataFrame.from_dict(owner_occ_dict) - asset_data = asset_data.append( - owner_occ, sort=True, ignore_index=True).copy().reset_index() + asset_data = ( + asset_data.append(owner_occ, sort=True, ignore_index=True) + .copy() + .reset_index() + ) # Merge industry names to asset data # Read in BEA industry names bea_ind_names = pd.read_excel( - _BEA_ASSET_PATH, sheet_name="readme", - converters={'BEA CODE': str}, header=14) - bea_ind_names = bea_ind_names[['INDUSTRY TITLE ', 'BEA CODE']] - bea_ind_names.dropna(subset=['BEA CODE'], inplace=True) - bea_ind_names.rename(columns={"INDUSTRY TITLE ": "Industry", - "BEA CODE": "bea_ind_code"}, - inplace=True) + _BEA_ASSET_PATH, + sheet_name="readme", + converters={"BEA CODE": str}, + header=14, + ) + bea_ind_names = bea_ind_names[["INDUSTRY TITLE ", "BEA CODE"]] + bea_ind_names.dropna(subset=["BEA CODE"], inplace=True) + bea_ind_names.rename( + columns={"INDUSTRY TITLE ": "Industry", "BEA CODE": "bea_ind_code"}, + inplace=True, + ) asset_data = asset_data.merge( - bea_ind_names, how='left', on=['bea_ind_code'], copy=True) + bea_ind_names, how="left", on=["bea_ind_code"], copy=True + ) return asset_data diff --git a/data/soi_processing.py b/data/soi_processing.py index 8e1759d6..2be40f4b 100644 --- a/data/soi_processing.py +++ b/data/soi_processing.py @@ -6,13 +6,16 @@ of these entities. Also provides auxiliary scripts to format the partner and proprietorship dataframes and to interpolate missing data. 
""" + # Import packages import pandas as pd + # Import custom modules import pull_soi_corp as corp import pull_soi_partner as prt import pull_soi_proprietorship as prop from data_paths import get_paths + globals().update(get_paths()) @@ -33,45 +36,58 @@ def pull_soi_data(): entity_dfs.update(prop.load_proprietorship_data(entity_dfs)) # make one big data frame - by industry and entity type - c_corp = entity_dfs['c_corp'][['minor_code_alt', 'Land', - 'Fixed Assets', 'Inventories']].copy() - c_corp.loc[:, 'entity_type'] = 'c_corp' - s_corp = entity_dfs['s_corp'][['minor_code_alt', 'Land', - 'Fixed Assets', 'Inventories']].copy() - s_corp.loc[:, 'entity_type'] = 's_corp' - partner = entity_dfs['part_data'][['minor_code_alt', 'Land', - 'Fixed Assets', 'Inventories', - 'part_type']].copy() - partner.loc[:, 'entity_type'] = 'partnership' - sole_prop = entity_dfs['sole_prop_data'][['minor_code_alt', 'Land', - 'Fixed Assets', - 'Inventories']].copy() - sole_prop.loc[:, 'entity_type'] = 'sole_prop' + c_corp = entity_dfs["c_corp"][ + ["minor_code_alt", "Land", "Fixed Assets", "Inventories"] + ].copy() + c_corp.loc[:, "entity_type"] = "c_corp" + s_corp = entity_dfs["s_corp"][ + ["minor_code_alt", "Land", "Fixed Assets", "Inventories"] + ].copy() + s_corp.loc[:, "entity_type"] = "s_corp" + partner = entity_dfs["part_data"][ + ["minor_code_alt", "Land", "Fixed Assets", "Inventories", "part_type"] + ].copy() + partner.loc[:, "entity_type"] = "partnership" + sole_prop = entity_dfs["sole_prop_data"][ + ["minor_code_alt", "Land", "Fixed Assets", "Inventories"] + ].copy() + sole_prop.loc[:, "entity_type"] = "sole_prop" - soi_data = c_corp.append([s_corp, partner, sole_prop], sort=True, - ignore_index=True).copy().reset_index() - soi_data['part_type'] = soi_data['part_type'].fillna('Not a partnership') + soi_data = ( + c_corp.append( + [s_corp, partner, sole_prop], sort=True, ignore_index=True + ) + .copy() + .reset_index() + ) + soi_data["part_type"] = soi_data["part_type"].fillna("Not a partnership") # merge to industry codes xwalk, which will be helpful when merging # with BEA data # create ratios for minor industry assets using corporate data # read in crosswalk for bea and soi industry codes soi_bea_ind_codes = pd.read_csv( - _SOI_BEA_CROSS, dtype={'bea_ind_code': str}) - soi_bea_ind_codes.drop('notes', axis=1, inplace=True) + _SOI_BEA_CROSS, dtype={"bea_ind_code": str} + ) + soi_bea_ind_codes.drop("notes", axis=1, inplace=True) # drop one repeated minor ind code in crosswalk - soi_bea_ind_codes.drop_duplicates( - subset=['minor_code_alt'], inplace=True) - soi_data['tax_treat'] = 'non-corporate' - soi_data.loc[soi_data['entity_type'] == 'c_corp', 'tax_treat'] =\ - 'corporate' - soi_data.loc[(soi_data['entity_type'] == 'partnership') & - (soi_data['part_type'] == 'Corporate general partners'), - 'tax_treat'] = 'corporate' - soi_data.loc[(soi_data['entity_type'] == 'partnership') & - (soi_data['part_type'] == 'Corporate limited partners'), - 'tax_treat'] = 'corporate' - soi_data = soi_data.merge(soi_bea_ind_codes, how='left', - on=['minor_code_alt'], copy=True) + soi_bea_ind_codes.drop_duplicates(subset=["minor_code_alt"], inplace=True) + soi_data["tax_treat"] = "non-corporate" + soi_data.loc[soi_data["entity_type"] == "c_corp", "tax_treat"] = ( + "corporate" + ) + soi_data.loc[ + (soi_data["entity_type"] == "partnership") + & (soi_data["part_type"] == "Corporate general partners"), + "tax_treat", + ] = "corporate" + soi_data.loc[ + (soi_data["entity_type"] == "partnership") + & (soi_data["part_type"] == 
"Corporate limited partners"), + "tax_treat", + ] = "corporate" + soi_data = soi_data.merge( + soi_bea_ind_codes, how="left", on=["minor_code_alt"], copy=True + ) return soi_data diff --git a/environment.yml b/environment.yml index b65c44e7..1dbe83ab 100644 --- a/environment.yml +++ b/environment.yml @@ -15,6 +15,7 @@ dependencies: - pycodestyle - pylint - coverage +- black - pip: - jupyter-book>=0.9.1 - "cs-kit>=1.16.8" diff --git a/example.py b/example.py index 869646bc..042318ef 100644 --- a/example.py +++ b/example.py @@ -2,6 +2,7 @@ Runs Cost-of-Capital-Calculator with TCJA as baseline and 2017 law as reform ---------------------------------------------------------------------------- """ + # import support packages and Cost-of-Capital-Calculator classes and function from bokeh.io import show import taxcalc @@ -13,16 +14,23 @@ # specify individual income and business tax reform to compare against # ... Note that TCJA is current-law baseline in Tax-Calculator, # so to compare TCJA to 2017 law, we'll use 2017 law as the reform -reform_url = ('https://raw.githubusercontent.com/' - 'PSLmodels/Tax-Calculator/master/taxcalc/' - 'reforms/2017_law.json') +reform_url = ( + "https://raw.githubusercontent.com/" + "PSLmodels/Tax-Calculator/master/taxcalc/" + "reforms/2017_law.json" +) iit_reform = taxcalc.Policy.read_json_reform(reform_url) # ... specify reform that implements pre-TCJA business tax policy cyr = 2019 business_tax_reform = { - 'CIT_rate': 0.35, 'BonusDeprec_3yr': 0.50, 'BonusDeprec_5yr': 0.50, - 'BonusDeprec_7yr': 0.50, 'BonusDeprec_10yr': 0.50, - 'BonusDeprec_15yr': 0.50, 'BonusDeprec_20yr': 0.50} + "CIT_rate": 0.35, + "BonusDeprec_3yr": 0.50, + "BonusDeprec_5yr": 0.50, + "BonusDeprec_7yr": 0.50, + "BonusDeprec_10yr": 0.50, + "BonusDeprec_15yr": 0.50, + "BonusDeprec_20yr": 0.50, +} # specify baseline and reform Calculator objects for 2019 calculations assets = Assets() @@ -44,12 +52,12 @@ diff_industry_df = diff_two_tables(reform_industry_df, baseln_industry_df) # save dataframes to disk as csv files in this directory -baseln_industry_df.to_csv('baseline_byindustry.csv', float_format='%.5f') -reform_industry_df.to_csv('reform_byindustry.csv', float_format='%.5f') -baseln_assets_df.to_csv('baseline_byasset.csv', float_format='%.5f') -reform_assets_df.to_csv('reform_byasset.csv', float_format='%.5f') -diff_industry_df.to_csv('changed_byindustry.csv', float_format='%.5f') -diff_assets_df.to_csv('changed_byasset.csv', float_format='%.5f') +baseln_industry_df.to_csv("baseline_byindustry.csv", float_format="%.5f") +reform_industry_df.to_csv("reform_byindustry.csv", float_format="%.5f") +baseln_assets_df.to_csv("baseline_byasset.csv", float_format="%.5f") +reform_assets_df.to_csv("reform_byasset.csv", float_format="%.5f") +diff_industry_df.to_csv("changed_byindustry.csv", float_format="%.5f") +diff_assets_df.to_csv("changed_byasset.csv", float_format="%.5f") # create and show in browser a range plot p = calc1.range_plot(calc2) diff --git a/setup.py b/setup.py index 041e241b..5b6cd574 100644 --- a/setup.py +++ b/setup.py @@ -3,39 +3,39 @@ except ImportError: from distutils.core import setup -with open('README.md') as f: +with open("README.md") as f: longdesc = f.read() -version = '1.2.11' +version = "1.2.11" config = { - 'description': 'CCC: A Cost of Capital Calculator', - 'url': 'https://github.com/PSLmodels/Cost-of-Capital-Calculator', - 'download_url': 'https://github.com/PSLmodels/Cost-of-Capital-Calculator', - 'long_description_content_type': 'text/markdown', - 'long_description': 
longdesc, - 'version': version, - 'license': 'CC0 1.0 Universal (CC0 1.0) Public Domain Dedication', - 'packages': ['ccc'], - 'include_package_data': True, - 'name': 'cost-of-capital-calculator', - 'install_requires': ['taxcalc', 'pandas', 'bokeh', 'numpy', - 'paramtools'], - 'classifiers': [ - 'Development Status :: 4 - Beta', - 'Intended Audience :: Developers', - 'Natural Language :: English', - 'License :: CC0 1.0 Universal (CC0 1.0) Public Domain Dedication', - 'Operating System :: OS Independent', - 'Programming Language :: Python', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', - 'Programming Language :: Python :: 3.10', - 'Topic :: Software Development :: Libraries :: Python Modules'], - 'tests_require': ['pytest'] + "description": "CCC: A Cost of Capital Calculator", + "url": "https://github.com/PSLmodels/Cost-of-Capital-Calculator", + "download_url": "https://github.com/PSLmodels/Cost-of-Capital-Calculator", + "long_description_content_type": "text/markdown", + "long_description": longdesc, + "version": version, + "license": "CC0 1.0 Universal (CC0 1.0) Public Domain Dedication", + "packages": ["ccc"], + "include_package_data": True, + "name": "cost-of-capital-calculator", + "install_requires": ["taxcalc", "pandas", "bokeh", "numpy", "paramtools"], + "classifiers": [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Natural Language :: English", + "License :: CC0 1.0 Universal (CC0 1.0) Public Domain Dedication", + "Operating System :: OS Independent", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Topic :: Software Development :: Libraries :: Python Modules", + ], + "tests_require": ["pytest"], } setup(**config)
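
The `FA_ratio`, `bea_ratio`, and `land_ratio` blocks reformatted in this patch all use the same within-group allocation pattern: scale each row's value to its share of the group total, then multiply by a group-level control total. Below is a minimal sketch of that pattern on toy data (column names are borrowed from the diff; all values are hypothetical). It uses `transform` rather than the patch's `groupby().apply`, an equivalent alternative that keeps the original row index and so avoids the index-alignment pitfalls `apply` can hit on newer pandas.

import pandas as pd

df = pd.DataFrame(
    {
        "bea_ind_code": ["1100", "1100", "2100"],
        "Fixed Assets": [10.0, 30.0, 5.0],  # SOI values, toy numbers
        "assets": [100.0, 100.0, 50.0],  # BEA control totals, toy numbers
    }
)

# each row's share of its industry group's SOI fixed assets
df["FA_ratio"] = df.groupby("bea_ind_code")["Fixed Assets"].transform(
    lambda x: x / x.sum()
)
# allocate the BEA control total across rows in proportion to those shares
df["assets"] = df["FA_ratio"] * df["assets"]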
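One caveat for the Python 3.11 support this PR adds: the reformatted `owner_occ_dict` block still calls `np.asscalar`, which was removed in NumPy 1.23, and NumPy builds for Python 3.11 are newer than that. A minimal sketch of the drop-in replacement, with hypothetical values:

import numpy as np

owner_occ_house_FA = np.array([123.0])  # hypothetical value
owner_occ_house_land = np.array([45.0])  # hypothetical value

# .item() extracts the scalar from a one-element array on all modern NumPy
assets = [owner_occ_house_FA.item(), owner_occ_house_land.item()]
# was: [np.asscalar(owner_occ_house_FA), np.asscalar(owner_occ_house_land)]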
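Similarly, the `append` calls that Black reformats here (`fixed_assets.append(...)`, `asset_data.append(...)`, `c_corp.append(...)`) rely on `DataFrame.append`, which was deprecated in pandas 1.4 and removed in 2.0, so on pandas 2.x they would raise `AttributeError`. A minimal sketch of the `pd.concat` equivalent, on toy frames:

import pandas as pd

fixed_assets = pd.DataFrame({"assets": [1.0], "tax_treat": ["corporate"]})
inventories = pd.DataFrame({"assets": [2.0], "tax_treat": ["corporate"]})
land = pd.DataFrame({"assets": [3.0], "tax_treat": ["non-corporate"]})

# was: fixed_assets.append([inventories, land], sort=True, ignore_index=True)
asset_data = pd.concat(
    [fixed_assets, inventories, land], sort=True, ignore_index=True
).reset_index()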
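Finally, the tax-treatment assignment near the end of `pull_soi_data` defaults every row to non-corporate and then flags C corporations and the two corporate partner types as corporate. A toy sketch of the same classification, with the two partnership masks collapsed via `isin` (an equivalent alternative, not what the patch itself does); the data values are hypothetical:

import pandas as pd

soi_data = pd.DataFrame(
    {
        "entity_type": ["c_corp", "s_corp", "partnership", "partnership"],
        "part_type": [
            "Not a partnership",
            "Not a partnership",
            "Corporate general partners",
            "Individual limited partners",
        ],
    }
)

soi_data["tax_treat"] = "non-corporate"
# the two corporate partner types, combined into one mask
corp_partner = soi_data["part_type"].isin(
    ["Corporate general partners", "Corporate limited partners"]
)
soi_data.loc[
    (soi_data["entity_type"] == "c_corp")
    | ((soi_data["entity_type"] == "partnership") & corp_partner),
    "tax_treat",
] = "corporate"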