PSLmodels · martinholmer · Jul 20, 2017 · Jul 17, 2017 · Jul 17, 2017 · Jul 17, 2017
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -9,3 +9,5 @@ include taxcalc/behavior.json
 include taxcalc/growth.json
 include taxcalc/consumption.json
 include taxcalc/records_variables.json
+include taxcalc/cps.csv.gz
+include taxcalc/cps_weights.csv.gz
diff --git a/RELEASES.md b/RELEASES.md
@@ -13,7 +13,12 @@ Release 0.9.2 on 2017-??-??
 - None
 
 **New Features**
-- None
+- Add several taxcalc/reforms/earnings_shifting.* files that analyze the revenue implications of high-paid workers forming personal LLCs to contract with their former employers under the Trump2017.json reform
+  [[#1464](https://github.com/open-source-economics/Tax-Calculator/pull/1464)
+  by Martin Holmer]
+- Add ability to read and calculate taxes with new CPS input data for 2014 and subsequent years
+  [[#1484](https://github.com/open-source-economics/Tax-Calculator/pull/1484)
+  by Martin Holmer]
 
 **Bug Fixes**
 - Fix decorators bug that appeared when numpy 1.13.1, and pandas 0.20.2 that uses numpy 1.13, recently became available

diff --git a/conda.recipe/install_local_taxcalc_package.sh b/conda.recipe/install_local_taxcalc_package.sh
@@ -48,7 +48,7 @@ rmdir dist/
 rm -fr taxcalc.egg-info/*
 rmdir taxcalc.egg-info/
 
-echo "Execute 'conda uninstall taxcalc' after using taxcalc package"
+echo "Execute 'conda uninstall taxcalc --yes' after using taxcalc package"
 
 echo "FINISHED : `date`"
 exit 0
diff --git a/docs/index.html b/docs/index.html
diff --git a/taxcalc/calculate.py b/taxcalc/calculate.py
@@ -102,6 +102,8 @@ def __init__(self, policy=None, records=None, verbose=True,
             self.records = records
         else:
             raise ValueError('must specify records as a Records object')
+        if self.policy.current_year < self.records.data_year:
+            self.policy.set_year(self.records.data_year)
         if consumption is None:
             self.consumption = Consumption(start_year=policy.start_year)
         elif isinstance(consumption, Consumption):
@@ -120,10 +122,10 @@ def __init__(self, policy=None, records=None, verbose=True,
                 self.behavior.set_year(next_year)
         else:
             raise ValueError('behavior must be None or Behavior object')
-        if sync_years and self.records.current_year == Records.PUF_YEAR:
+        if sync_years and self.records.current_year == self.records.data_year:
             if verbose:
                 print('You loaded data for ' +
-                      str(self.records.current_year) + '.')
+                      str(self.records.data_year) + '.')
                 if len(self.records.IGNORED_VARS) > 0:
                     print('Your data include the following unused ' +
                           'variables that will be ignored:')
@@ -143,6 +145,7 @@ def calc_all(self, zero_out_calc_vars=False):
         Call all tax-calculation functions.
         """
         # conducts static analysis of Calculator object for current_year
+        assert self.records.current_year == self.policy.current_year
         self._calc_one_year(zero_out_calc_vars)
         BenefitSurtax(self)
         BenefitLimitation(self)

diff --git a/taxcalc/cli/tc.py b/taxcalc/cli/tc.py
@@ -37,7 +37,9 @@ def cli_tc_main():
     parser.add_argument('INPUT', nargs='?',
                         help=('INPUT is name of CSV-formatted file that '
                               'contains for each filing unit variables used '
-                              'to compute taxes for TAXYEAR.'),
+                              'to compute taxes for TAXYEAR. Specifying '
+                              '"cps.csv" uses CPS input files included in '
+                              'the taxcalc package.'),
                         default='')
     parser.add_argument('TAXYEAR', nargs='?',
                         help=('TAXYEAR is calendar year for which taxes '

diff --git a/taxcalc/cps.csv.gz b/taxcalc/cps.csv.gz
diff --git a/taxcalc/cps_weights.csv.gz b/taxcalc/cps_weights.csv.gz
diff --git a/taxcalc/growfactors.py b/taxcalc/growfactors.py
@@ -53,9 +53,12 @@ def __init__(self, growfactors_filename=FILE_PATH):
             # pylint: disable=redefined-variable-type
             # (above because pylint mistakenly thinks gfdf is not a DataFrame)
             if os.path.isfile(growfactors_filename):
-                gfdf = pd.read_csv(growfactors_filename, index_col='YEAR')
+                gfdf = pd.read_csv(growfactors_filename,
+                                   index_col='YEAR')
             else:
-                gfdf = read_egg_csv(Growfactors.FILENAME, index_col='YEAR')
+                # cannot call read_egg_ function in unit tests
+                gfdf = read_egg_csv(Growfactors.FILENAME,
+                                    index_col='YEAR')  # pragma: no cover
         else:
             raise ValueError('growfactors_filename is not a string')
         assert isinstance(gfdf, pd.DataFrame)

diff --git a/taxcalc/parameters.py b/taxcalc/parameters.py
@@ -237,7 +237,9 @@ def _params_dict_from_json_file(cls):
                 params_dict = json.load(pfile,
                                         object_pairs_hook=collect.OrderedDict)
         else:
-            params_dict = read_egg_json(cls.DEFAULTS_FILENAME)
+            # cannot call read_egg_ function in unit tests
+            params_dict = read_egg_json(
+                cls.DEFAULTS_FILENAME)  # pragma: no cover
         return params_dict
 
     def _update(self, year_mods):

diff --git a/taxcalc/records.py b/taxcalc/records.py
@@ -15,6 +15,7 @@
 
 
 PUFCSV_YEAR = 2009
+CPSCSV_YEAR = 2014
 
 
 class Records(object):
@@ -39,20 +40,20 @@ class Records(object):
     gfactors: Growfactors class instance or None
         containing record data extrapolation (or "blowup") factors
 
-    adjust_ratios: string or Pandas DataFrame or None
-        string describes CSV file in which adjustment ratios reside;
-        DataFrame already contains adjustment ratios;
-        None creates empty adjustment-ratios DataFrame;
-        default value is filename of the default adjustment ratios.
-
     weights: string or Pandas DataFrame or None
         string describes CSV file in which weights reside;
         DataFrame already contains weights;
         None creates empty sample-weights DataFrame;
-        default value is filename of the default weights.
+        default value is filename of the PUF weights.
+
+    adjust_ratios: string or Pandas DataFrame or None
+        string describes CSV file in which adjustment ratios reside;
+        DataFrame already contains adjustment ratios;
+        None creates empty adjustment-ratios DataFrame;
+        default value is filename of the PUF adjustment ratios.
 
     start_year: integer
-        specifies calendar year of the data;
+        specifies calendar year of the input data;
         default value is PUFCSV_YEAR.
         Note that if specifying your own data (see above) as being a custom
         data set, be sure to explicitly set start_year to the
@@ -78,7 +79,7 @@ class instance: Records
 
     Notes
     -----
-    Typical usage is as follows::
+    Typical usage when using PUF input data is as follows::
 
         recs = Records()
 
@@ -88,6 +89,9 @@ class instance: Records
     situations in which you need to specify the values of the Record
     constructor's arguments, but be sure you know exactly what you are
     doing when attempting this.
+
+    Use Records.cps_constructor() to get a Records object instantiated
+    with CPS input data.
     """
     # suppress pylint warnings about unrecognized Records variables:
     # pylint: disable=no-member
@@ -96,23 +100,22 @@ class instance: Records
     # suppress pylint warnings about too many class instance attributes:
     # pylint: disable=too-many-instance-attributes
 
-    PUF_YEAR = PUFCSV_YEAR
     CUR_PATH = os.path.abspath(os.path.dirname(__file__))
-    WEIGHTS_FILENAME = 'puf_weights.csv'
-    WEIGHTS_PATH = os.path.join(CUR_PATH, WEIGHTS_FILENAME)
-    ADJUST_RATIOS_FILENAME = 'puf_ratios.csv'
-    ADJUST_RATIOS_PATH = os.path.join(CUR_PATH, ADJUST_RATIOS_FILENAME)
+    PUF_WEIGHTS_FILENAME = 'puf_weights.csv'
+    PUF_RATIOS_FILENAME = 'puf_ratios.csv'
+    CPS_WEIGHTS_FILENAME = 'cps_weights.csv.gz'
+    CPS_RATIOS_FILENAME = None
     VAR_INFO_FILENAME = 'records_variables.json'
-    VAR_INFO_PATH = os.path.join(CUR_PATH, VAR_INFO_FILENAME)
 
     def __init__(self,
                  data='puf.csv',
                  exact_calculations=False,
                  gfactors=Growfactors(),
-                 weights=WEIGHTS_PATH,
-                 adjust_ratios=ADJUST_RATIOS_PATH,
+                 weights=PUF_WEIGHTS_FILENAME,
+                 adjust_ratios=PUF_RATIOS_FILENAME,
                  start_year=PUFCSV_YEAR):
         # pylint: disable=too-many-arguments
+        self._data_year = start_year
         # read specified data
         self._read_data(data, exact_calculations)
         # check that three sets of split-earnings variables have valid values
@@ -143,7 +146,7 @@ def __init__(self,
         self.WT = None
         self._read_weights(weights)
         self.ADJ = None
-        self._read_adjust(adjust_ratios)
+        self._read_ratios(adjust_ratios)
         # weights must be same size as tax record data
         if not self.WT.empty and self.dim != len(self.WT):
             # scale-up sub-sample weights by year-specific factor
@@ -160,13 +163,40 @@ def __init__(self,
             msg = 'start_year is not an integer'
             raise ValueError(msg)
         # consider applying initial-year grow factors
-        if gfactors is not None and start_year == Records.PUF_YEAR:
+        if gfactors is not None and start_year == self._data_year:
             self._blowup(start_year)
         # construct sample weights for current_year
         wt_colname = 'WT{}'.format(self.current_year)
         if wt_colname in self.WT.columns:
             self.s006 = self.WT[wt_colname] * 0.01
 
+    @staticmethod
+    def cps_constructor(exact_calculations=False,
+                        growfactors=Growfactors()):
+        """
+        Static method returns a Records object instantiated with CPS
+        input data.  This works in an analogous way to Records(), which
+        returns a Records object instantiated with PUF input data.
+        This is a convenience method that eliminates the need to
+        specify all the details of the CPS input data just as the
+        default values of the arguments of the Records class constructor
+        eliminate the need to specify all the details of the PUF input
+        data.
+        """
+        return Records(data=os.path.join(Records.CUR_PATH, 'cps.csv.gz'),
+                       exact_calculations=exact_calculations,
+                       gfactors=growfactors,
+                       weights=Records.CPS_WEIGHTS_FILENAME,
+                       adjust_ratios=Records.CPS_RATIOS_FILENAME,
+                       start_year=CPSCSV_YEAR)
+
+    @property
+    def data_year(self):
+        """
+        Records class property: calendar year of the original input data.
+        """
+        return self._data_year
+
     @property
     def current_year(self):
         """
@@ -206,11 +236,15 @@ def read_var_info():
         Read Records variables metadata from JSON file;
         returns dictionary and specifies static varname sets listed below.
         """
-        if os.path.exists(Records.VAR_INFO_PATH):
-            with open(Records.VAR_INFO_PATH) as vfile:
+        var_info_path = os.path.join(Records.CUR_PATH,
+                                     Records.VAR_INFO_FILENAME)
+        if os.path.exists(var_info_path):
+            with open(var_info_path) as vfile:
                 vardict = json.load(vfile)
         else:
-            vardict = read_egg_json(Records.VAR_INFO_FILENAME)
+            # cannot call read_egg_ function in unit tests
+            vardict = read_egg_json(
+                Records.VAR_INFO_FILENAME)  # pragma: no cover
         Records.INTEGER_READ_VARS = set(k for k, v in vardict['read'].items()
                                         if v['type'] == 'int')
         FLOAT_READ_VARS = set(k for k, v in vardict['read'].items()
@@ -362,10 +396,11 @@ def _read_data(self, data, exact_calcs):
         if isinstance(data, pd.DataFrame):
             taxdf = data
         elif isinstance(data, six.string_types):
-            if data.endswith('gz'):
-                taxdf = pd.read_csv(data, compression='gzip')
-            else:
+            if os.path.isfile(data):
                 taxdf = pd.read_csv(data)
+            else:
+                # cannot call read_egg_ function in unit tests
+                taxdf = read_egg_csv(data)  # pragma: no cover
         else:
             msg = 'data is neither a string nor a Pandas DataFrame'
             raise ValueError(msg)
@@ -430,36 +465,43 @@ def _read_weights(self, weights):
         if isinstance(weights, pd.DataFrame):
             WT = weights
         elif isinstance(weights, six.string_types):
-            if os.path.isfile(weights):
+            weights_path = os.path.join(Records.CUR_PATH, weights)
+            if os.path.isfile(weights_path):
                 # pylint: disable=redefined-variable-type
                 # (above because pylint mistakenly thinks WT not a DataFrame)
-                WT = pd.read_csv(weights)
+                WT = pd.read_csv(weights_path)
             else:
-                WT = read_egg_csv(Records.WEIGHTS_FILENAME)
+                # cannot call read_egg_ function in unit tests
+                WT = read_egg_csv(
+                    os.path.basename(weights_path))  # pragma: no cover
         else:
             msg = 'weights is not None or a string or a Pandas DataFrame'
             raise ValueError(msg)
         assert isinstance(WT, pd.DataFrame)
         setattr(self, 'WT', WT)
 
-    def _read_adjust(self, adjust_ratios):
+    def _read_ratios(self, ratios):
         """
         Read Records adjustment ratios from file or uses specified DataFrame
         as data or creates empty DataFrame if None
         """
-        if adjust_ratios is None:
+        if ratios is None:
             ADJ = pd.DataFrame({'nothing': []})
             setattr(self, 'ADJ', ADJ)
             return
-        if isinstance(adjust_ratios, pd.DataFrame):
-            ADJ = adjust_ratios
-        elif isinstance(adjust_ratios, six.string_types):
-            if os.path.isfile(adjust_ratios):
+        if isinstance(ratios, pd.DataFrame):
+            ADJ = ratios
+        elif isinstance(ratios, six.string_types):
+            ratios_path = os.path.join(Records.CUR_PATH, ratios)
+            if os.path.isfile(ratios_path):
                 # pylint: disable=redefined-variable-type
                 # (above because pylint mistakenly thinks ADJ not a DataFrame)
-                ADJ = pd.read_csv(adjust_ratios, index_col=0)
+                ADJ = pd.read_csv(ratios_path,
+                                  index_col=0)
             else:
-                ADJ = read_egg_csv(Records.ADJUST_RATIOS_FILENAME, index_col=0)
+                # cannot call read_egg_ function in unit tests
+                ADJ = read_egg_csv(os.path.basename(ratios_path),
+                                   index_col=0)  # pragma: no cover
             ADJ = ADJ.transpose()
         else:
             msg = ('adjust_ratios is not None or a string'