diff --git a/cps_data/adjustment_targets.csv b/cps_data/adjustment_targets.csv
new file mode 100644
index 00000000..5d1807dd
--- /dev/null
+++ b/cps_data/adjustment_targets.csv
@@ -0,0 +1,20 @@
+INT,ODIV,QDIV,BIZ
+4688264,4713140,3095521,-8287987
+541622,999149,639115,2836970
+770262,1442485,859292,11990777
+1002678,1876844,1137469,22132182
+1147555,1798693,1137002,14556107
+1125760,1957918,1234331,10372683
+1340811,1960817,1183817,9243160
+2096189,4116883,2794332,15956819
+1900496,4673360,2996252,13332856
+5955852,14387800,9906058,28063051
+5013047,14339274,9921453,25009982
+13390182,43136765,31985381,66676016
+12725861,44531293,34400475,60723838
+6814230,24570303,19243443,22953218
+3825975,10497137,8124591,6770746
+2582189,7490783,5750856,3743423
+7098608,19187445,14940470,6606643
+4547632,11314258,8846877,2419024
+17327068,41707885,34250973,2159257
\ No newline at end of file
diff --git a/cps_data/cps.csv.gz b/cps_data/cps.csv.gz
new file mode 100644
index 00000000..1788976a
Binary files /dev/null and b/cps_data/cps.csv.gz differ
diff --git a/cps_data/cps_raw.csv.gz b/cps_data/cps_raw.csv.gz
new file mode 100644
index 00000000..f7438148
Binary files /dev/null and b/cps_data/cps_raw.csv.gz differ
diff --git a/cps_data/finalprep.py b/cps_data/finalprep.py
index c09cca39..8ba1b930 100644
--- a/cps_data/finalprep.py
+++ b/cps_data/finalprep.py
@@ -1,158 +1,375 @@
 import pandas as pd
 import numpy as np
+import sys
+import copy
+import subprocess
 
-# Import production file
-data = pd.read_csv('prod2015_v2e.csv')
-
-# Rename variables where possible
-renames = {
-    'IFDEPT': 'DSI',
-    'TAXYEAR': 'FLPDYR',
-    'XXTOT': 'XTOT',
-    'JCPS21': 'e00200p',
-    'JCPS31': 'e00200s',
-    'ALIMONY': 'e00800',
-    'JCPS25': 'e00900p',
-    'JCPS35': 'e00900s',
-    'JCPS28': 'e02100p',
-    'JCPS38': 'e02100s',
-    'UCOMP': 'e02300',
-    'SOCSEC': 'e02400',
-    'SEHEALTH': 'e03270',
-    'DPAD': 'e03240',
-    'MEDICALEXP': 'e17500',
-    'REALEST': 'e18500',
-    'MISCITEM': 'e20400',
-    'CCE': 'e32800',
-    'ICPS01': 'age_head',
-    'ICPS02': 'age_spouse',
-    'WT': 's006',
-    'FILST': 'filer',
-    'SEQUENCE': 'RECID',
-    'PENSIONS': 'e01700',
-    'DBE': 'e00600',
-    'KEOGH': 'e03300',
-    'TIRAD': 'e01400'
-}
-
-data = data.rename(columns=renames)
-
-# Adjust MARS to address lack of married filing separately status
-# 1 = Single filers
-# 2 = Married filing jointly
-# 4 = Head of household
-data['MARS'] = np.where(data.JS == 3, 4, data.JS)
-
-# Use primary taxpayer and spouse records to get total tax unit earnings
-data['e00200'] = data.e00200p + data.e00200s
-data['e00900'] = data.e00900p + data.e00900s
-data['e02100'] = data.e02100p + data.e02100s
-
-# Impute variables where possible
-
-# Determine amount of qualified dividends using IRS ratio
-data['e00650'] = data.e00600 * 0.7556
-
-# Split interest income into taxable and tax exempt using IRS ratio
-taxable = 0.6
-nontaxable = 1. - taxable
-data['e00300'] = data.INTST * taxable
-data['e00400'] = data.INTST * nontaxable
-
-# Apply charitable deduction limit
-halfAGI = (data.JCPS9 + data.JCPS19) * 0.5
-charity = np.where(data.CHARITABLE > halfAGI,
-                   halfAGI, data.CHARITABLE)
-# Split charitable giving into cash and non-cash using ratio in PUF
-cash = 0.82013
-non_cash = 1. - cash
-data['e19800'] = charity * cash
-data['e20100'] = charity * non_cash
-
-# Apply student loan interest deduction limit
-data['e03210'] = np.where(data.SLINT > 2500, 2500, data.SLINT)
-
-# Apply IRA contribution limits
-deductibleIRA = np.where(data.AGE >= 50,
-                         np.where(data.ADJIRA > 6500, 6500, data.ADJIRA),
-                         np.where(data.ADJIRA > 5500, 5500, data.ADJIRA))
-data['e03150'] = deductibleIRA
-
-# Count number of dependents under 13
-# Max of four to match PUF version of nu13
-age1 = np.where((data.ICPS03 > 0) & (data.ICPS03 <= 13), 1, 0)
-age2 = np.where((data.ICPS04 > 0) & (data.ICPS04 <= 13), 1, 0)
-age3 = np.where((data.ICPS05 > 0) & (data.ICPS05 <= 13), 1, 0)
-age4 = np.where((data.ICPS06 > 0) & (data.ICPS06 <= 13), 1, 0)
-nu13 = age1 + age2 + age3 + age4
-data['nu13'] = nu13
-
-# Count number of dependents under 5
-age1 = np.where((data.ICPS03 > 0) & (data.ICPS03 <= 5), 1, 0)
-age2 = np.where((data.ICPS04 > 0) & (data.ICPS04 <= 5), 1, 0)
-age3 = np.where((data.ICPS05 > 0) & (data.ICPS05 <= 5), 1, 0)
-age4 = np.where((data.ICPS06 > 0) & (data.ICPS06 <= 5), 1, 0)
-age5 = np.where((data.ICPS07 > 0) & (data.ICPS06 <= 5), 1, 0)
-nu05 = age1 + age2 + age3 + age4 + age5
-data['nu05'] = nu05
-
-# Count number of children eligible for child tax credit
-# Max of three to mach PUF version of n24
-age1 = np.where((data.ICPS03 > 0) & (data.ICPS03 <= 17), 1, 0)
-age2 = np.where((data.ICPS04 > 0) & (data.ICPS04 <= 17), 1, 0)
-age3 = np.where((data.ICPS05 > 0) & (data.ICPS05 <= 17), 1, 0)
-n24 = age1 + age2 + age3
-data['n24'] = n24
-
-# Count number of elderly dependents
-age1 = np.where(data.ICPS03 >= 65, 1, 0)
-age2 = np.where(data.ICPS04 >= 65, 1, 0)
-age3 = np.where(data.ICPS05 >= 65, 1, 0)
-age4 = np.where(data.ICPS06 >= 65, 1, 0)
-age5 = np.where(data.ICPS07 >= 65, 1, 0)
-elderly = age1 + age2 + age3 + age4 + age5
-data['elderly_dependent'] = elderly
-
-# List of usable variables in TaxCalc
-USABLE_READ_VARS = [
-        'DSI', 'EIC', 'FLPDYR',
-        'f2441', 'f6251', 'n24', 'XTOT',
-        'e00200', 'e00300', 'e00400', 'e00600', 'e00650', 'e00700', 'e00800',
-        'e00200p', 'e00200s',
-        'e00900', 'e01100', 'e01200', 'e01400', 'e01500', 'e01700',
-        'e00900p', 'e00900s',
-        'e02000', 'e02100', 'e02300', 'e02400', 'e03150', 'e03210',
-        'e02100p', 'e02100s',
-        'e03220', 'e03230', 'e03270', 'e03240', 'e03290',
-        'e03400', 'e03500',
-        'e07240', 'e07260', 'e07300',
-        'e07400', 'e07600', 'p08000',
-        'e09700', 'e09800', 'e09900',
-        'e11200',
-        'e17500', 'e18400', 'e18500',
-        'e19200', 'e19800', 'e20100',
-        'e20400', 'e20500', 'p22250',
-        'p23250', 'e24515', 'e24518',
-        'p25470',
-        'e26270',
-        'e27200', 'e32800', 'e03300',
-        'e58990',
-        'e62900',
-        'p87521', 'e87530',
-        'MARS', 'MIDR', 'RECID', 'filer',
-        'cmbtp_standard', 'cmbtp_itemizer',
-        'age_head', 'age_spouse', 'blind_head', 'blind_spouse',
-        'nu13', 'elderly_dependent',
-        's006', 'nu05']
-
-# Remove unnecessary variables
-drop_vars = []
-var_list = list(data.columns)
-for item in var_list:
-    if item not in USABLE_READ_VARS:
-        drop_vars.append(item)
-data.drop(drop_vars, axis=1, inplace=True)
-data.fillna(0, inplace=True)
-
-# Write processed file to a CSV
-data.to_csv('cps.csv', index=False)
+
+def main():
+    """Read the raw CPS file, process it, and write a gzipped cps.csv."""
+    # Import CPS data file
+    data = pd.read_csv('cps_raw.csv.gz', compression='gzip')
+    adj_targets = pd.read_csv('adjustment_targets.csv')
+    # other_ben = pd.read_csv('benefitprograms.csv')
+
+    # Rename specified variables
+    renames = {
+        'IFDEPT': 'DSI',
+        'TAXYEAR': 'FLPDYR',
+        'XXTOT': 'XTOT',
+        'JCPS21': 'e00200p',
+        'JCPS31': 'e00200s',
+        'ALIMONY': 'e00800',
+        'JCPS25': 'e00900p',
+        'JCPS35': 'e00900s',
+        'JCPS28': 'e02100p',
+        'JCPS38': 'e02100s',
+        'UCOMP': 'e02300',
+        'SOCSEC': 'e02400',
+        'SEHEALTH': 'e03270',
+        'DPAD': 'e03240',
+        'MEDICALEXP': 'e17500',
+        'REALEST': 'e18500',
+        'MISCITEM': 'e20400',
+        'CCE': 'e32800',
+        'ICPS01': 'age_head',
+        'ICPS02': 'age_spouse',
+        'WT': 's006',
+        'FILST': 'filer',
+        'SEQUENCE': 'RECID',
+        'PENSIONS': 'e01500',
+        'DBE': 'e00600',
+        'KEOGH': 'e03300',
+        'TIRAD': 'e01400',
+        'NU18': 'nu18',
+        'N1821': 'n1821',
+        'N21': 'n21',
+        'CGAGIX': 'e01100',
+        'BLIND_HEAD': 'blind_head',
+        'BLIND_SPOUSE': 'blind_spouse',
+        'HMIE': 'e19200',
+        # 'SSI': 'ssi_ben',
+        # 'VB': 'vet_ben',
+        # 'MEDICARE': 'mcare_ben',
+        # 'MEDICAID': 'mcaid_ben',
+        # 'SS': 'ss_ben',
+        # 'SNAP': 'snap_ben',
+        'SLTX': 'e18400'
+    }
+    data = data.rename(columns=renames)
+    data['MARS'] = np.where(data.JS == 3, 4, data.JS)  # recode JS 3 as MARS 4
+
+    # Use taxpayer and spouse records to get total tax unit earnings and AGI
+    data['e00100'] = data['JCPS9'] + data['JCPS19']
+    data['e00200'] = data['e00200p'] + data['e00200s']
+    data['e00900'] = data['e00900p'] + data['e00900s']
+    data['e02100'] = data['e02100p'] + data['e02100s']
+    # Determine amount of qualified dividends using IRS ratio
+    data['e00650'] = data.e00600 * 0.7556
+
+    # Split interest income into taxable and tax exempt using IRS ratio
+    taxable = 0.6
+    nontaxable = 1. - taxable
+    data['e00300'] = data.INTST * taxable
+    data['e00400'] = data.INTST * nontaxable
+
+    # Split pensions and annuities using PUF ratio
+    data['e01700'] = data['e01500'] * 0.1656
+
+    print('Applying deduction limits')
+    data = deduction_limits(data)
+    print('Adding dependents')
+    data = add_dependents(data)
+    print('Adding AGI bins')
+    data = add_agi_bin(data, 'INCOME')
+    print('Adjusting distribution')
+    data = adjust(data, adj_targets)
+    # print('Adding Benefits Data')
+    # data = benefits(data, other_ben)
+    print('Dropping unused variables')
+    data = drop_vars(data)
+
+    data = data.fillna(0.)
+    print('Exporting...')
+    data.to_csv('cps.csv', index=False)
+    subprocess.check_call(["gzip", "-n", "-f", "cps.csv"])  # -f: overwrite .gz
+
+
+def deduction_limits(data):
+    """
+    Apply limits on deductions; adds e19800, e20100, e03210 and e03150 to
+    data and returns data
+    """
+    # Charitable giving is limited to half of AGI; floor that limit at zero
+    # so that a negative AGI cannot produce a negative deduction
+    agi_cap = np.maximum(data['e00100'] * 0.5, 0.)
+    charity = np.where(data.CHARITABLE > agi_cap, agi_cap, data.CHARITABLE)
+    # Split charitable contributions into cash and non-cash using ratio in PUF
+    cash = 0.82013
+    data['e19800'] = charity * cash
+    data['e20100'] = charity * (1. - cash)
+
+    # Apply student loan interest deduction limit
+    data['e03210'] = np.where(data.SLINT > 2500, 2500, data.SLINT)
+
+    # Apply IRA contribution limit: 6500 from age 50 on, otherwise 5500
+    ira_cap = np.where(data.AGE >= 50, 6500, 5500)
+    data['e03150'] = np.where(data.ADJIRA > ira_cap, ira_cap, data.ADJIRA)
+
+    return data
+
+
+def add_dependents(data):
+    """
+    Add dependent counts based on the ages held in ICPS03 through ICPS07
+
+    Parameters
+    ----------
+    data: CPS in DataFrame format
+
+    Returns
+    -------
+    data with the columns nu13, nu05, n24, elderly_dependent, f2441 and EIC
+    added.  The child counts only include positive ages, and n24, f2441 and
+    EIC are capped at three.
+    """
+    dep_cols = ['ICPS03', 'ICPS04', 'ICPS05', 'ICPS06', 'ICPS07']
+
+    def count(test, columns=dep_cols):
+        """
+        Return how many of the age columns pass test on each record
+
+        Looping over the columns guarantees that every dependent is
+        compared against his or her own age.
+        """
+        return sum(np.where(test(data[col]), 1, 0) for col in columns)
+
+    # NOTE(review): the '<=' bounds below include the boundary age although
+    # the comments say "under"; confirm against the PUF definitions
+
+    # Count number of dependents under 13
+    # Max of four to match PUF version of nu13
+    nu13 = count(lambda age: (age > 0) & (age <= 13), dep_cols[:4])
+    data['nu13'] = nu13
+
+    # Count number of dependents under 5
+    # All five dependent slots are checked
+    nu05 = count(lambda age: (age > 0) & (age <= 5))
+    data['nu05'] = nu05
+
+    # Count number of children eligible for child tax credit
+    # Max of three to match PUF version of n24
+    n24 = count(lambda age: (age > 0) & (age <= 17))
+    data['n24'] = np.where(n24 > 3, 3, n24)
+
+    # Count number of elderly dependents
+    elderly = count(lambda age: age >= 65)
+    data['elderly_dependent'] = elderly
+
+    # Count number eligible for f2441
+    # Max of three
+    qualified = count(lambda age: (age > 0) & (age < 13))
+    data['f2441'] = np.where(qualified <= 3, qualified, 3)
+
+    # Count number eligible for EIC
+    # Max of three
+    qualified = count(lambda age: (age > 0) & (age < 19))
+    data['EIC'] = np.where(qualified > 3, 3, qualified)
+
+    return data
+
+
+def drop_vars(data):
+    """
+    Return a copy of data restricted to the columns listed in useable_vars
+
+    Names in the list that data does not contain are simply ignored.
+    """
+    useable_vars = [
+        'DSI', 'EIC', 'FLPDYR', 'MARS', 'MIDR', 'RECID', 'XTOT', 'age_head',
+        'age_spouse', 'agi_bin', 'blind_head', 'blind_spouse', 'cmbtp',
+        'e00200', 'e00200p', 'e00200s', 'e00300', 'e00400', 'e00600', 'e00650',
+        'e00700', 'e00800', 'e00900', 'e00900p', 'e00900s', 'e01100', 'e01200',
+        'e01400', 'e01500', 'e01700', 'e02000', 'e02100', 'e02100p', 'e02100s',
+        'e02300', 'e02400', 'e03150', 'e03220', 'e03230', 'e03240', 'e03270',
+        'e03290', 'e03300', 'e03400', 'e03500', 'e07240', 'e07260', 'e07300',
+        'e07400', 'e07600', 'e09700', 'e09800', 'e09900', 'e11200', 'e17500',
+        'e18400', 'e18500', 'e19200', 'e19800', 'e20100', 'e20400', 'g20500',
+        'e24515', 'e24518', 'e26270', 'e27200', 'e32800', 'e58990', 'e62900',
+        'e87530', 'elderly_dependent', 'f2441', 'f6251', 'filer', 'n24',
+        'nu05', 'nu13', 'nu18', 'n1821', 'n21', 'p08000', 'p22250', 'p23250',
+        'p25470', 'p87521', 's006', 'e03210', 'ssi_ben', 'snap_ben',
+        'vet_ben', 'mcare_ben', 'mcaid_ben', 'ss_ben', 'other_ben', 'total_ben'
+    ]
+    # for i in range(1, 16):
+    #    useable_vars.append('SSI_VAL{}'.format(str(i)))
+    #    useable_vars.append('SSI_PROB{}'.format(str(i)))
+    # Collect every column that is missing from the list, then remove them
+    unused = [col for col in data.columns if col not in useable_vars]
+    data = data.drop(unused, axis=1)
+    return data
+
+
+def add_agi_bin(data, col_name):
+    """
+    Add an AGI bin indicator used in Tax-Calc to apply adjustment factors
+
+    Parameters
+    ----------
+    data: CPS in DataFrame format
+    col_name: name of the column in data holding the income to be binned
+
+    Returns
+    -------
+    data with an integer agi_bin column running from 0 (negative income)
+    to 18 (income of ten million or more)
+    """
+    # Lower bound of bins 1 through 18; bin 0 is everything below zero
+    edges = [0, 5000, 10000, 15000, 20000, 25000, 30000, 40000, 50000, 75000,
+             100000, 200000, 500000, 1e6, 1.5e6, 2e6, 5e6, 1e7]
+
+    income = np.asarray(data[col_name], dtype=float)
+    # digitize returns i such that edges[i - 1] <= income < edges[i], which
+    # is the bin number; every comparison is made on col_name
+    agi = np.digitize(income, edges)
+    # Missing income belongs to no bin and is given bin zero
+    agi[np.isnan(income)] = 0
+
+    data['agi_bin'] = agi
+
+    return data
+
+
+def adjust_helper(agi, var, target, weight, agi_bin):
+    """
+    Rescale var so its weighted total follows target's split across AGI bins
+
+    Parameters
+    ----------
+    agi: AGI provided in the CPS
+    var: variable being adjusted (pass a copy; it may be modified in place)
+    target: target bin levels
+    weight: weights
+    agi_bin: bin number (0-18) of each record, used to look up its ratio
+
+    Returns
+    -------
+    Series containing the adjusted values of the variable
+    """
+    # Goal total ensures the weighted sum of the variable wont change
+    goal_total = (var * weight).sum()
+    # Goal distribution based on IRS data
+    distribution = target / target.sum()
+    # Find the goal amount in each bin
+    goal_amts = goal_total * distribution
+    # Find current totals in each bin
+    bin_0 = np.where(agi < 0,
+                     var * weight, 0).sum()
+    bin_1 = np.where((agi >= 0) & (agi < 5000),
+                     var * weight, 0).sum()
+    bin_2 = np.where((agi >= 5000) & (agi < 10000),
+                     var * weight, 0).sum()
+    bin_3 = np.where((agi >= 10000) & (agi < 15000),
+                     var * weight, 0).sum()
+    bin_4 = np.where((agi >= 15000) & (agi < 20000),
+                     var * weight, 0).sum()
+    bin_5 = np.where((agi >= 20000) & (agi < 25000),
+                     var * weight, 0).sum()
+    bin_6 = np.where((agi >= 25000) & (agi < 30000),
+                     var * weight, 0).sum()
+    bin_7 = np.where((agi >= 30000) & (agi < 40000),
+                     var * weight, 0).sum()
+    bin_8 = np.where((agi >= 40000) & (agi < 50000),
+                     var * weight, 0).sum()
+    bin_9 = np.where((agi >= 50000) & (agi < 75000),
+                     var * weight, 0).sum()
+    bin_10 = np.where((agi >= 75000) & (agi < 100000),
+                      var * weight, 0).sum()
+    bin_11 = np.where((agi >= 100000) & (agi < 200000),
+                      var * weight, 0).sum()
+    bin_12 = np.where((agi >= 200000) & (agi < 500000),
+                      var * weight, 0).sum()
+    bin_13 = np.where((agi >= 500000) & (agi < 1e6),
+                      var * weight, 0).sum()
+    bin_14 = np.where((agi >= 1e6) & (agi < 1.5e6),
+                      var * weight, 0).sum()
+    bin_15 = np.where((agi >= 1.5e6) & (agi < 2e6),
+                      var * weight, 0).sum()
+    bin_16 = np.where((agi >= 2e6) & (agi < 5e6),
+                      var * weight, 0).sum()
+    bin_17 = np.where((agi >= 5e6) & (agi < 1e7),
+                      var * weight, 0).sum()
+    bin_18 = np.where((agi >= 1e7),
+                      var * weight, 0).sum()
+    # Create series holding each of the current totals
+    actual_amts = pd.Series([bin_0, bin_1, bin_2, bin_3, bin_4, bin_5,
+                             bin_6, bin_7, bin_8, bin_9, bin_10, bin_11,
+                             bin_12, bin_13, bin_14, bin_15, bin_16,
+                             bin_17, bin_18],
+                            index=goal_amts.index)
+    # Ratio of goal to current total per bin, as a plain positional array
+    ratios = np.asarray(goal_amts / actual_amts, dtype=float)
+    # An empty bin gives +/-inf or NaN; leave such bins unscaled
+    ratios = np.where(np.isfinite(ratios), ratios, 1.)
+
+    # Apply each record's bin ratio; var itself may be updated in place
+    adj_array = ratios[agi_bin]
+    var *= adj_array
+
+    return var
+
+
+def adjust(data, targets):
+    """
+    Rescale interest, dividend and business income by AGI bin
+
+    data: CPS in DataFrame format
+    targets: targeted totals provided by the IRS
+
+    Returns data with e00300, e00600, e00650, e00900, e00900p and e00900s
+    replaced by their adjusted values
+    """
+    income = data['INCOME'].copy()
+
+    def rescaled(column, target_name):
+        # adjust_helper gets its own copy of the column to work on
+        return adjust_helper(income, data[column].copy(),
+                             targets[target_name], data['s006'],
+                             data['agi_bin'])
+
+    data['e00300'] = rescaled('e00300', 'INT')
+    # Combined dividends are re-split using the pre-adjustment share
+    ord_share = data['e00600'] / (data['e00600'] + data['e00650'])
+    data['e00600'] = rescaled('e00600', 'ODIV')
+    data['e00650'] = rescaled('e00650', 'QDIV')
+    dividends = data['e00600'] + data['e00650']
+    data['e00600'] = dividends * ord_share
+    data['e00650'] = dividends * (1. - ord_share)
+    # Business income keeps its pre-adjustment taxpayer/spouse split
+    head_share = data['e00900p'] / data['e00900']
+    data['e00900'] = rescaled('e00900', 'BIZ')
+    data['e00900p'] = data['e00900'] * head_share
+    data['e00900s'] = data['e00900'] * (1. - head_share)
+
+    return data
+
+
+def benefits(data, other_ben):
+    """
+    Distribute benefits from non-modeled benefit programs and create total
+    benefits variable; the amount distributed is the sum of other_ben['Cost']
+    """
+    # Each record's share of weighted modeled benefits; the shares sum to one
+    data['dist_ben'] = (data['mcaid_ben'] + data['ssi_ben'] +
+                        data['snap_ben'] + data['vet_ben'])
+    weighted_ben = data['dist_ben'] * data['s006']
+    data['ratio'] = weighted_ben / weighted_ben.sum()
+    data['other_ben'] = data['ratio'] * other_ben['Cost'].sum() / data['s006']
+    data['total_ben'] = (data['mcaid_ben'] + data['mcare_ben'] +
+                         data['ssi_ben'] + data['snap_ben'] + data['ss_ben'] +
+                         data['vet_ben'] + data['other_ben'])
+    return data
+
+
+if __name__ == '__main__':
+    sys.exit(main())  # main() returns None, so the exit status is 0
diff --git a/cps_stage2/README.md b/cps_stage2/README.md
index 51299775..54e518c7 100644
--- a/cps_stage2/README.md
+++ b/cps_stage2/README.md
@@ -3,16 +3,19 @@ About cps_stage2
 
 This directory contains the following script:
 
-* Python script **....py**, which reads/writes:
+* Python script `finalprep.py`, which reads/writes:
 
   Input files:
-    - ...
+    - `cps_weights_raw.csv.gz`
 
   Output files:
-    - ...
+    - `cps_weights.csv.gz`
 
 
 Documentation
 -------------
 
-**all documentation files go in a `doc` subdirectory of this directory**
+`cps_weights_raw.csv.gz` was provided to us by John O'Hare of
+[Quantria Strategies](http://www.quantria.com). `finalprep.py`
+reads in this file, multiplies each weight by 100, and rounds each result from
+a floating-point value to an integer in order to reduce file size.
diff --git a/cps_stage2/cps_weights.csv.gz b/cps_stage2/cps_weights.csv.gz
new file mode 100644
index 00000000..9d8bb767
Binary files /dev/null and b/cps_stage2/cps_weights.csv.gz differ
diff --git a/cps_stage2/cps_weights_raw.csv.gz b/cps_stage2/cps_weights_raw.csv.gz
new file mode 100644
index 00000000..419994ea
Binary files /dev/null and b/cps_stage2/cps_weights_raw.csv.gz differ
diff --git a/cps_stage2/finalprep.py b/cps_stage2/finalprep.py
new file mode 100644
index 00000000..02c5d339
--- /dev/null
+++ b/cps_stage2/finalprep.py
@@ -0,0 +1,9 @@
+import pandas as pd
+import subprocess
+
+
+weights = pd.read_csv('cps_weights_raw.csv.gz', compression='gzip')
+weights *= 100.  # keep two decimal places through the integer cast below
+weights = weights.round(0).astype('int64')
+weights.to_csv('cps_weights.csv', index=False)
+subprocess.check_call(['gzip', '-n', '-f', 'cps_weights.csv'])  # -f: overwrite