From af333e099086310da220ac0f0ac322ea806ac1e4 Mon Sep 17 00:00:00 2001 From: li000363 Date: Mon, 23 Sep 2024 15:59:35 -0400 Subject: [PATCH] post-scan update --- .../src/phsafe_safetab_reader/cef_config.ini | 10 ++++----- .../cef_validator_classes.py | 3 --- .../safetab_cef_config.ini | 10 ++++----- .../safetab_cef_config_2010.ini | 10 ++++----- .../safetab_h_cef_config_2010.ini | 12 +++++----- .../safetab_p_cef_reader.py | 22 ------------------- 6 files changed, 21 insertions(+), 46 deletions(-) diff --git a/mitre/src/phsafe_safetab_reader/cef_config.ini b/mitre/src/phsafe_safetab_reader/cef_config.ini index c816770..70e3182 100644 --- a/mitre/src/phsafe_safetab_reader/cef_config.ini +++ b/mitre/src/phsafe_safetab_reader/cef_config.ini @@ -1,8 +1,8 @@ [paths] -per_dir = s3://v-s3-das-ite-sourcedata-252999262699-us-gov-west-1/2010-convert/cef/us/per/ -unit_dir = s3://v-s3-das-ite-sourcedata-252999262699-us-gov-west-1/2010-convert/cef/us/unit/ -per_dir_pr = s3://v-s3-das-ite-sourcedata-252999262699-us-gov-west-1/2010-convert/cef/pr/per/ -unit_dir_pr = s3://v-s3-das-ite-sourcedata-252999262699-us-gov-west-1/2010-convert/cef/pr/unit/ -grfc_dir = s3://v-s3-das-ite-sourcedata-252999262699-us-gov-west-1/2010-convert/grfc/ +per_dir = +unit_dir = +per_dir_pr = +unit_dir_pr = +grfc_dir = per_file_format = CEF20_PER_%%s.txt unit_file_format = CEF20_UNIT_%%s.txt diff --git a/mitre/src/phsafe_safetab_reader/cef_validator_classes.py b/mitre/src/phsafe_safetab_reader/cef_validator_classes.py index b8b7717..f48c3d8 100644 --- a/mitre/src/phsafe_safetab_reader/cef_validator_classes.py +++ b/mitre/src/phsafe_safetab_reader/cef_validator_classes.py @@ -5,7 +5,6 @@ # This file was automatically generated by ../etl_2020/census_etl/spec_scanner.py on Fri Oct 30 14:06:08 2020 # Command line: ../etl_2020/census_etl/spec_scanner.py ../etl_2020/cef/ADO-DEV-CEF_FOR_CDL_Layouts-2020DRPS-20200611.xlsx --output_parser ../das_decennial/programs/reader/cef_2020/cef_validator_classes.py --tablenames CEF20_UNIT CEF20_PERSON CEF20_CNT -# Automatically generated on Fri Oct 30 14:06:08 2020 by /mnt/users/will0555/new-das-vm/das-vm-config/das_decennial/das_framework/ctools/schema/table.py def leftpad(x,width): return ' '*(width-len(str(x)))+str(x) @@ -488,7 +487,6 @@ def parse_piped_line(line): -# Automatically generated on Fri Oct 30 14:06:08 2020 by /mnt/users/will0555/new-das-vm/das-vm-config/das_decennial/das_framework/ctools/schema/table.py def leftpad(x,width): return ' '*(width-len(str(x)))+str(x) @@ -1023,7 +1021,6 @@ def parse_piped_line(line): -# Automatically generated on Fri Oct 30 14:06:08 2020 by /mnt/users/will0555/new-das-vm/das-vm-config/das_decennial/das_framework/ctools/schema/table.py def leftpad(x,width): return ' '*(width-len(str(x)))+str(x) diff --git a/mitre/src/phsafe_safetab_reader/safetab_cef_config.ini b/mitre/src/phsafe_safetab_reader/safetab_cef_config.ini index 427d7c9..720f50c 100644 --- a/mitre/src/phsafe_safetab_reader/safetab_cef_config.ini +++ b/mitre/src/phsafe_safetab_reader/safetab_cef_config.ini @@ -1,9 +1,9 @@ [paths] -per_dir = s3://v-s3-das-prod-data-412241963457-us-gov-west-1/mft/cdl-to-das/PER/ -unit_dir = s3://v-s3-das-prod-data-412241963457-us-gov-west-1/mft/cdl-to-das/UNIT/ -per_dir_pr = s3://v-s3-das-prod-data-412241963457-us-gov-west-1/mft/cdl-to-das/PER/ -unit_dir_pr = s3://v-s3-das-prod-data-412241963457-us-gov-west-1/mft/cdl-to-das/UNIT/ -grfc_dir = s3://v-s3-das-common-drps-412241963457-us-gov-west-1/2020/production/grfc/ +per_dir = +unit_dir = +per_dir_pr = +unit_dir_pr = +grfc_dir = per_file_format = CEF20_PER_%%s.txt unit_file_format = CEF20_UNIT_%%s.txt geo_file_format = grfc_tab20_%%s.txt diff --git a/mitre/src/phsafe_safetab_reader/safetab_cef_config_2010.ini b/mitre/src/phsafe_safetab_reader/safetab_cef_config_2010.ini index c2fd3b0..720f50c 100644 --- a/mitre/src/phsafe_safetab_reader/safetab_cef_config_2010.ini +++ b/mitre/src/phsafe_safetab_reader/safetab_cef_config_2010.ini @@ -1,9 +1,9 @@ [paths] -per_dir = s3://v-s3-das-ite-data-252999262699-us-gov-west-1/cef/2010_2020fmt_qrace_v2/per/ -unit_dir = s3://v-s3-das-ite-data-252999262699-us-gov-west-1/cef/2010_2020fmt_qrace_v2/unit/ -per_dir_pr = s3://v-s3-das-ite-data-252999262699-us-gov-west-1/cef/2010_2020fmt_qrace_v2/per/ -unit_dir_pr = s3://v-s3-das-ite-data-252999262699-us-gov-west-1/cef/2010_2020fmt_qrace_v2/unit/ -grfc_dir = s3://v-s3-das-ite-data-252999262699-us-gov-west-1/2010-convert/grfc/ +per_dir = +unit_dir = +per_dir_pr = +unit_dir_pr = +grfc_dir = per_file_format = CEF20_PER_%%s.txt unit_file_format = CEF20_UNIT_%%s.txt geo_file_format = grfc_tab20_%%s.txt diff --git a/mitre/src/phsafe_safetab_reader/safetab_h_cef_config_2010.ini b/mitre/src/phsafe_safetab_reader/safetab_h_cef_config_2010.ini index 362247e..81a9e78 100644 --- a/mitre/src/phsafe_safetab_reader/safetab_h_cef_config_2010.ini +++ b/mitre/src/phsafe_safetab_reader/safetab_h_cef_config_2010.ini @@ -1,10 +1,10 @@ [paths] -per_dir = s3://v-s3-das-ite-data-252999262699-us-gov-west-1/cef/2010_2020fmt_qrace_v2/per/ -unit_dir = s3://v-s3-das-ite-data-252999262699-us-gov-west-1/cef/2010_2020fmt_qrace_v2/unit/ -per_dir_pr = s3://v-s3-das-ite-data-252999262699-us-gov-west-1/cef/2010_2020fmt_qrace_v2/per/ -unit_dir_pr = s3://v-s3-das-ite-data-252999262699-us-gov-west-1/cef/2010_2020fmt_qrace_v2/unit/ -grfc_dir = s3://v-s3-das-ite-data-252999262699-us-gov-west-1/2010-convert/grfc/ -pop_dir = s3://v-s3-das-ite-data-252999262699-us-gov-west-1/pop-group-totals/ +per_dir = +unit_dir = +per_dir_pr = +unit_dir_pr = +grfc_dir = +pop_dir = per_file_format = CEF20_PER_%%s.txt unit_file_format = CEF20_UNIT_%%s.txt geo_file_format = grfc_tab20_%%s.txt diff --git a/mitre/src/phsafe_safetab_reader/safetab_p_cef_reader.py b/mitre/src/phsafe_safetab_reader/safetab_p_cef_reader.py index f22f8db..788f8f7 100644 --- a/mitre/src/phsafe_safetab_reader/safetab_p_cef_reader.py +++ b/mitre/src/phsafe_safetab_reader/safetab_p_cef_reader.py @@ -266,25 +266,3 @@ def get_person_df(self) -> SparkDataFrame: print("person_df:", p_reader.get_person_df()) - ''' - import uuid - unique_id = uuid.uuid1() - model_write = f"cef_reader_{unique_id}" - file_name = f"{model_write}.txt" - local_file = f"/tmp/{file_name}" - s3_file = f"s3://uscb-decennial-ite-das/users/zhou0021/debug/reader/{file_name}" - - print("TEST123-----------------------------") - for i in range(10): - print(reader.get_person_df().rdd.takeSample(False,1)) - print("PERDF-----------------") - print(reader.get_unit_df().rdd.takeSample(False,1)) - print("UNIDF_________________") - - with open(local_file, "at+") as f: - f.write(f"PER_DF:{reader.get_person_df().rdd.takeSample(False,1)}\n\n") - f.write(f"UNIT_DF:{reader.get_unit_df().rdd.takeSample(False,1)}\n\n") - - cmd = ['/usr/bin/aws', 's3', 'cp', local_file, s3_file] - subprocess.Popen(cmd, stderr=subprocess.PIPE) - '''