Skip to content

Commit

Permalink
Updated scripts for processing infrastructure dataset, generalized wo…
Browse files Browse the repository at this point in the history
…rkflow script name and contents to import appropriate config depending on dataset, generalized name of config object, removed unnecessary comments and old code, updated python environment in slurm script to reflect recent updates to vis-raster package.
  • Loading branch information
julietcohen committed Mar 13, 2024
1 parent a2907a1 commit f5960b8
Show file tree
Hide file tree
Showing 5 changed files with 147 additions and 147 deletions.
75 changes: 75 additions & 0 deletions infrastructure_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
from datetime import datetime
import subprocess
import numpy as np

# NOTE: every directory path defined here must end with a trailing slash "/".

# Look up the account running this script on Delta so that output lands in
# that user's own directory instead of another user's.
_whoami_stdout = subprocess.check_output("whoami")
user = _whoami_stdout.strip().decode("ascii")

# Head node name (with trailing slash); it is appended to the remote staging
# directory below to form the merged staging directory.
head_node = 'cn102/'
# alternative form: head_node = 'gpub___'

# Input is read from a fixed location; output is written under `user`.
INPUT = '/scratch/bbou/julietcohen/infrastructure/input/'
output_subdir = 'infrastructure/output'
OUTPUT = '/scratch/bbou/' + user + '/' + output_subdir + '/'

# Staged files: local copy, remote copy, and the per-head-node merged dir.
STAGING_LOCAL = '/tmp/staged/'
STAGING_REMOTE = f'{OUTPUT}staged/'
STAGING_REMOTE_MERGED = f'{STAGING_REMOTE}{head_node}'

# GeoTIFFs: local copy and remote copy.
GEOTIFF_LOCAL = '/tmp/geotiff/'
GEOTIFF_REMOTE = f'{OUTPUT}geotiff/'

# Web tiles only have a remote location.
WEBTILE_REMOTE = f'{OUTPUT}web_tiles/'

# Colors used by the single statistic defined below.
_PALETTE = [
    "#f48525",
    "#f4e625",
    "#47f425",
    "#25f4e2",
    "#2525f4",
    "#f425c3",
    "#f42525",
]

# The one statistic configured for this dataset.
_INFRASTRUCTURE_CODE_STATISTIC = {
    "name": "infrastructure_code",
    "weight_by": "area",
    "property": "DN",
    "aggregation_method": "max",
    "resampling_method": "nearest",
    "val_range": [11, 50],
    "palette": _PALETTE,
    "nodata_val": 0,
    "nodata_color": "#ffffff00",
}

# Final config is exported here, and imported in the workflow python file.
CONFIG = {
    # deduplication settings (all disabled / unset)
    "deduplicate_clip_to_footprint": False,
    "deduplicate_method": None,
    "deduplicate_at": None,
    "deduplicate_keep_rules": None,
    # directories and input file extension
    "dir_output": OUTPUT,
    "dir_input": INPUT,
    "ext_input": ".gpkg",
    "dir_geotiff_remote": GEOTIFF_REMOTE,
    "dir_geotiff_local": GEOTIFF_LOCAL,
    "dir_web_tiles": WEBTILE_REMOTE,
    "dir_staged_remote": STAGING_REMOTE,
    "dir_staged_remote_merged": STAGING_REMOTE_MERGED,
    "dir_staged_local": STAGING_LOCAL,
    # summary / event CSV files, kept next to the data they describe
    "filename_staging_summary": STAGING_REMOTE + "staging_summary.csv",
    "filename_rasterization_events": GEOTIFF_REMOTE + "raster_events.csv",
    "filename_rasters_summary": GEOTIFF_REMOTE + "raster_summary.csv",
    # version string is the date on which this module is imported
    "version": datetime.now().strftime("%B%d,%Y"),
    # tiling and geometry settings
    "simplify_tolerance": 0.1,
    "tms_id": "WGS1984Quad",
    "z_range": [0, 12],
    "geometricError": 57,
    "z_coord": 0,
    "statistics": [_INFRASTRUCTURE_CODE_STATISTIC],
}
20 changes: 10 additions & 10 deletions rsync_staging_to_scratch.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,25 +14,25 @@
# IWP_CONFIG = PRODUCTION_IWP_CONFIG.IWP_CONFIG
# IWP_CONFIG2 = IWP_CONFIG.copy()

# for processing lake data:
import lake_change_config
IWP_CONFIG = lake_change_config.IWP_CONFIG
IWP_CONFIG2 = IWP_CONFIG.copy()

# for testing branches with IWP data:
# import branch_testing_iwp_config
# IWP_CONFIG = branch_testing_iwp_config.IWP_CONFIG
# IWP_CONFIG2 = IWP_CONFIG.copy()

# for infrastructure data:
import infrastructure_config
CONFIG = infrastructure_config.CONFIG
CONFIG2 = CONFIG.copy()
# -----------------------------------------------------

# set config properties for current context
IWP_CONFIG2['dir_staged'] = IWP_CONFIG2['dir_staged_local']
SOURCE = IWP_CONFIG2['dir_staged']
IWP_CONFIG2['dir_staged'] = IWP_CONFIG2['dir_staged_remote']
DESTINATION = IWP_CONFIG2['dir_staged']
CONFIG2['dir_staged'] = CONFIG2['dir_staged_local']
SOURCE = CONFIG2['dir_staged']
CONFIG2['dir_staged'] = CONFIG2['dir_staged_remote']
DESTINATION = CONFIG2['dir_staged']

print("Using config: ")
pprint.pprint(IWP_CONFIG2)
pprint.pprint(CONFIG2)

# define user on Delta, avoid writing files to other user's dir
user = subprocess.check_output("whoami").strip().decode("ascii")
Expand Down
42 changes: 9 additions & 33 deletions rsync_step2_raster_highest_to_scratch.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,20 @@
#import PRODUCTION_IWP_CONFIG
#IWP_CONFIG = PRODUCTION_IWP_CONFIG.IWP_CONFIG

# for processing lake data:
import lake_change_config
IWP_CONFIG = lake_change_config.IWP_CONFIG

# for testing branches with IWP data:
# import branch_testing_iwp_config
# IWP_CONFIG = branch_testing_iwp_config.IWP_CONFIG

# for infrastructure data:
import infrastructure_config
CONFIG = infrastructure_config.CONFIG
# -----------------------------------------------------

# set config properties for current context
IWP_CONFIG['dir_geotiff'] = IWP_CONFIG['dir_geotiff_local']
SOURCE = IWP_CONFIG['dir_geotiff']
IWP_CONFIG['dir_geotiff'] = IWP_CONFIG['dir_geotiff_remote']
DESTINATION = IWP_CONFIG['dir_geotiff']
CONFIG['dir_geotiff'] = CONFIG['dir_geotiff_local']
SOURCE = CONFIG['dir_geotiff']
CONFIG['dir_geotiff'] = CONFIG['dir_geotiff_remote']
DESTINATION = CONFIG['dir_geotiff']

# define user on Delta, avoid writing files to other user's dir
user = subprocess.check_output("whoami").strip().decode("ascii")
Expand Down Expand Up @@ -57,28 +57,4 @@
process = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE)

print("All jobs launched! They will work in the background WITHOUT stdout printing. ")

# OLD CODE THAT IS WRONG BECAUSE WE DO NOT WANT HOSTNAMES TO BE SUBDIRS OF THE GEOTIFF DIR IN SCRATCH BC
# ALL RASTER HIGHEST NEED TO BE IN THE SAME SUBDIR TO CORRECTLY EXECUTE THE RASTER LOWER STEP
# count = 0
# for hostname in hostnames:
# # to use ssh in rsync (over a remote shell) use the following: `rsync -rv --rsh=ssh hostname::module /dest`
# # see https://manpages.ubuntu.com/manpages/focal/en/man1/rsync.1.html (USING RSYNC-DAEMON FEATURES VIA A REMOTE-SHELL CONNECTION)

# # mkdir then sync
# mkdir = ['mkdir', '-p', f'{DESTINATION}{hostname}']
# process = Popen(mkdir, stdin=PIPE, stdout=PIPE, stderr=PIPE)
# time.sleep(0.2)

# ssh = ['ssh', f'{hostname}',]
# rsync = ['rsync', '-r', '--update', SOURCE, f'{DESTINATION}{hostname}']
# cmd = ssh + rsync
# print(f"'{count} of {len(hostnames)}'. running command: {cmd}")
# count += 1

# process = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE)

# print("All jobs launched! They will work in the background WITHOUT stdout printing. ")

# optional improvement
# shlex.split(s) -- turn cmd line args into a list.

7 changes: 4 additions & 3 deletions slurm/BEST_cpu_ray_double_srun.slurm
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
#SBATCH --job-name=pdg_viz
#SBATCH --partition=cpu
#SBATCH --account=
#SBATCH --time=24:00:00
#SBATCH --time=48:00:00

#SBATCH --export=ALL,RAY_worker_register_timeout_seconds=120

#SBATCH --nodes=20
#SBATCH --nodes=1
#SBATCH --mem=0
#SBATCH --exclusive

Expand Down Expand Up @@ -39,7 +39,8 @@ set -x
echo "This is BEST_cpu_ray_double_srun.slurm"

# venv init
source /scratch/bbou/julietcohen/venv/iwp_3/bin/activate
source /scratch/bbou/julietcohen/venv/infrastructure/bin/activate
# source /scratch/bbou/julietcohen/venv/iwp_3/bin/activate
# set file soft limit to maximum value (not unlimited because that's not permitted)
# before any srun's are executed
ulimit -n 32768
Expand Down
Loading

0 comments on commit f5960b8

Please sign in to comment.