-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ensemble capability, KCRPS for single-step training and batch-size=1
- Loading branch information
Showing
4 changed files
with
369 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,292 @@ | ||
# --------------------------------------------------------------------------------------------------------------------- # | ||
# This yaml file implements hourly state-in-state-out crossformer | ||
# on NSF NCAR HPCs (casper.ucar.edu and derecho.hpc.ucar.edu) | ||
# The model is trained on hourly model-level ERA5 data with top solar irradiance, geopotential, and land-sea mask | ||
# Output variables: model level [U, V, T, Q], single level [SP, t2m], and 500 hPa [U, V, T, Z, Q] | ||
# --------------------------------------------------------------------------------------------------------------------- # | ||
save_loc: '/glade/work/$USER/CREDIT_runs/test_cesm_ensemble/' | ||
seed: 1000 | ||
|
||
#net short top of model: FSNT | ||
#net short bot of model: FSNS | ||
#net long top of model: FLNT | ||
#net long bot of model: FLNS | ||
#note Q is vapor + condensed (total liquid) | ||
#Fnet_sfc = FSNS - FLNS - LHFLX - SHFLX <<<<<<------- | ||
#Fnet_TOA = | ||
|
||
#The net radiative flux at "top of atmosphere" is sometimes called RESTOA ("RES" for residual), and at "top of model" it is called RESTOM. These are typically diagnosed from model output as | ||
|
||
#RESTOM = FSNT - FLNT <<<<<<------- | ||
#RESTOA = FSNTOA - FLUT | ||
|
||
#The RHS of these are the net shortwave and longwave fluxes. They are computed as differences: | ||
|
||
#FSNT = SOLIN - FSUT | ||
#FSNTOA = SOLIN - FSUTOA | ||
#FLNT = FLUT - [FLDT] | ||
|
||
#FLDT is not usually output from the model, but FLNT and FLUT are. SOLIN is the insolation. | ||
|
||
data: | ||
# upper-air variables | ||
variables: ['U','V','T','Q'] | ||
save_loc: '/glade/campaign/cisl/aiml/wchapman/MLWPS/STAGING/f.e21.CREDIT_climate_????.zarr' | ||
|
||
# surface variables | ||
surface_variables: ['PS','PSL','TREFHT'] | ||
save_loc_surface: '/glade/campaign/cisl/aiml/wchapman/MLWPS/STAGING/f.e21.CREDIT_climate_????.zarr' | ||
|
||
# dynamic forcing variables | ||
dynamic_forcing_variables: ['SOLIN'] | ||
save_loc_dynamic_forcing: '/glade/campaign/cisl/aiml/wchapman/MLWPS/STAGING/f.e21.CREDIT_climate_????.zarr' | ||
|
||
# static variables | ||
static_variables: ['z_norm','LANDM_COSLAT'] | ||
save_loc_static: '/glade/derecho/scratch/dkimpara/f.e21.CREDIT_climate.statics_1.0deg_32levs_latlon_v3.nc' | ||
|
||
# diagnostic variables | ||
diagnostic_variables: ['PRECT','CLDTOT','CLDHGH','CLDLOW','CLDMED','TAUX','TAUY','U10', 'QFLX','FSNS','FLNS','FSNT','FLNT','SHFLX','LHFLX'] | ||
save_loc_diagnostic: '/glade/campaign/cisl/aiml/wchapman/MLWPS/STAGING/f.e21.CREDIT_climate_????.zarr' | ||
|
||
save_loc_physics: '/glade/derecho/scratch/dkimpara/f.e21.CREDIT_climate.statics_1.0deg_32levs_latlon_v3.nc' | ||
|
||
# mean / std path | ||
mean_path: '/glade/campaign/cisl/aiml/wchapman/MLWPS/STAGING/mean_6h_1979_2010_32lev_1.0deg_new.nc' | ||
# regular z-score version | ||
# std_path: '/glade/derecho/scratch/ksha/CREDIT_data/std_1h_1979_2018_16lev_0.25deg.nc' | ||
# residual norm version | ||
std_path: '/glade/campaign/cisl/aiml/wchapman/MLWPS/STAGING/std_6h_1979_2010_32lev_1.0deg_new.nc' | ||
|
||
# train / validation split | ||
train_years: [1979, 2009] | ||
valid_years: [2010, 2014] | ||
|
||
scaler_type: 'std_new' | ||
|
||
# state-in-state-out | ||
history_len: 1 | ||
valid_history_len: 1 | ||
|
||
# single step | ||
forecast_len: 0 | ||
valid_forecast_len: 0 | ||
|
||
one_shot: True | ||
lead_time_periods: 6 | ||
skip_periods: null | ||
|
||
# compatible with the old 'std' | ||
static_first: True | ||
|
||
level_info_file: '/glade/derecho/scratch/dkimpara/f.e21.CREDIT_climate.statics_1.0deg_32levs_latlon_v3.nc' | ||
#level_list: [10, 30, 40, 50, 60, 70, 80, 90, 95, 100, 105, 110, 120, 130, 136, 137] not used for cesm | ||
timestep: 21600 # 1h timestep | ||
|
||
trainer: | ||
type: standard # <---------- change to your type | ||
|
||
mode: none | ||
cpu_offload: False | ||
activation_checkpoint: True | ||
|
||
load_weights: True | ||
load_optimizer: False | ||
load_scaler: False | ||
load_sheduler: False | ||
|
||
skip_validation: False | ||
update_learning_rate: False | ||
|
||
save_backup_weights: True | ||
save_best_weights: True | ||
|
||
learning_rate: 1.0e-04 # <-- change to your lr | ||
weight_decay: 0 | ||
|
||
train_batch_size: 1 | ||
valid_batch_size: 1 | ||
ensemble_size: 2 | ||
long_rollout: True | ||
|
||
batches_per_epoch: 1000 | ||
valid_batches_per_epoch: 4 | ||
stopping_patience: 999 | ||
|
||
start_epoch: 0 | ||
num_epoch: 50 | ||
reload_epoch: False | ||
epochs: &epochs 100 | ||
|
||
use_scheduler: True | ||
scheduler: {'scheduler_type': 'cosine-annealing', 'T_max': *epochs, 'last_epoch': -1} | ||
|
||
amp: False | ||
grad_accum_every: 1 | ||
grad_max_norm: 1.0 | ||
|
||
# number of workers | ||
thread_workers: 4 | ||
valid_thread_workers: 0 | ||
|
||
model: | ||
# crossformer example | ||
type: "crossformer" | ||
frames: 1 # number of input states (default: 1) | ||
image_height: 192 # number of latitude grids (default: 640) | ||
image_width: 288 # number of longitude grids (default: 1280) | ||
levels: 32 # number of upper-air variable levels (default: 15) | ||
channels: 4 # upper-air variable channels | ||
surface_channels: 3 # surface variable channels | ||
input_only_channels: 3 # dynamic forcing, forcing, static channels | ||
output_only_channels: 15 # diagnostic variable channels | ||
|
||
patch_width: 1 # number of latitude grids in each 3D patch (default: 1) | ||
patch_height: 1 # number of longitude grids in each 3D patch (default: 1) | ||
frame_patch_size: 1 # number of input states in each 3D patch (default: 1) | ||
|
||
dim: [64, 128, 256, 512] # Dimensionality of each layer | ||
depth: [2, 2, 8, 2] # Depth of each layer | ||
global_window_size: [4, 4, 2, 1] # Global window size for each layer | ||
local_window_size: 3 # Local window size | ||
cross_embed_kernel_sizes: # kernel sizes for cross-embedding | ||
- [4, 8, 16, 32] | ||
- [2, 4] | ||
- [2, 4] | ||
- [2, 4] | ||
cross_embed_strides: [2, 2, 2, 2] # Strides for cross-embedding (default: [4, 2, 2, 2]) | ||
attn_dropout: 0. # Dropout probability for attention layers (default: 0.0) | ||
ff_dropout: 0. # Dropout probability for feed-forward layers (default: 0.0) | ||
|
||
# map boundary padding | ||
pad_lon: 48 # number of grids to pad on 0 and 360 deg lon | ||
pad_lat: 48 # number of grids to pad on -90 and 90 deg lat | ||
|
||
post_conf: | ||
activate: True | ||
#this scaling maps your variables to the ERA5 units | ||
#make sure to adjust if your timestep is not 6 hours | ||
requires_scaling: True | ||
scaling_coefs: | ||
tot_precip: 86400 #model timestep in seconds m/day -> m/s | ||
surf_net_solar: 21600 #model timestep in seconds (W/s) -> J/s | ||
surf_net_therm: -21600 #model timestep in seconds (W/s) -> J/s | ||
surf_shflx: -21600 #model timestep in seconds (W/s) -> J/s | ||
surf_lhflx: -21600 #model timestep in seconds (W/s) -> J/s | ||
top_net_solar: 21600 #model timestep in seconds (W/s) -> J/s | ||
top_net_therm: -21600 #model timestep in seconds (W/s) -> J/s | ||
evap: -1 | ||
SP: 1 | ||
U: 1 | ||
V: 1 | ||
gph_surf: 1 | ||
temp: 1 | ||
Q: 1 | ||
T: 1 | ||
|
||
skebs: | ||
activate: True | ||
lmax: None | ||
mmax: None | ||
freeze_base_model_weights: True | ||
multistep: False | ||
uniform_dissipation: True # tunable fixed dissipation rate array else, use a backscatter FCNN | ||
grid: 'equiangular' | ||
|
||
tracer_fixer: | ||
activate: False | ||
denorm: True | ||
tracer_name: ['Q','PRECT','U10','CLDTOT','CLDHGH','CLDLOW','CLDMED'] | ||
tracer_thres: [0, 0, 0, 0, 0, 0, 0] | ||
|
||
global_mass_fixer: | ||
activate: False | ||
activate_outside_model: False | ||
simple_demo: False | ||
denorm: True | ||
grid_type: 'sigma' | ||
midpoint: True | ||
fix_level_num: 14 | ||
lon_lat_level_name: ['lon2d', 'lat2d', 'hyai', 'hybi'] | ||
surface_pressure_name: ['PS'] | ||
specific_total_water_name: ['Q'] | ||
|
||
global_water_fixer: | ||
activate: False | ||
activate_outside_model: False | ||
simple_demo: False | ||
denorm: True | ||
grid_type: 'sigma' | ||
midpoint: True | ||
lon_lat_level_name: ['lon2d', 'lat2d', 'hyai', 'hybi'] | ||
surface_pressure_name: ['PS'] | ||
specific_total_water_name: ['Q'] | ||
precipitation_name: ['PRECT'] | ||
evaporation_name: ['QFLX'] | ||
|
||
global_energy_fixer: | ||
activate: False | ||
activate_outside_model: False | ||
simple_demo: False | ||
denorm: True | ||
grid_type: 'sigma' | ||
midpoint: True | ||
lon_lat_level_name: ['lon2d', 'lat2d', 'hyai', 'hybi'] | ||
surface_pressure_name: ['PS'] | ||
air_temperature_name: ['T'] | ||
specific_total_water_name: ['Q'] | ||
u_wind_name: ['U'] | ||
v_wind_name: ['V'] | ||
surface_geopotential_name: ['PHIS'] | ||
TOA_net_radiation_flux_name: ['FSNT', 'FLNT'] | ||
surface_net_radiation_flux_name: ['FSNS', 'FLNS'] | ||
surface_energy_flux_name: ['SHFLX', 'LHFLX',] | ||
|
||
loss: | ||
# the main training loss | ||
training_loss: "KCRPS" | ||
|
||
# power loss (x), spectral_loss (x) | ||
use_power_loss: False | ||
use_spectral_loss: False | ||
|
||
# use latitude weighting | ||
use_latitude_weights: True | ||
latitude_weights: '/glade/campaign/cisl/aiml/wchapman/MLWPS/STAGING/LSM_static_variables_ERA5_zhght_onedeg.nc' | ||
|
||
# turn-off variable weighting | ||
use_variable_weights: False | ||
|
||
predict: | ||
forecasts: | ||
type: "custom" # keep it as "custom" | ||
start_year: 2010 # year of the first initialization (where rollout will start) | ||
start_month: 1 # month of the first initialization | ||
start_day: 1 # day of the first initialization | ||
start_hours: [0] # hour-of-day for each initialization, 0 for 00Z, 12 for 12Z | ||
duration: 1 # number of days to initialize, starting from the (year, mon, day) above | ||
# duration should be divisible by the number of GPUs | ||
# (e.g., duration: 384 for 365-day rollout using 32 GPUs) | ||
days: 10 # forecast lead time as days (1 means 24-hour forecast) | ||
|
||
save_forecast: '/glade/derecho/scratch/wchapman/CREDIT/cesm_wxformer/' | ||
save_vars: ['PRECT','CLDTOT','CLDHGH','CLDLOW','CLDMED','TAUX','TAUY','U10','QFLX','FSNS','FLNS','FSNT','FLNT','SHFLX','LHFLX'] | ||
forcing_file: '/glade/campaign/cisl/aiml/wchapman/MLWPS/STAGING/f.e21.CREDIT_climate.climaterun.statics_1.0deg.nc' | ||
climate_timesteps: 2190 | ||
|
||
# turn-off low-pass filter | ||
use_laplace_filter: False | ||
|
||
# deprecated | ||
# save_format: "nc" | ||
|
||
pbs: #derecho | ||
conda: "/glade/u/home/dkimpara/credit-derecho" | ||
project: "NAML0001" | ||
job_name: "wxformer_1h" | ||
walltime: "12:00:00" | ||
nodes: 2 | ||
ncpus: 64 | ||
ngpus: 4 | ||
mem: '480GB' | ||
queue: 'main' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.