Merge pull request #22 from scottstanie/orbit-step-2

Move orbit download to step 2
scottstanie authored Aug 25, 2023
2 parents f1b9245 + 0f810f1 commit bac0e11
Showing 7 changed files with 135 additions and 68 deletions.
37 changes: 25 additions & 12 deletions README.md
@@ -3,24 +3,28 @@

Workflow for creating unwrapped interferograms from Sentinel-1 geocoded SLCs.

## Install

+`sweets` is available to install via conda-forge:
+
+```bash
+mamba install -c conda-forge sweets
+```

-The following will install `sweets` into a conda environment.
+Alternatively, the following will install `sweets` into a conda environment.

1. Download source code:
```bash
git clone https://github.com/opera-adt/sweets.git && cd sweets
```
2. Install dependencies:
```bash
-conda env create --file conda-env.yml
+mamba env create --file conda-env.yml
```

or if you have an existing environment:
```bash
-conda env update --name my-existing-env --file conda-env.yml
+mamba env update --name my-existing-env --file conda-env.yml
```

3. Install `sweets` via pip:
@@ -29,27 +33,32 @@ conda activate sweets-env
python -m pip install .
```


## Usage

Installing creates a `sweets` command-line executable. You can run `sweets --help` to see the available options:
```bash
sweets --help
```
-You need to specify the bounding box (left, bottom, right, top) of the area of interest, the start dates (and end date, or it is assumed to be today), and the track number.
+To configure a workflow, the minimum inputs are
+- the bounding box of the area of interest in degrees longitude/latitude, as (left, bottom, right, top)
+- the start date (and an end date; otherwise today is assumed)
+- the track (relative orbit) number

For example:

```bash
-sweets config --bbox -102.3407 31.9909 -101.9407 32.3909 --start "2022-10-15" --track 78
-sweets run sweets_config.yaml
+sweets config --bbox -102.2 32.15 -102.1 32.22 --start 2022-12-15 --end 2022-12-29 --track 78
```
+This will make a YAML configuration file (by default `sweets_config.yaml`). You can inspect it to see all the configuration defaults.

-You can also print an empty config file to edit any parameters manually

+Then you can kick off the workflow using
```bash
-sweets config --print-empty
+sweets run sweets_config.yaml
```

-Or you can set all the parameters in python:
+### Configuration from Python
+
+Alternatively, you can configure everything in Python:
```python
from sweets.core import Workflow
bbox = (-102.3407, 31.9909, -101.9407, 32.3909)
@@ -70,7 +79,11 @@ w = Workflow.from_yaml("sweets_config.yml")
w.run()
```

+You can also print an empty config file to edit any parameters manually:
+
+```bash
+sweets config --print-empty
+```

## License

11 changes: 4 additions & 7 deletions conda-env.yml
@@ -5,16 +5,14 @@ dependencies:
  - python>=3.8
  - pip>=21.3 # https://pip.pypa.io/en/stable/reference/build-system/pyproject-toml/#editable-installation
  - git # for pip install, due to setuptools_scm
-  - aria2 # ASF Downloading
+  - wget # ASF Downloading
  # - isce3>=0.14.0
  # - compass>=0.4.1
  # - dask>=2022.6.0
  # - dolphin>=0.2.0
  # - gdal>=3.5
  # - geopandas-base>=0.12.0
  # - h5py>=3.6
  # - numpy>=1.20
  # - pandas>=1.5
  # - pydantic>=2.1
  # - requests>=2.20
  # - rich>=12.0
@@ -28,16 +26,15 @@ dependencies:
  - dask
  - dolphin>=0.3.0
  - gdal
  # - geopandas-base
  - h5py
  # - matplotlib-base
  - numpy
  # - pandas
  - pydantic>2.1
  - pyproj>=3.3
  - python-dateutil
  - rasterio
  - requests
  - rich
  - rioxarray
  - s1reader>=0.2.1
  - sardem
  - sentineleof
  - shapely
78 changes: 47 additions & 31 deletions docker/specfile.txt

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/sweets/_orbit.py
@@ -10,7 +10,7 @@

def download_orbits(search_path: Path, save_dir: Path) -> List[Path]:
    """Download orbit files for a given search path."""
-    logger.debug(f"search_path: {search_path}, save_dir: {save_dir}")
+    logger.info(f"Orbit search_path: {search_path}, save_dir: {save_dir}")
    filenames = download.main(
        search_path=search_path,
        save_dir=save_dir,
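For reference, a minimal usage sketch of this helper (the directory names are hypothetical; `download.main` is presumably the `sentineleof` downloader, which appears in the dependency list above):

```python
from pathlib import Path

from sweets._orbit import download_orbits

# Scan the SLC download directory and fetch the matching orbit files.
orbit_files = download_orbits(search_path=Path("data"), save_dir=Path("orbits"))
print(f"Got {len(orbit_files)} orbit files")
```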
29 changes: 14 additions & 15 deletions src/sweets/core.py
@@ -253,7 +253,7 @@ def _download_water_mask(self) -> Future:
            create_water_mask, self._water_mask_filename, self._dem_bbox
        )

-    def _download_rslcs(self) -> Future:
+    def _download_rslcs(self) -> list[Path]:
        """Download Sentinel zip files from ASF."""
        self.log_dir.mkdir(parents=True, exist_ok=True)
        # The final name will depend on if we're unzipping or not
@@ -264,18 +264,15 @@ def _download_rslcs(self) -> Future:
                f"Found {len(existing_files)} existing files in"
                f" {self.asf_query.out_dir}. Skipping download."
            )
-            return self._client.submit(lambda: existing_files)
+            return existing_files

        # If we didn't have any, we need to download them
        # TODO: how should we handle partial/failed downloads... do we really
        # want to re-search for them each time?
        # Maybe there can be a "force" flag to re-download everything?
        # or perhaps an API search, then if the number matches, we can skip
        # rather than let aria2c start and do the checksums
-        rslc_futures = self._client.submit(
-            self.asf_query.download, log_dir=self.log_dir
-        )
-        return rslc_futures
+        return self.asf_query.download(log_dir=self.log_dir)
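The return-type change makes `_download_rslcs` synchronous: callers get a plain `list[Path]` rather than a Dask future to `.result()`. A minimal sketch of the skip-if-present pattern it implements (the glob pattern and the callback are hypothetical stand-ins):

```python
from pathlib import Path
from typing import Callable

def get_or_download(
    out_dir: Path,
    do_download: Callable[[], list[Path]],
    pattern: str = "*.zip",
) -> list[Path]:
    # Files already on disk? Skip the (slow) remote query entirely.
    existing = sorted(out_dir.glob(pattern))
    if existing:
        return existing
    return do_download()
```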

    @log_runtime
    def _geocode_slcs(self, slc_files, dem_file, burst_db_file):
@@ -324,6 +321,12 @@ def cfg_to_filename(cfg_path: Path) -> str:

        # Get the first config file (by date) for each of the bursts.
        # We only need to create the static layers once per burst
+        def cfg_to_static_filename(cfg_path: Path) -> str:
+            # Convert the YAML config filename to a static-layers .h5 filename:
+            # geo_runconfig_20221029_t078_165578_iw3.yaml -> static_layers_t078_165578_iw3.h5
+            burst = "_".join(cfg_path.stem.split("_")[3:])
+            return f"static_layers_{burst}.h5"
+
        existing_static_paths = self._get_burst_static_layers()
        name_to_paths = {p.name: p for p in existing_static_paths}
        logger.info(f"Found {len(name_to_paths)} existing geometry files")
@@ -333,13 +336,12 @@ def cfg_to_filename(cfg_path: Path) -> str:
        static_files = []
        todo_static = []
        for cfg_file in day1_cfg_paths:
-            name = cfg_to_filename(cfg_file)
+            name = cfg_to_static_filename(cfg_file)
            if name in name_to_paths:
                static_files.append(name_to_paths[name])
            else:
                # Run the geocoding if we don't have it already
                todo_static.append(cfg_file)
-
        run_func = partial(run_static_layers, log_dir=self.log_dir)
        with ProcessPoolExecutor(max_workers=self.n_workers) as _client:
            new_files = _client.map(run_func, todo_static)
@@ -415,7 +417,7 @@ def _create_burst_interferograms(self, gslc_files):
        # Group the SLCs by burst:
        # {'t078_165573_iw2': [PosixPath('gslcs/t078_165573_iw2/20221029/...], 't078_...
        burst_to_gslc = group_by_burst(gslc_files)
-        burst_to_ifg = group_by_burst(self._get_existing_burst_ifgs())
+        burst_to_ifg = group_by_burst(self._get_existing_burst_ifgs(), minimum_images=1)
        ifg_path_list = []
        for burst, gslc_files in burst_to_gslc.items():
            subdatasets = [self._get_subdataset(f) for f in gslc_files]
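For intuition, a sketch of what a burst-grouping helper does (this is an illustration, not the real `group_by_burst`; the regex and the `minimum_images` handling are assumptions based on the burst IDs shown in the comment above):

```python
import re
from collections import defaultdict
from pathlib import Path

BURST_RE = re.compile(r"t\d{3}_\d{6}_iw[123]")  # e.g. t078_165573_iw2

def group_paths_by_burst(paths, minimum_images=2):
    groups = defaultdict(list)
    for p in map(Path, paths):
        m = BURST_RE.search(str(p))
        if m:
            groups[m.group()].append(p)
    # Keep only bursts with enough acquisitions to form pairs
    return {b: v for b, v in groups.items() if len(v) >= minimum_images}
```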
@@ -543,20 +545,17 @@ def run(self, starting_step: int = 1):
            dem_fut = self._download_dem()
            burst_db_fut = self._download_burst_db()
            water_mask_future = self._download_water_mask()
-            rslc_futures = self._download_rslcs()
-            orbits_future = self._client.submit(
-                download_orbits, self.asf_query.out_dir, self.orbit_dir
-            )
            # Gather the futures once everything is downloaded
            burst_db_file = burst_db_fut.result()
            dem_fut.result()
            wait([water_mask_future])
-            wait([orbits_future])
-            rslc_files = rslc_futures.result()
+            rslc_files = self._download_rslcs()

        # Second step:
        if starting_step <= 2:
+            burst_db_file = get_burst_db()
+            download_orbits(self.asf_query.out_dir, self.orbit_dir)
            rslc_files = self._get_existing_rslcs()
            self._geocode_slcs(rslc_files, self._dem_filename, burst_db_file)

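The effect of this restructuring: the burst DB and orbit files are fetched inside step 2, so a run started there no longer depends on step-1 futures. A sketch of resuming, assuming a config file already exists and the RSLCs are on disk:

```python
from sweets.core import Workflow

w = Workflow.from_yaml("sweets_config.yaml")
# Skips the ASF download; step 2 re-fetches the burst DB and orbits,
# then picks up the existing RSLCs for geocoding.
w.run(starting_step=2)
```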
19 changes: 17 additions & 2 deletions src/sweets/download.py
@@ -22,6 +22,7 @@
import subprocess
import sys
import zipfile
+from concurrent.futures import ThreadPoolExecutor
from datetime import date, datetime
from functools import lru_cache
from pathlib import Path
@@ -192,6 +193,20 @@ def _download_with_aria(self, urls, log_dir: Filename = Path(".")):
        with open(log_filename, "w") as f:
            subprocess.run(aria_cmd, shell=True, stdout=f, stderr=f, text=True)

+    def _download_with_wget(self, urls, log_dir: Filename = Path(".")):
+        def download_url(idx_url_pair):
+            idx, u = idx_url_pair
+            log_filename = Path(log_dir) / f"wget_{idx:02d}.log"
+            with open(log_filename, "w") as f:
+                wget_cmd = f'wget -nc -c "{u}" -P "{self.out_dir}"'
+                logger.info(f"({idx} / {len(urls)}): Downloading {u} with wget")
+                logger.info(wget_cmd)
+                subprocess.run(wget_cmd, shell=True, stdout=f, stderr=f, text=True)
+
+        # Parallelize the download using ThreadPoolExecutor
+        with ThreadPoolExecutor(max_workers=3) as executor:
+            list(executor.map(download_url, enumerate(urls)))

@log_runtime
def download(self, log_dir: Filename = Path(".")) -> list[Path]:
# Start by saving data available as geojson
@@ -206,8 +221,8 @@ def download(self, log_dir: Filename = Path(".")) -> list[Path]:
        self.out_dir.mkdir(parents=True, exist_ok=True)
        file_names = [self.out_dir / f for f in self._file_names(results)]

-        # NOTE: aria should skip already-downloaded files
-        self._download_with_aria(urls, log_dir=log_dir)
+        # TODO: use aria if available? or just make wget parallel...
+        self._download_with_wget(urls, log_dir=log_dir)

        if self.unzip:
            # Change to .SAFE extension
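A note on the new wget path: `-nc` (no-clobber) skips files already on disk and `-c` resumes partial downloads, so re-running is cheap. A hypothetical direct use of the downloader (the class name `ASFQuery` is an assumption; this diff only shows it as `self.asf_query`, built from the same fields used in the test below):

```python
from pathlib import Path

from sweets.download import ASFQuery  # class name assumed, not shown in this diff

q = ASFQuery(start="2022-12-15", end="2022-12-29", relativeOrbit=78, out_dir="data")
files = q.download(log_dir=Path("logs"))  # list[Path], per the signature above
```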
27 changes: 27 additions & 0 deletions tests/test_core.py
@@ -0,0 +1,27 @@
from sweets.core import Workflow


def test_workflow_construct(tmp_path):
    bbox = [-102.2, 32.15, -102.1, 32.22]
    # start, end, track = "2022-10-15", "2023-02-20", 78
    start, end, track = "2022-12-15", "2022-12-29", 78
    n_workers, tpw = 1, 16

    w = Workflow(
        asf_query=dict(
            start=start,
            end=end,
            relativeOrbit=track,
            out_dir="data",
        ),
        bbox=bbox,
        n_workers=n_workers,
        threads_per_worker=tpw,
        max_bandwidth=1,
        orbit_dir="orbits",
    )
    # print(w.run())  # Print out the final output results
    outfile = tmp_path / "config.yaml"
    w.to_yaml(outfile, with_comments=True)
    w2 = Workflow.from_yaml(outfile)
    assert w == w2