From f9e88132c213f336adc8e44b18005382b34fdce1 Mon Sep 17 00:00:00 2001 From: Patrick Kalita Date: Thu, 26 Sep 2024 13:35:16 -0700 Subject: [PATCH 1/5] Add macOS files to gitignore --- .gitignore | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/.gitignore b/.gitignore index eff8c02..4ebdf40 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +# === PYTHON === # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] @@ -156,3 +157,32 @@ cython_debug/ # PyCharm .idea/ + +# === MACOS === +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk From 2a520055906f01ad9e6eb5df895d1e267bf98ca2 Mon Sep 17 00:00:00 2001 From: Patrick Kalita Date: Thu, 26 Sep 2024 13:36:39 -0700 Subject: [PATCH 2/5] Move duplicated code to static methods; don't reread CSVs; use Pythonic names --- src/nmdc_geoloc_tools/geotools.py | 179 ++++++++++++++---------------- 1 file changed, 86 insertions(+), 93 deletions(-) diff --git a/src/nmdc_geoloc_tools/geotools.py b/src/nmdc_geoloc_tools/geotools.py index 8ae4ec7..8c6f7aa 100644 --- a/src/nmdc_geoloc_tools/geotools.py +++ b/src/nmdc_geoloc_tools/geotools.py @@ -1,9 +1,9 @@ import csv import json -import os import xml.etree.ElementTree as ET -from dataclasses import dataclass from datetime import datetime +from functools import cache +from pathlib import Path from typing import Tuple import requests @@ -11,30 +11,59 @@ LATLON = Tuple[float, float] -@dataclass +@cache +def read_data_csv(filename: str) -> list: + with open(Path(__file__).parent / "data" / filename) as file: + mapping = csv.reader(file) + return list(mapping) + + class GeoEngine: """ Wraps ORNL Identify to retrieve elevation data in meters, soil types, and land use. """ - def get_elevation(self, latlon: LATLON) -> float: - """ - Accepts decimal degrees latitude and longitude as an array (array[latitude, longitude]) and returns the elevation value in meters as a float. - """ + @property + def zobler_soil_type_lookup(self) -> list: + return read_data_csv("zobler_540_MixS_lookup.csv") + + @property + def envo_landuse_systems_lookup(self) -> list: + return read_data_csv("ENVO_Landuse_Systems_lookup.csv") + + @property + def envo_landuse_lookup(self) -> list: + return read_data_csv("ENVO_Landuse_lookup.csv") + + @staticmethod + def _validate_latlon(latlon: LATLON): lat = latlon[0] lon = latlon[1] if not -90 <= lat <= 90: - raise ValueError("Invalid Latitude: " + str(lat)) + raise ValueError(f"Invalid Latitude: {lat}") if not -180 <= lon <= 180: - raise ValueError("Invalid Longitude: " + str(lon)) - # Generate bounding box used in query from lat & lon. 0.008333333333333 comes from maplayer resolution provided by ORNL - remX = (lon + 180) % 0.008333333333333 - remY = (lat + 90) % 0.008333333333333 - minX = lon - remX - maxX = lon - remX + 0.008333333333333 - minY = lat - remY - maxY = lat - remY + 0.008333333333333 - BBOX = str(minX) + "," + str(minY) + "," + str(maxX) + "," + str(maxY) + raise ValueError(f"Invalid Longitude: {lon}") + return lat, lon + + @staticmethod + def _bbox(lat: float, lon: float, resolution: float) -> str: + rem_x = (lon + 180) % resolution + rem_y = (lat + 90) % resolution + min_x = lon - rem_x + max_x = lon - rem_x + resolution + min_y = lat - rem_y + max_y = lat - rem_y + resolution + return f"{min_x},{min_y},{max_x},{max_y}" + + def get_elevation(self, latlon: LATLON) -> float: + """ + Accepts decimal degrees latitude and longitude as an array (array[latitude, longitude]) and + returns the elevation value in meters as a float. + """ + lat, lon = self._validate_latlon(latlon) + # Generate bounding box used in query from lat & lon. 0.008333333333333 comes from maplayer + # resolution provided by ORNL + bbox = self._bbox(lat, lon, 0.008333333333333) elevparams = { "originator": "QAQCIdentify", "SERVICE": "WMS", @@ -48,7 +77,7 @@ def get_elevation(self, latlon: LATLON) -> float: "X": "2", "Y": "2", "INFO_FORMAT": "text/xml", - "BBOX": BBOX, + "BBOX": bbox, } response = requests.get( "https://webmap.ornl.gov/ogcbroker/wms", params=elevparams @@ -65,32 +94,15 @@ def get_elevation(self, latlon: LATLON) -> float: def get_fao_soil_type(self, latlon: LATLON) -> str: """ - Accepts decimal degrees latitude and longitude as an array (array[latitude, longitude]) and returns the soil type as a string. + Accepts decimal degrees latitude and longitude as an array (array[latitude, longitude]) and + returns the soil type as a string. """ - lat = latlon[0] - lon = latlon[1] - if not -90 <= lat <= 90: - raise ValueError("Invalid Latitude: " + str(lat)) - if not -180 <= lon <= 180: - raise ValueError("Invalid Longitude: " + str(lon)) - # Generate bounding box used in query from lat & lon. 0.5 comes from maplayer resolution provided by ORNL - remX = (lon + 180) % 0.5 - remY = (lat + 90) % 0.5 - minX = lon - remX - maxX = lon - remX + 0.5 - minY = lat - remY - maxY = lat - remY + 0.5 - - # Read in the mapping file note need to get this path right - with open( - os.path.dirname(__file__) + "/data/zobler_540_MixS_lookup.csv" - ) as mapper: - mapping = csv.reader(mapper) - map = list(mapping) + lat, lon = self._validate_latlon(latlon) + # Generate bounding box used in query from lat & lon. 0.5 comes from maplayer resolution + # provided by ORNL + bbox = self._bbox(lat, lon, 0.5) - BBoxstring = str(minX) + "," + str(minY) + "," + str(maxX) + "," + str(maxY) - - faosoilparams = { + fao_soil_params = { "INFO_FORMAT": "text/xml", "WIDTH": "5", "originator": "QAQCIdentify", @@ -98,7 +110,7 @@ def get_fao_soil_type(self, latlon: LATLON) -> str: "LAYERS": "540_1_band1", "REQUEST": "GetFeatureInfo", "SRS": "EPSG:4326", - "BBOX": BBoxstring, + "BBOX": bbox, "VERSION": "1.1.1", "X": "2", "Y": "2", @@ -107,17 +119,17 @@ def get_fao_soil_type(self, latlon: LATLON) -> str: "map": "/sdat/config/mapfile//540/540_1_wms.map", } response = requests.get( - "https://webmap.ornl.gov/cgi-bin/mapserv", params=faosoilparams + "https://webmap.ornl.gov/cgi-bin/mapserv", params=fao_soil_params ) if response.status_code == 200: - faosoilxml = response.content.decode("utf-8") - if faosoilxml == "": + fao_soil_xml = response.content.decode("utf-8") + if fao_soil_xml == "": raise ValueError("Empty string returned") - root = ET.fromstring(faosoilxml) + root = ET.fromstring(fao_soil_xml) results = root[5].text results = results.split(":") results = results[1].strip() - for res in map: + for res in self.zobler_soil_type_lookup: if res[0] == results: results = res[1] return results @@ -127,73 +139,54 @@ def get_fao_soil_type(self, latlon: LATLON) -> str: def get_landuse_dates(self, latlon: LATLON) -> []: """ - Accepts decimal degrees latitude and longitude as an array (array[latitude, longitude]) and returns as array of valid dates (YYYY-MM-DD format) for the landuse requests. + Accepts decimal degrees latitude and longitude as an array (array[latitude, longitude]) and + returns as array of valid dates (YYYY-MM-DD format) for the landuse requests. """ - lat = latlon[0] - lon = latlon[1] - if not -90 <= lat <= 90: - raise ValueError("Invalid Latitude: " + str(lat)) - if not -180 <= lon <= 180: - raise ValueError("Invalid Longitude: " + str(lon)) - landuseparams = {"latitude": lat, "longitude": lon} + lat, lon = self._validate_latlon(latlon) + landuse_params = {"latitude": lat, "longitude": lon} response = requests.get( - "https://modis.ornl.gov/rst/api/v1/MCD12Q1/dates", params=landuseparams + "https://modis.ornl.gov/rst/api/v1/MCD12Q1/dates", params=landuse_params ) if response.status_code == 200: - landuseDates = response.content.decode("utf-8") - if landuseDates == "": + landuse_dates = response.content.decode("utf-8") + if landuse_dates == "": raise ValueError("No valid Landuse dates returned") - data = json.loads(landuseDates) - validDates = [] + data = json.loads(landuse_dates) + valid_dates = [] for date in data["dates"]: - validDates.append(date["calendar_date"]) - return validDates + valid_dates.append(date["calendar_date"]) + return valid_dates else: raise ApiException(response.status_code) - def get_landuse(self, latlon: LATLON, startDate, endDate) -> {}: + def get_landuse(self, latlon: LATLON, start_date, end_date) -> {}: """ - Accepts decimal degrees latitude and longitude as an array (array[latitude, longitude]), the start date (YYYY-MM-DD), and end date (YYYY-MM-DD) and returns a dictionary containing the land use values for the classification systems for the dates requested. + Accepts decimal degrees latitude and longitude as an array (array[latitude, longitude]), the + start date (YYYY-MM-DD), and end date (YYYY-MM-DD) and returns a dictionary containing the + land use values for the classification systems for the dates requested. """ - lat = latlon[0] - lon = latlon[1] - if not -90 <= lat <= 90: - raise ValueError("Invalid Latitude: " + str(lat)) - if not -180 <= lon <= 180: - raise ValueError("Invalid Longitude: " + str(lon)) + lat, lon = self._validate_latlon(latlon) # function accepts dates in YYYY-MM-DD format, but API requires a unique format (AYYYYDOY) date_format = "%Y-%m-%d" - start_date_obj = datetime.strptime(startDate, date_format) - end_date_obj = datetime.strptime(endDate, date_format) + start_date_obj = datetime.strptime(start_date, date_format) + end_date_obj = datetime.strptime(end_date, date_format) - apiStartDate = ( + api_start_date = ( "A" + str(start_date_obj.year) + str(start_date_obj.strftime("%j")) ) - apiEndDate = "A" + str(end_date_obj.year) + str(end_date_obj.strftime("%j")) + api_end_date = "A" + str(end_date_obj.year) + str(end_date_obj.strftime("%j")) - landuseparams = { + landuse_params = { "latitude": lat, "longitude": lon, - "startDate": apiStartDate, - "endDate": apiEndDate, + "startDate": api_start_date, + "endDate": api_end_date, "kmAboveBelow": 0, "kmLeftRight": 0, } response = requests.get( - "https://modis.ornl.gov/rst/api/v1/MCD12Q1/subset", params=landuseparams + "https://modis.ornl.gov/rst/api/v1/MCD12Q1/subset", params=landuse_params ) - # Retrieve Classification System mapping - with open( - os.path.dirname(__file__) + "/data/ENVO_Landuse_Systems_lookup.csv" - ) as mapper: - mapping = csv.reader(mapper) - sytems_map = list(mapping) - # Retrieve Classification System to ENVO mapping - with open( - os.path.dirname(__file__) + "/data/ENVO_Landuse_lookup.csv" - ) as mapper: - mapping = csv.reader(mapper) - landuse_map = list(mapping) if response.status_code == 200: landuse = response.content.decode("utf-8") results = {} @@ -203,10 +196,10 @@ def get_landuse(self, latlon: LATLON, startDate, endDate) -> {}: for band in data["subset"]: system = "NONE" band["data"] = list(map(int, band["data"])) - for res in sytems_map: + for res in self.envo_landuse_systems_lookup: if res[1] == band["band"]: system = res[0] - for res in landuse_map: + for res in self.envo_landuse_lookup: if res[8] == system and int(res[1]) in band["data"]: envo_term = res[2] if envo_term == "": From 94dcfbf8d228d44eedc634d2b5053ea614a64db1 Mon Sep 17 00:00:00 2001 From: Patrick Kalita Date: Thu, 26 Sep 2024 13:40:09 -0700 Subject: [PATCH 3/5] Use function-based test style --- tests/test_geotools.py | 71 ++++++++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 30 deletions(-) diff --git a/tests/test_geotools.py b/tests/test_geotools.py index 417585a..975655c 100644 --- a/tests/test_geotools.py +++ b/tests/test_geotools.py @@ -3,43 +3,54 @@ from nmdc_geoloc_tools.geotools import GeoEngine -class TestGeotools: - ge = GeoEngine() +@pytest.fixture() +def engine(): + return GeoEngine() - def test_elevation_mount_everest(self): - assert int(self.ge.get_elevation((27.9881, 86.9250))) == 8752 - def test_elevation_death_valley(self): - assert int(self.ge.get_elevation((36.5322649, -116.9325408))) == -66 +def test_elevation_mount_everest(engine): + assert int(engine.get_elevation((27.9881, 86.9250))) == 8752 - def test_elevation_ocean(self): - with pytest.raises(ValueError): - self.ge.get_elevation((0, 0)) - def test_elevation_bad_coordinates(self): - with pytest.raises(ValueError): - self.ge.get_elevation((-200, 200)) +def test_elevation_death_valley(engine): + assert int(engine.get_elevation((36.5322649, -116.9325408))) == -66 - def test_soil_type_cambisols(self): - assert self.ge.get_fao_soil_type((32.95047, -87.393259)) == "Cambisols" - def test_soil_type_water(self): - assert self.ge.get_fao_soil_type((0, 0)) == "Water" +def test_elevation_ocean(engine): + with pytest.raises(ValueError): + engine.get_elevation((0, 0)) - def test_soil_type_bad_coordinates(self): - with pytest.raises(ValueError): - self.ge.get_fao_soil_type((-200, 200)) - def test_landuse_bad_coordinates(self): - with pytest.raises(ValueError): - self.ge.get_landuse((-200, 200), "2001-01-01", "2002-01-01") +def test_elevation_bad_coordinates(engine): + with pytest.raises(ValueError): + engine.get_elevation((-200, 200)) - def test_landuse_date_death_valley(self): - dates = self.ge.get_landuse_dates((36.5322649, -116.9325408)) - assert dates[0] == "2001-01-01" - def test_landuse_death_valley(self): - data = self.ge.get_landuse( - (36.5322649, -116.9325408), "2001-01-01", "2002-01-01" - ) - assert data["LCCS1"][0]["envo_term"] == "area of barren land" +def test_soil_type_cambisols(engine): + assert engine.get_fao_soil_type((32.95047, -87.393259)) == "Cambisols" + + +def test_soil_type_water(engine): + assert engine.get_fao_soil_type((0, 0)) == "Water" + + +def test_soil_type_bad_coordinates(engine): + with pytest.raises(ValueError): + engine.get_fao_soil_type((-200, 200)) + + +def test_landuse_bad_coordinates(engine): + with pytest.raises(ValueError): + engine.get_landuse((-200, 200), "2001-01-01", "2002-01-01") + + +def test_landuse_date_death_valley(engine): + dates = engine.get_landuse_dates((36.5322649, -116.9325408)) + assert dates[0] == "2001-01-01" + + +def test_landuse_death_valley(engine): + data = engine.get_landuse( + (36.5322649, -116.9325408), "2001-01-01", "2002-01-01" + ) + assert data["LCCS1"][0]["envo_term"] == "area of barren land" From 6911f4ad963e11e6da62b1fc0eba262d82f8dfb6 Mon Sep 17 00:00:00 2001 From: Patrick Kalita Date: Thu, 26 Sep 2024 13:41:33 -0700 Subject: [PATCH 4/5] Allow importing GeoEngine from top-level package --- src/nmdc_geoloc_tools/__init__.py | 1 + tests/test_geotools.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/nmdc_geoloc_tools/__init__.py b/src/nmdc_geoloc_tools/__init__.py index e69de29..64570b6 100644 --- a/src/nmdc_geoloc_tools/__init__.py +++ b/src/nmdc_geoloc_tools/__init__.py @@ -0,0 +1 @@ +from nmdc_geoloc_tools.geotools import GeoEngine as GeoEngine diff --git a/tests/test_geotools.py b/tests/test_geotools.py index 975655c..a2c1b5f 100644 --- a/tests/test_geotools.py +++ b/tests/test_geotools.py @@ -1,6 +1,6 @@ import pytest -from nmdc_geoloc_tools.geotools import GeoEngine +from nmdc_geoloc_tools import GeoEngine @pytest.fixture() From 3b96e0b1be27559ef1f33072262a5b2e6bd3e72d Mon Sep 17 00:00:00 2001 From: Patrick Kalita Date: Thu, 26 Sep 2024 14:05:41 -0700 Subject: [PATCH 5/5] Fix formatting --- tests/test_geotools.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_geotools.py b/tests/test_geotools.py index a2c1b5f..4debd3c 100644 --- a/tests/test_geotools.py +++ b/tests/test_geotools.py @@ -50,7 +50,5 @@ def test_landuse_date_death_valley(engine): def test_landuse_death_valley(engine): - data = engine.get_landuse( - (36.5322649, -116.9325408), "2001-01-01", "2002-01-01" - ) + data = engine.get_landuse((36.5322649, -116.9325408), "2001-01-01", "2002-01-01") assert data["LCCS1"][0]["envo_term"] == "area of barren land"