diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..69cc06f --- /dev/null +++ b/Dockerfile @@ -0,0 +1,32 @@ +FROM lambci/lambda:build-python3.6 + + +# Installing system libraries +RUN \ + yum install -y wget; \ + yum install -y geos-devel; \ + yum clean all; \ + yum autoremove; + + +# Paths +ENV \ + PREFIX=/usr/local \ + LD_LIBRARY_PATH=/usr/local/lib:/usr/local/lib64 + +# Switch to build directory +WORKDIR /build + +# Installing cognition-datasources + requirements +COPY requirements-dev.txt ./ + +RUN \ + pip install -r requirements-dev.txt; \ + pip install git+https://github.com/geospatial-jeff/cognition-datasources.git + + + +# Copy shell scripts +COPY bin/* /usr/local/bin/ + +WORKDIR /home/cognition-datasources \ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index c3d5431..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1,3 +0,0 @@ -include datasources/template/* -include datasources/template/.circleci/* -include datasources/template/docs/* \ No newline at end of file diff --git a/README.md b/README.md index b2b7b7f..4ef431e 100644 --- a/README.md +++ b/README.md @@ -1,61 +1,47 @@ -# cognition-datasources - ## About -This library defines a STAC-compliant standardized interface for searching geospatial assets, primarily remotely sensed imagery. The [Spatio-Temporal-Asset-Catalog (STAC)](https://github.com/radiantearth/stac-spec) specification provides common metadata and API schemas to search and access geospatial data. The standardized interface used by the library is based on the STAC spec and allows searching across three dimensions: - -- **Spatial:** Find all assets which intersect a bounding box. -- **Temporal:** Find all assets acquired within a temporal window. -- **Properties:** Find all assets with certain metadata. -Not all commonly used datasources are currently STAC-compliant. 
In such cases, the library maintains a standardized search interface by wrapping the API with a STAC-compatible API which parses the initial search parameters into a format compatible with the underlying API. A request to the API is sent and the response is parsed into a STAC Item and returned to the user. The table below of supported datasources states which are STAC-compliant. +This library defines a pluggable, STAC-compliant, service for searching geospatial assets, primarily remotely sensed imagery, and serves two primary purposes: -![title](docs/images/api-diagram.png) +1. Define a pluggable driver interface (similar to GraphQL resolvers) for wrapping the STAC spec around legacy datasources. +2. Provide a framework for loading / executing drivers both locally and in the cloud. -#### Datasource Drivers -The interface defined by the library is extended by datasource drivers which are defined in external github repositories and loaded into the library through a command line interface. Similar to how drivers control hardware, the logic implemented in the datasource driver influences how cognition-datasources accesses the underlying datasource. Each driver is expected to follow a specific pattern and pass a standard set of test cases (enforced with CircleCI). Check out the [contribution guidelines](/docs/contributing.md) for a guide on how to develop your own datasource driver! +Each driver translates the STAC-compliant request into a format compatible with the underlying API while translating the API response to a valid STAC Item. Drivers are packaged and deployed to AWS Lambda and a single API Gateway endpoint is created which allows searching the loaded datasources. The goal is to create an extensible service which allows users to integrate their datasets with the STAC ecosystem without having to change how their data is stored and queried. 
+![title](docs/images/service-diagram.png?style=centerme) -## Setup +## Installation ``` -# Install library -pip install git+https://github.com/geospatial-jeff/cognition-datasources - -# Load datasources -cognition-datasources load -d Landsat8 -d Sentinel2 +git clone https://github.com/geospatial-jeff/cognition-datasources +cd cognition-datasources +python setup.py develop ``` -## Usage - -#### Python -```python -from datasources import Manifest - -# Create manifest -manifest = Manifest() +## Deployment +``` +# Load datasources +cognition-datasources -d Landsat8 -d Sentinel2 -d SRTM -d NAIP -# Search arguments -spatial = {"type": "Polygon", "coordinates": [[...]]} -temporal = ("2018-10-30", "2018-12-31") -properties = {'eo:cloud_cover': {'lt': 10}} +# Build docker container +docker build . -t cognition-datasources:latest -# Create searches for Landsat8 and Sentinel2 -manifest['Landsat8'].search(spatial, temporal=temporal, properties=properties) -manifest['Sentinel2'].search(spatial, temporal=temporal, properties=properties) +# Package service +docker run --rm -v $PWD:/home/cognition-datasources -it cognition-datasources:latest package-service.sh -# Execute searches -response = manifest.execute() +# Deploy to AWS +sls deploy -v ``` +Read the [deployment docs](./docs/deployment.md) for more information on deployment. -#### CLI -``` -cognition-datasources search xmin ymin xmax ymax --start-date "2018-10-30" --end-date "2018-12-31" -d Landsat8 -d Sentinel2 --output response.json -``` +## Usage +The deployment generates an AWS API Gateway endpoint which supports STAC-compliant searches of the loaded datasources through the `/stac/search` endpoint (POST). Read the [API docs](./docs/README.md) for usage details. + +A live example lives [here](https://github.com/geospatial-jeff/cognition-datasources-api). ## Testing -Each driver must pass a [standard set of test cases](datasources/tests.py) and uses CircleCI to ensure only working drivers are loaded into the library. 
View the status of each driver [here](/docs/datasource-status.md). +Each driver must pass a [standard set of test cases](./datasources/tests.py) and uses CircleCI to ensure only working drivers are loaded into the library. View the status of each driver [here](./docs/datasource-status.md). -## Documentation -Read the [quickstart](./docs/quickstart.ipynb) and [documentation](./docs). +## Contributing +Check out the [contributing docs](./docs/contributing.md) for step-by-step guide for building your own driver. ## Supported Datasource Drivers | Name | Source | STAC-Compliant | Notes | @@ -69,4 +55,4 @@ Read the [quickstart](./docs/quickstart.ipynb) and [documentation](./docs). | [Sentinel2](https://github.com/geospatial-jeff/cognition-datasources-sentinel2) | [AWS Earth: Sentinel2](https://registry.opendata.aws/sentinel-2/) | True | Sends requests to [sat-api](https://github.com/sat-utils/sat-api). | | [SRTM](https://github.com/geospatial-jeff/cognition-datasources-srtm) | [AWS: Terrain Tiles](https://registry.opendata.aws/terrain-tiles/) | False | Does not send any requests. | | [USGS 3DEP](https://github.com/geospatial-jeff/cognition-datasources-usgs3dep) | [AWS: USGS 3DEP](https://registry.opendata.aws/usgs-lidar/) | False | Sends request to AWS S3 Bucket. 
| -| [Microsoft Building Footprints](https://github.com/geospatial-jeff/cognition-datasources-mbf) | [Microsoft](https://github.com/Microsoft/USBuildingFootprints) / [ESRI](https://www.arcgis.com/home/item.html?id=f40326b0dea54330ae39584012807126) | False | Sends requests to ESRI Feature Layer | \ No newline at end of file +| [Microsoft Building Footprints](https://github.com/geospatial-jeff/cognition-datasources-mbf) | [Microsoft](https://github.com/Microsoft/USBuildingFootprints) / [ESRI](https://www.arcgis.com/home/item.html?id=f40326b0dea54330ae39584012807126) | False | Sends requests to ESRI Feature Layer | diff --git a/bin/package-layer.sh b/bin/package-layer.sh new file mode 100755 index 0000000..31d16a5 --- /dev/null +++ b/bin/package-layer.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +# Directory used for deployment +export DEPLOY_DIR=layer + +mkdir $DEPLOY_DIR + +PYPATH=/var/lang/lib/python3.6/site-packages + + +echo Creating deployment package for cognition-datasources + +# Moving python libraries +mkdir $DEPLOY_DIR/python +EXCLUDE="shapely* stac_validator* s3transfer* boto3* botocore* pip* docutils* *.pyc setuptools* wheel* coverage* testfixtures* mock* *.egg-info *.dist-info __pycache__ easy_install.py" + +EXCLUDES=() +for E in ${EXCLUDE} +do + EXCLUDES+=("--exclude ${E} ") +done + +rsync -ax $PYPATH/ $DEPLOY_DIR/python/ ${EXCLUDES[@]} + +cd $DEPLOY_DIR +zip -ruq ../lambda-layer.zip ./ \ No newline at end of file diff --git a/bin/package-service.sh b/bin/package-service.sh new file mode 100755 index 0000000..e46c34d --- /dev/null +++ b/bin/package-service.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +# Directory used for deployment +export DEPLOY_DIR=service + +mkdir $DEPLOY_DIR + +# Moving handler +cp handler.py $DEPLOY_DIR + +cd $DEPLOY_DIR +zip -ruq ../lambda-service-package.zip ./ \ No newline at end of file diff --git a/datasources/__init__.py b/datasources/__init__.py index b8c24c8..7db68be 100644 --- a/datasources/__init__.py +++ b/datasources/__init__.py @@ -2,3 
+2,4 @@ from .manifest import Manifest +layer_arn = 'arn:aws:lambda:us-east-1:725820063953:layer:cognition-datasources:6' \ No newline at end of file diff --git a/datasources/scripts/_cli.py b/datasources/scripts/_cli.py index 98fdf7d..5c4c7a9 100644 --- a/datasources/scripts/_cli.py +++ b/datasources/scripts/_cli.py @@ -9,7 +9,7 @@ import yaml from multiprocessing.pool import ThreadPool -from datasources import Manifest, sources +from datasources import Manifest, sources, layer_arn @click.group(short_help="Cognition datasource query") def cognition_datasources(): @@ -44,7 +44,6 @@ def search(spatial, start_date, end_date, properties, datasource, debug, output) manifest = Manifest() for source in datasource: - manifest.load_source(getattr(sources, source)) manifest[source].search(geoj, temporal=temporal, properties=properties, limit=10) if debug: @@ -64,38 +63,40 @@ def search(spatial, start_date, end_date, properties, datasource, debug, output) return 0 - @cognition_datasources.command(name='new') @click.option('--name', '-n', type=str) def new(name): if os.path.exists(name): raise ValueError("The directory {} already exists.".format(name)) - shutil.copytree(os.path.join(os.path.dirname(__file__), '..', 'template'), name) + shutil.copytree(os.path.join(os.path.dirname(__file__), '..', '..', 'driver'), name) - with open(os.path.join(os.getcwd(), name, 'template.py'), 'r') as f: - contents = f.read() - contents = contents.replace('__TEMPLATENAME__', name) + fpaths = [ + os.path.join(os.getcwd(), name, 'template.py'), + os.path.join(os.getcwd(), name, 'tests.py'), + os.path.join(os.getcwd(), name, 'bin', 'driver-package.sh'), + os.path.join(os.getcwd(), name, 'handler.py'), + os.path.join(os.getcwd(), name, 'README.md') + ] - with open(os.path.join(os.getcwd(), name, 'template.py'), 'w') as outf: - outf.write(contents) - - with open(os.path.join(os.getcwd(), name, 'tests.py'), 'r') as f: - contents = f.read() - contents = contents.replace('__TEMPLATENAME__', name) - 
- with open(os.path.join(os.getcwd(), name, 'tests.py'), 'w') as outf: - outf.write(contents) - - os.rename(os.path.join(os.getcwd(), name, 'template.py'), os.path.join(os.getcwd(), name, '{}.py'.format(name))) + for file in fpaths: + replace_template_name(file, name) + os.rename(fpaths[0], os.path.join(os.path.dirname(fpaths[0]), f'{name}.py')) @cognition_datasources.command(name='load') @click.option('--datasource', '-d', type=str, multiple=True) -def load(datasource): +@click.option('--local/--deployed', default=False) +def load(datasource, local): + + handler = [] + sls_functions = {} + for source in datasource: + print("Loading the {} driver.".format(source)) source_link = getattr(sources.remote, source) - project_path = '/'.join(source_link.split('/')[3:-1]) + project_path = '/'.join(source_link.split('/')[4:]) + source_link += '@master' # Check CI build r = requests.get(os.path.join(source_link, 'config.yml')) @@ -106,45 +107,59 @@ def load(datasource): print("WARNING: {} was not loaded because it failed CI".format(source)) continue - # Download remote datasource .py file into sources folder - source_fname = source + '.py' - source_remote_url = os.path.join(source_link, source_fname) - r = requests.get(source_remote_url) - with open(os.path.join(os.path.dirname(__file__), '..', 'sources', source_fname), 'w+') as outfile: - outfile.write(r.text) - - # Install datasource dependencies - fd, path = tempfile.mkstemp() - req_remote_url = os.path.join(source_link, 'requirements.txt') - try: - with os.fdopen(fd, 'w') as tmp: - r = requests.get(req_remote_url) - tmp.write(r.text) - finally: - subprocess.call("pip install -r {}".format(path), shell=True) - os.remove(path) - - # Check for index - idx_remote_url = os.path.join(source_link, 'index.idx') - dat_remote_url = os.path.join(source_link, 'index.dat') - - idx_r = requests.get(idx_remote_url) - dat_r = requests.get(dat_remote_url) - - if idx_r.status_code == 404 or dat_r.status_code == 404: - continue - - 
static_dir = os.path.join(os.path.dirname(__file__), '..', 'static') - if not os.path.exists(static_dir): - os.makedirs(static_dir) - - with open(os.path.join(static_dir, '{}_rtree.idx'.format(source)), 'wb+') as outfile: - outfile.write(idx_r.content) - - with open(os.path.join(static_dir, '{}_rtree.dat'.format(source)), 'wb+') as outfile: - outfile.write(dat_r.content) + # Download remote file handler + handler_url = os.path.join(source_link, 'handler.py') + r = requests.get(handler_url) + for line in r.text.splitlines()[2:]: + handler.append(line + '\n') + + # Build sls function config + sls_functions.update({ + source: { + 'handler': 'handler.' + source, + 'layers': [ + layer_arn, + md['layer-arn'], + ] + } + }) + + # Add database layer arn if present + if 'db-arn' in md: + sls_functions[source]['layers'].append(md['db-arn']) + + + if local: + # Download driver file to local installation of cognition-datasources + driver_url = os.path.join(source_link, f"{source}.py") + r = requests.get(driver_url) + with open(os.path.join(os.path.dirname(__file__), '..', 'sources', f"{source}.py"), "w+") as driver_file: + driver_file.write(r.text) + + # Install dependencies + fd, path = tempfile.mkstemp() + req_url = os.path.join(source_link, "requirements.txt") + try: + with os.fdopen(fd, 'w') as tmp: + r = requests.get(req_url) + tmp.write(r.text) + finally: + subprocess.call("pip install -r {}".format(path), shell=True) + os.remove(path) + + # Write handler.py + with open(os.path.join(os.path.dirname(__file__), '..', '..', 'handler.py'), 'a+') as outfile: + for line in handler: + outfile.write(line) + + # Write serverless.yml + with open(os.path.join(os.path.dirname(__file__), '..', '..', 'serverless.yml'), 'r+') as config: + contents = yaml.load(config, Loader=yaml.BaseLoader) + contents['functions'].update(sls_functions) + + with open(os.path.join(os.path.dirname(__file__), '..', '..', 'serverless.yml'), 'w+') as outfile: + yaml.dump(contents, outfile) - 
print("Succesfully loaded the {} driver".format(source)) @cognition_datasources.command(name='build-examples') def build_examples(): @@ -153,6 +168,7 @@ def build_examples(): example_rel_path = 'docs/example.json' def _fetch_examples(data): + print("Pulling example for {}.".format(data['name'])) with open(os.path.join(os.path.dirname(__file__), '..', '..', 'docs', 'examples', '{}.json'.format(data['name'])), 'wb+') as examplefile: r = requests.get(os.path.join(data['url'], example_rel_path)) @@ -164,12 +180,14 @@ def _fetch_examples(data): @cognition_datasources.command(name='build-docs') def build_docs(): from datasources.sources import remote + remote_assets = {k: v for (k, v) in remote.__dict__.items() if type(v) == str and 'https' in v} - docs_rel_path = 'docs/README.md' + docs_rel_path = 'README.md' build_status = [] with open(os.path.join(os.path.dirname(__file__), '..', '..', 'docs', 'datasource-reference.md'), 'wb+') as docfile: for item in remote_assets: + print("Pulling docs for {}.".format(item)) r = requests.get(os.path.join(remote_assets[item], docs_rel_path)) docfile.write(r.content) docfile.write(b"\n---\n") @@ -178,6 +196,7 @@ def build_docs(): if 'CircleCI' in lines[0]: build_status.append({'name': item, 'status': lines[0]}) + print("Finishing up.") with open(os.path.join(os.path.dirname(__file__), '..', '..', 'docs', 'datasource-status.md'), 'w+') as statusfile: statusfile.write("# Driver Status\n") statusfile.write("| Driver Name | Status |\n") @@ -193,3 +212,11 @@ def list(): print([x.__name__ for x in sources]) +# Some helper methods used by the CLI +def replace_template_name(fpath, name): + with open(fpath, 'r') as f: + contents = f.read() + contents = contents.replace('__TEMPLATENAME__', name) + + with open(fpath, 'w') as outf: + outf.write(contents) \ No newline at end of file diff --git a/datasources/sources/__init__.py b/datasources/sources/__init__.py index bd29262..2a811a5 100644 --- a/datasources/sources/__init__.py +++ 
b/datasources/sources/__init__.py @@ -25,16 +25,13 @@ def load_sources(): class remote(object): - CBERS = "https://raw.githubusercontent.com/geospatial-jeff/cognition-datasources-cbers/master" - DGOpenData = "https://raw.githubusercontent.com/geospatial-jeff/cognition-datasources-dgopendata/master" - ElevationTiles = "https://raw.githubusercontent.com/geospatial-jeff/cognition-datasources-elevationtiles/master" - Landsat8 = "https://raw.githubusercontent.com/geospatial-jeff/cognition-datasources-landsat8/master" - MicrosoftBuildingFootprints = "https://raw.githubusercontent.com/geospatial-jeff/cognition-datasources-mbf/master" - NAIP = "https://raw.githubusercontent.com/geospatial-jeff/cognition-datasources-naip/master" - Sentinel1 = "https://raw.githubusercontent.com/geospatial-jeff/cognition-datasources-sentinel1/master" - Sentinel2 = "https://raw.githubusercontent.com/geospatial-jeff/cognition-datasources-sentinel2/master" - SRTM = "https://raw.githubusercontent.com/geospatial-jeff/cognition-datasources-srtm/master" - USGS3DEP = "https://raw.githubusercontent.com/geospatial-jeff/cognition-datasources-usgs3dep/master" - - - + CBERS = "https://cdn.jsdelivr.net/gh/geospatial-jeff/cognition-datasources-cbers" + DGOpenData = "https://cdn.jsdelivr.net/gh/geospatial-jeff/cognition-datasources-dgopendata" + ElevationTiles = "https://cdn.jsdelivr.net/gh/geospatial-jeff/cognition-datasources-elevationtiles" + Landsat8 = "https://cdn.jsdelivr.net/gh/geospatial-jeff/cognition-datasources-landsat8" + MicrosoftBuildingFootprints = "https://cdn.jsdelivr.net/gh/geospatial-jeff/cognition-datasources-mbf" + NAIP = "https://cdn.jsdelivr.net/gh/geospatial-jeff/cognition-datasources-naip" + Sentinel1 = "https://cdn.jsdelivr.net/gh/geospatial-jeff/cognition-datasources-sentinel1" + Sentinel2 = "https://cdn.jsdelivr.net/gh/geospatial-jeff/cognition-datasources-sentinel2" + SRTM = "https://cdn.jsdelivr.net/gh/geospatial-jeff/cognition-datasources-srtm" + USGS3DEP = 
"https://cdn.jsdelivr.net/gh/geospatial-jeff/cognition-datasources-usgs3dep" diff --git a/datasources/stac/query.py b/datasources/stac/query.py index 82348dd..e3c9369 100644 --- a/datasources/stac/query.py +++ b/datasources/stac/query.py @@ -4,7 +4,6 @@ from schema import Schema, And from geomet import wkt -from rtree import index class STACQueryError(BaseException): @@ -121,17 +120,17 @@ def check_properties(self, asset): return False return True - def check_spatial(self, name): - static_dir = os.path.join(os.path.dirname(__file__), '..', 'static') - rtree_location = os.path.join(static_dir, '{}_rtree'.format(name)) - - try: - idx = index.Rtree(rtree_location) - return [x.object for x in idx.intersection(self.bbox(), objects=True)] - except: - # Look for rtree in current directory - try: - idx = index.Rtree('index') - return [x.object for x in idx.intersection(self.bbox(), objects=True)] - except: - raise FileNotFoundError("Could not find rtree for the datasource at the following path: {}".format(rtree_location)) + # def check_spatial(self, name): + # static_dir = os.path.join(os.path.dirname(__file__), '..', 'static') + # rtree_location = os.path.join(static_dir, '{}_rtree'.format(name)) + # + # try: + # idx = index.Rtree(rtree_location) + # return [x.object for x in idx.intersection(self.bbox(), objects=True)] + # except: + # # Look for rtree in current directory + # try: + # idx = index.Rtree('index') + # return [x.object for x in idx.intersection(self.bbox(), objects=True)] + # except: + # raise FileNotFoundError("Could not find rtree for the datasource at the following path: {}".format(rtree_location)) diff --git a/datasources/template/config.yml b/datasources/template/config.yml deleted file mode 100644 index 93d82d3..0000000 --- a/datasources/template/config.yml +++ /dev/null @@ -1 +0,0 @@ -circle-token: \ No newline at end of file diff --git a/datasources/tests.py b/datasources/tests.py index 4b4d5cf..85388f4 100644 --- a/datasources/tests.py +++ 
b/datasources/tests.py @@ -46,6 +46,10 @@ def test_spatial_search(self): self.manifest[self.name].search(self.spatial) response = self.manifest.execute() + # Buffering the input geometry to account for small discrepancies in S2 (especially with large area searches) + # This test passes if all returned geometries are within 3% of the average length of the polygon. + buffered_geom = self.spatial_geom.buffer(0.03 * self.spatial_geom.length / 4) + # Confirming that each output feature intersects input for feat in response[self.name]['features']: if self.spatial_mode == 'geometry': @@ -57,7 +61,7 @@ def test_spatial_search(self): [feat['bbox'][0], feat['bbox'][1]], [feat['bbox'][0], feat['bbox'][3]]]) - self.assertTrue(asset_geom.intersects(self.spatial_geom)) + self.assertTrue(asset_geom.intersects(buffered_geom)) def test_temporal_search(self): self.manifest.flush() diff --git a/docs/README.md b/docs/README.md index 04b06dd..80443b1 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,5 +1,12 @@ # API Docs -The interface allows searching datasources with **spatial**, **temporal**, and **properties** parameters. Spatial is always required. Temporal is always accepted but not always honored (not all spatial datasources are temporal). Properties is always accepted but varies across drivers as different APIs have different responses. + +The [Spatio-Temporal-Asset-Catalog (STAC)](https://github.com/radiantearth/stac-spec) specification provides common metadata and API schemas to search and access geospatial data. The standardized interface used by the library is based on the STAC spec and allows searching across three dimensions: + +- **Spatial:** Find all assets which intersect a bounding box. +- **Temporal:** Find all assets acquired within a temporal window. +- **Properties:** Find all assets with certain metadata. + +Spatial is always required. Temporal is always accepted but not always honored (not all spatial datasources are temporal). 
Properties is always accepted but varies across drivers as different APIs have different responses. ### Spatial The standard representation of space is a [GeoJSON geometry object](https://tools.ietf.org/html/rfc7946#section-3.1): @@ -58,6 +65,12 @@ The following table shows which STAC properties are available when querying each | USGS 3DEP | [eo:epsg, pc:count, pc:type, pc:encoding] | [legacy:scan] | [limit] | | Microsoft Building Footprints | [eo:epsg] | [legacy:area, legacy:length, legacy:state] | [limit] | +Query strings are constructed using a nested dictionary notation. For example, returning all items with a cloud cover of less than 5% looks like: + +``` +{'eo:cloud_cover': {'lt': 5}} +``` + ### Response The response is a dictionary of feature collections with a key for each searched datasource. Each feature in the feature collection is a STAC Item representing a single asset returned by the query. Items returned from APIs which are not STAC compliant do not implement the standard `links` property, as there is no underlying STAC catalog to link with. Example STAC Items for each datasource can be found in the [examples folder](./examples). @@ -79,4 +92,46 @@ The response is a dictionary of feature collections with a key for each searched - Access to Sentinel-1 requires a valid Copernicus Open Access Hub account with username and password saved to the `COPERNICUS_USER` and `COPERNICUS_PASSWORD` environment variables. ### Licencing and Data Rights -This library uses the [Apache License 2.0](https://choosealicense.com/licenses/apache-2.0/) which allows for commercial use but not all datasources exposed by the library are licensed for commercial use. \ No newline at end of file +This library uses the [Apache License 2.0](https://choosealicense.com/licenses/apache-2.0/) which allows for commercial use but not all datasources exposed by the library are licensed for commercial use. 
Please refer to the license of the underlying datasource before using commercially. + +### Usage Examples +#### Cloud Deployment +```python +import requests +import json + +endpoint = 'https://xxxxxxxxxx.execute-api.us-east-1.amazonaws.com/stac/search' + +payload = { + 'intersects': {'type': 'Polygon', 'coordinates': [[...]]}, + 'temporal': ("2018-10-30", "2018-12-31"), + 'properties': {'eo:cloud_cover': {'lte': 5}}, + 'datasources': ['Landsat8', 'Sentinel2'] +} + +r = requests.post(endpoint, data=json.dumps(payload)) +response = r.json() +``` + +#### Local Deployment +```python +from datasources import Manifest + +payload = { + 'spatial': {'type': 'Polygon', 'coordinates': [[...]]}, + 'temporal': ("2018-10-30", "2018-12-31"), + 'properties': {'eo:cloud_cover': {'lte': 5}}, +} + +manifest = Manifest() +manifest['Landsat8'].search(**payload) +manifest['Sentinel2'].search(**payload) + +response = manifest.execute() +``` + +Or with the CLI: + +``` +cognition-datasources search xmin ymin xmax ymax --start-date "2018-10-30" --end-date "2018-12-31" -d Landsat8 -d Sentinel2 --output response.json +``` diff --git a/docs/contributing.md b/docs/contributing.md index cf204c5..55bdcb0 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -1,9 +1,9 @@ # Contributing -The purpose of this page is to explain how to contribute to the library by building your own Datasource Driver for use with cognition-datasources. +The purpose of this page is to explain how to contribute to the library by building your own driver for use with cognition-datasources. ## Datasource Driver -A datasource driver is a high-level wrapper of the underlying API which translates between STAC-compliant and API-compliant requests/responses. It inherits a standard pattern defined in the [sources.base.Datasource](../datasources/sources/base.py) base class. 
Realistically, a driver will look something like this: +A datasource driver is a high-level wrapper of the underlying API which translates between STAC-compliant and API-compliant requests/responses and is very similar conceptually to the [GraphQL resolver](https://graphql.org/learn/execution/#root-fields-resolvers). It inherits a standard pattern defined in the [sources.base.Datasource](../datasources/sources/base.py) base class. Realistically, a driver will look something like this: ```python class MyDatasource(Datasource): @@ -44,7 +44,7 @@ There are a couple of things happening here, let's go over them. ##### Search method - The search method takes the STAC compliant input and generates an API-compatible request. -- User input is seperated into a couple parameters: +- User input is separated into a couple parameters: - **spatial**: geojson geometry representing the spatial extent of the query. - **temporal**: temporal range representing the temporal extent of the query. - **properties**: STAC or legacy properties used to query the API and/or filter the response. @@ -55,10 +55,10 @@ There are a couple of things happening here, let's go over them. - Executes in the main thread. ##### Execute method -- The **request** parameter of the execute method receives the API request created in the search method. +- The **request** parameter of the execute method consumes API requests stored in the `self.manifest.sources` list. - Ping the API and implement logic to parse the response into a valid STAC item. - The [datasources.stac.item.STACITem](../datasources/stac/item.py) object performs a soft validation of the STAC Item to ensure all the required fields are present. -- If the API is STAC compliant, the execute method should return the API response without any modification. If the API is not STAC compliant, it should return a list of STAC Items. +- If the API is STAC compliant, the execute method should return the API response without any modification. 
If the API is not STAC compliant, it should return a list of STAC Item(s). - Executes in worker threads spawned by `multiprocess.Process`. --- @@ -72,17 +72,21 @@ Our new directory will look like this: ``` . -└── FakeSat # Parent directory +└── FakeSat # Parent directory. ├── .circleci - │ └── config.yml # CircleCI configuration - ├── config.yml # CI/CD Integration - ├── docs - │ ├── example.json # Example STAC Item returned by query - │ └── README.md # Driver documentation - ├── FakeSat.py # Driver file - ├── requirements-dev.txt # Testing dependencies - ├── requirements.txt # Production dependencies - └── tests.py # Unittests + │ └── config.yml # CircleCI configuration. + ├── bin + │   └── driver-package.sh # Packages driver inside Docker container. + ├── config.yml # Driver configuration. + ├── Dockerfile # Docker container. + ├── docs + │   ├── example.json # Example STAC Item generated by driver. + ├── FakeSat.py # Driver file. + ├── handler.py # Lambda function which calls your driver. + ├── README.md # Driver documentation. + ├── requirements-dev.txt # Testing dependencies. + ├── requirements.txt # Production dependencies. + └── tests.py # Unittests. ``` The starter-project contains everything we need to build a datasource: @@ -128,7 +132,7 @@ Our fake API will have a simple response: } ``` -We can see that there are both spatial and temporal elements as well as additional properties we can map to the STAC spec (eo:gsd and eo:epsg). We will use the `search` and `execute` methods to wrap the STAC-spec around the FakeSat api. You are free to implement this logic however you like, as long as you adhere to the standard input and output patterns. +We can see that there are both spatial and temporal elements as well as additional properties we can map to the STAC spec (eo:gsd and eo:epsg). Any properties which don't fit nicely into the STAC spec may be mapped to the legacy extension. 
We will use the `search` and `execute` methods to wrap the STAC-spec around the FakeSat api. You are free to implement this logic however you like, as long as you adhere to the standard input and output patterns. ```python import requests @@ -169,8 +173,9 @@ class FakeSat(Datasource): api_request.update({'gsd': stac_query.properties['eo:gsd']['eq']}) if 'eo:epsg' in keys: api_request.update({'epsg': stac_query.properties['eo:epsg']['eq']}) + # Use the legacy collection for keys that don't map to STAC if 'legacy:processing_level' in keys: - api_request.update({'legacy:processing': stac_query.properties['legacy:processing']['eq']}) + api_request.update({'processing': stac_query.properties['legacy:processing']['eq']}) # Append to manifest self.manifest.searches.append([self, api_request]) @@ -255,7 +260,15 @@ class FakeSatTestCases(tests.BaseTestCases): self.limit = 10 ``` -You can add additional test cases as needed. +You can add additional test cases as needed. The easiest way to run test cases is via Docker: + +``` +# Build Docker container +docker build . -t fakesat-driver:latest + +# Run tests +docker run --rm -v $PWD:/home/cognition-datasources -it fakesat-driver:latest python -m unittest tests.py +``` **(4). Update `requirements.txt` and `requirements-dev.txt` with any dependencies required by your driver.** @@ -271,8 +284,6 @@ CircleCI is a simple, cloud-hosted, continuous integration system with good inte 2. Click `Add Projects` on the side of the dashboard and then `Set Up Project` next to the appropriate repository. 3. Click `Start building` -Done! - **(7). Add your CircleCI build API key to `config.yml`** Cognition-datasources requires access to your project's API Key to determine whether or not the driver has built succesfully. You can obtain your project-specific API Key using the following instructions. @@ -289,17 +300,65 @@ Cognition-datasources requires access to your project's API Key to determine whe 2. Click `StatusBadges` 3. 
Ensure `Embed Code` is set to Markdown and copy/paste the code to the first line of `docs/README.md`. -**(9). Register your driver in cognition-datasources via pull request** -Register your driver in [datasources.sources.__init__.py](../datasources/sources/__init__.py) by creating a class attribute in the `remote` object containing the url to the driver's master branch. Make sure the url is pointing to raw github content. +**(9). Deploy your driver as an AWS Lambda Layer.** + +``` +# Build Docker container +docker build . -t fakesat-driver:latest + +# Package the layer +docker run --rm -v $PWD:/home/cognition-datasources -it fakesat-driver:latest driver-package.sh + +# Deploy layer to lambda +aws lambda publish-layer-version \ + --layer-name fakesat-driver \ + --zip-file fileb://lambda-layer.zip + +# Make layer public +aws lambda add-layer-version-permission --layer-name fakesat-driver \ + --statement-id public --version-number 1 --principal '*' \ + --action lambda:GetLayerVersion +``` + +**(10). Add your layer's arn to `config.yml`, making sure to include the version tag.** + +**(11). Register your driver in cognition-datasources via pull request** +Register your driver in [datasources.sources.__init__.py](../datasources/sources/__init__.py) by creating a class attribute in the `remote` object containing the url to the driver's master branch. Make sure the url is pointing to [jsDelivr](https://www.jsdelivr.com/). ```python class remote(object): - FakeSat = "https://raw.githubusercontent.com/geospatial-jeff/cognition-datasources-fakesat/master" + FakeSat = "https://cdn.jsdelivr.net/gh/geospatial-jeff/cognition-datasources-fakesat" ``` -Submit the pull request into dev and your driver will be included with the next release! Another user would hypothetically be able to load our fake driver with `cognition-datasources load -d FakeSat`! +Submit the pull request into dev and your driver will be included with the next release! 
Another user can load our fake driver with `cognition-datasources load -d FakeSat`! + +## Additional Notes +### Spatial Handling +A common solution when working with datasources which don't directly expose spatial queries is to save a spatial coverage of the dataset to a database (ex. PostGIS). This isn't a viable option for cognition-datasources for several reasons. The library supports packaging spatial coverages with your driver through an [AWS Lambda Spatial Database](https://github.com/geospatial-jeff/lambda-layer-spatial-db). The coverage is written to disk and saved to an AWS Lambda Layer which is loaded by cognition-datasources in addition to the driver layer itself. + +Let's pretend the FakeSat API wasn't an API but a FTP server with a flat file structure of images. In order to expose a spatial query on the underlying dataset, we can write a program which crawls the FTP server and generates spatial coverages from image metadata. We can then package the spatial coverages with our driver to satisfy the spatial requirements of the STAC query. + +**(1). Clone the `lambda-layer-spatial-db` library into the `spatial-db` folder.** + +``` +git clone https://github.com/geospatial-jeff/lambda-layer-spatial-db.git spatial-db +cd spatial-db +``` + +**(2). Follow the [database-docs](https://github.com/geospatial-jeff/lambda-layer-spatial-db/blob/master/docs/README.md) to package and deploy your spatial coverages as an AWS Lambda Layer.** + +**(3). Update your driver's `Dockerfile` to pull from `geospatialjeff/cognition-datasources-db:latest`.** + +**(4). Update your CircleCI configurations (`.circleci/config.yml`) docker image to pull from `geospatialjeff/cognition-datasources-db:latest`** + +**(5). Add a `db-arn` key in your driver's configuration (`config.yml`) which maps to your database layer ARN.** + +You can now perform a basic bounding box query on the packaged spatial coverages from within our driver. 
For an implementation example, see the [NAIP driver](https://github.com/geospatial-jeff/cognition-datasources-naip). + +### How It Works +Upon initialization, cognition-datasources uses a [simple loader](../datasources/sources/__init__.py) (see `collections.load_sources`) which loads all drivers found in the `./datasources/sources` folder. When installing locally, the driver file (`FakeSat.py`) is moved into the `sources` folder which allows local calls to cognition-datasources. The serverless deployment packages each datasource as a lambda function which takes advantage of how AWS Lambda Layers are merged at runtime. + +![title](images/lambda-environment.png) -### Additional Notes -##### Spatial Handling -A common solution when working with datasources which don't directly expose spatial queries is to save a spatial coverage of the dataset to a database (ex. PostGIS). This isn't a viable option for cognition-datasources for many reasons. Cognition supports spatial coverages through the [Rtree](http://toblerity.org/rtree/) package which provides a simple interface for building and saving an r-tree spatial index to disk. The r-tree acts as a cheapo spatial database, supports basic bounding box queries (which is nicely wrapped in `datasources.stac.query.STACQuery`), and easily shipped with your driver upon load. It doesn't matter how you construct the r-tree, but it should be saved to `index.idx` and `index.dat` in the root directory of the repository. You are also encouraged to write a simple script for regenerating the r-tree, especially if the underlying datasource changes frequently. See the [NAIP driver](https://github.com/geospatial-jeff/cognition-datasources-naip) for an example. \ No newline at end of file +Each lambda function pulls from two lambda layers (three if packaged with spatial coverages): the cognition-datasources layer and the driver layer. 
When the layers are merged at runtime, the driver file is placed into the appropriate folder which allows cognition-datasources to successfully load the driver inside `handler.py`. \ No newline at end of file diff --git a/docs/datasource-reference.md b/docs/datasource-reference.md index 7c951c1..57e91a5 100644 --- a/docs/datasource-reference.md +++ b/docs/datasource-reference.md @@ -44,17 +44,16 @@ - There is no source API for this datasource, instead an index is created with the [dg-open-data-scraper](https://github.com/geospatial-jeff/dg-open-data-scraper). --- [![CircleCI](https://circleci.com/gh/geospatial-jeff/cognition-datasources-elevationtiles.svg?style=svg)](https://circleci.com/gh/geospatial-jeff/cognition-datasources-elevationtiles) - ## Elevation Tiles -| Parameter | Status | + | Parameter | Status | | ----------| ------ | | Spatial | :heavy_check_mark: | | Temporal | :x: | | Properties | :heavy_check_mark: | | **kwargs | [limit, zoom] | -##### Properties + ##### Properties | Property | Type | Example | |--------------------------|-------|-------------| | eo:gsd | float | 305.74 | @@ -64,7 +63,7 @@ | legacy:y | int | 91 | | legacy:z | int | 8 | -##### Notes + ##### Notes - The source API is a XYZ tiled elevation service. The `zoom` kwarg changes the zoom level being queried. - The source API doesn't support temporal data. Can search with temporal but it is not honored. 
--- @@ -136,6 +135,7 @@ | eo:epsg | int | 26914 | | eo:instrument | str | 'Leica ADS100' | --- + [![CircleCI](https://circleci.com/gh/geospatial-jeff/cognition-datasources-sentinel1.svg?style=svg)](https://circleci.com/gh/geospatial-jeff/cognition-datasources-sentinel1) ## Sentinel1 @@ -160,7 +160,7 @@ --- [![CircleCI](https://circleci.com/gh/geospatial-jeff/cognition-datasources-sentinel2.svg?style=svg)](https://circleci.com/gh/geospatial-jeff/cognition-datasources-sentinel2) -## Sentinel2 +## Sentinel2 | Parameter | Status | | ----------| ------ | diff --git a/docs/deployment.md b/docs/deployment.md new file mode 100644 index 0000000..014e5d4 --- /dev/null +++ b/docs/deployment.md @@ -0,0 +1,59 @@ +# Deployment +The purpose of this page is to explain how to deploy your own instance of cognition-datasources. The library may be deployed to either the cloud (via [Serverless Framework](https://serverless.com/)) or locally. The cloud deployment packages each driver as an AWS Lambda function while the local deployment installs drivers to your local installation of cognition-datasources. For more information see [How It Works](./contributing.md#how-it-works) + +Drivers which also package spatial coverages are currently not compatible with local installations. + +## Cloud Deployment (AWS Lambda + API Gateway) +**(1). Clone the library and install the CLI.** + +``` +git clone https://github.com/geospatial-jeff/cognition-datasources my-cd-deployment +cd my-cd-deployment +python setup.py develop +``` + +**(2). Load datasources into your deployment.** + +``` +cognition-datasources load -d Landsat8 -d Sentinel2 +``` + +This command will populate `serverless.yml` and `handler.py` with all of the necessary configuration and code to create the service. Each driver is packaged as its own lambda function. + +**(3). Build docker container** + +``` +docker build . -t cognition-datasources:latest +``` + +**(4).
Package service** + +``` +docker run --rm -v $PWD:/home/cognition-datasources -it cognition-datasources:latest package-service.sh +``` + +**(5). Edit the configuration variables in `serverless.yml`** + +**(6). Deploy the service to AWS via Serverless Framework** + +``` +sls deploy -v +``` + +The deployment generates an AWS API Gateway endpoint which supports STAC-compliant searches of the loaded datasources through the `/stac/search` endpoint (POST). + + +## Local Deployment +**(1). Clone the library and install the CLI.** + +``` +pip install git+https://github.com/geospatial-jeff/cognition-datasources +``` + +**(2). Load datasources into your deployment while enabling the `local` flag.** + +``` +cognition-datasources load -d Landsat8 -d Sentinel2 --local +``` + +While the local flag is enabled, the driver and its dependencies will be installed locally. The driver is stored in the `./datasources/sources/` folder while all dependencies are installed to the default location of the current environment.
diff --git a/docs/images/api-diagram.png b/docs/images/api-diagram.png deleted file mode 100644 index ae1d356..0000000 Binary files a/docs/images/api-diagram.png and /dev/null differ diff --git a/docs/images/lambda-environment.png b/docs/images/lambda-environment.png new file mode 100644 index 0000000..42627cc Binary files /dev/null and b/docs/images/lambda-environment.png differ diff --git a/docs/images/service-diagram.png b/docs/images/service-diagram.png new file mode 100644 index 0000000..ec2d91c Binary files /dev/null and b/docs/images/service-diagram.png differ diff --git a/docs/quickstart.ipynb b/docs/python-api-usage.ipynb similarity index 100% rename from docs/quickstart.ipynb rename to docs/python-api-usage.ipynb diff --git a/datasources/template/.circleci/config.yml b/driver/.circleci/config.yml similarity index 78% rename from datasources/template/.circleci/config.yml rename to driver/.circleci/config.yml index f7ab26a..5db6bc3 100644 --- a/datasources/template/.circleci/config.yml +++ b/driver/.circleci/config.yml @@ -8,7 +8,7 @@ jobs: docker: # specify the version you desire here # use `-browsers` prefix for selenium tests, e.g. `3.6.1-browsers` - - image: circleci/python:3.6.1 + - image: geospatialjeff/cognition-datasources:latest # Specify service dependencies here if necessary # CircleCI maintains a library of pre-built images @@ -30,15 +30,14 @@ jobs: - run: name: install dependencies command: | - python3 -m venv venv - . 
venv/bin/activate - sudo apt-get install libspatialindex-dev pip install -r requirements-dev.txt -# - run: -# name: "Setup custom environment variables" -# command: | -# echo 'export HELLO="WORLD"' >> $BASH_ENV + - run: + name: "Setup custom environment variables" + command: | + echo 'export LAMBDA_DB_PATH="/root/repo/spatial-db/lambda_db/database.fs"' >> $BASH_ENV + echo 'export PYTHONPATH="${PYTHONPATH}:/root/repo/spatial-db/lambda_db/"' >> $BASH_ENV + # - save_cache: @@ -54,8 +53,7 @@ jobs: - run: name: run tests command: | - . venv/bin/activate - python -m unittest tests.py + python3 -m unittest tests.py - store_artifacts: path: test-reports diff --git a/driver/Dockerfile b/driver/Dockerfile new file mode 100644 index 0000000..e6fa8c9 --- /dev/null +++ b/driver/Dockerfile @@ -0,0 +1,23 @@ +FROM geospatialjeff/cognition-datasources:latest + +COPY requirements*.txt ./ + +# Paths to things +ENV \ + PROD_LIBS=/build/prod \ + PYTHONPATH=$PYTHONPATH:/$PROD_LIBS/lib/python3.6/site-packages:/home/cognition-datasources/spatial-db/lambda_db \ + LAMBDA_DB_PATH=/home/cognition-datasources/spatial-db/lambda_db/database.fs + +# Install requirements into seperate folders +RUN \ + mkdir $PROD_LIBS; \ + pip install -r requirements-dev.txt; \ + pip install -r requirements.txt --install-option="--prefix=$PROD_LIBS" --ignore-installed; + +COPY bin/* /usr/local/bin/ + +# Giving exec permissions to script +RUN \ + chmod +x /usr/local/bin/driver-package.sh + +WORKDIR /home/cognition-datasources \ No newline at end of file diff --git a/datasources/template/docs/README.md b/driver/README.md similarity index 97% rename from datasources/template/docs/README.md rename to driver/README.md index b115422..98269f3 100644 --- a/datasources/template/docs/README.md +++ b/driver/README.md @@ -1,6 +1,6 @@ **Add CircleCI badge to first line of file** -## MyDataSource +## __TEMPLATENAME__ | Parameter | Status | | ----------| ------ | diff --git a/driver/bin/driver-package.sh 
b/driver/bin/driver-package.sh new file mode 100644 index 0000000..c63cba5 --- /dev/null +++ b/driver/bin/driver-package.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +# directory used for deployment +export DEPLOY_DIR=lambda + +DRIVERNAME=__TEMPLATENAME__ +PYPATH=$PROD_LIBS/lib/python3.6/site-packages + +echo "Creating lambda layer" + +# Moving python libraries +mkdir -p $DEPLOY_DIR/python +EXCLUDE="click* urllib3* s3transfer* boto3* botocore* pip* docutils* *.pyc setuptools* wheel* coverage* testfixtures* mock* *.egg-info *.dist-info __pycache__ easy_install.py" + + +EXCLUDES=() +for E in ${EXCLUDE} +do + EXCLUDES+=("--exclude ${E} ") +done + +rsync -ax $PYPATH/ $DEPLOY_DIR/python/ ${EXCLUDES[@]} + +# Copying driver to cognition-datasources folder +mkdir -p $DEPLOY_DIR/python/datasources/sources/ +cp $DRIVERNAME.py $DEPLOY_DIR/python/datasources/sources/ + + +# Make lambda layer +cd $DEPLOY_DIR +zip -ruq ../lambda-layer.zip ./ diff --git a/driver/config.yml b/driver/config.yml new file mode 100644 index 0000000..73d6815 --- /dev/null +++ b/driver/config.yml @@ -0,0 +1,2 @@ +circle-token: +layer-arn: \ No newline at end of file diff --git a/driver/docs/README.md b/driver/docs/README.md new file mode 100644 index 0000000..058ce67 --- /dev/null +++ b/driver/docs/README.md @@ -0,0 +1,35 @@ +# External Drivers + +1. Add driver requirements to `requirements.txt` and `requirements-dev.txt` +2. Build docker image + +``` +docker build . -t <image-name>:latest +``` + +3. Run test cases inside docker container + +``` +docker run --rm -v $PWD:/home/cognition-datasources -it <image-name>:latest python -m unittest tests.py +``` + +4. Build lambda layer + +``` +docker run --rm -v $PWD:/home/cognition-datasources -it <image-name>:latest driver-package.sh +``` + +5. Deploy layer to lambda +``` +aws lambda publish-layer-version \ + --layer-name <layer-name> \ + --zip-file fileb://lambda-layer.zip +``` + +6.
Make layer public (do this after deploying a new version) +``` +aws lambda add-layer-version-permission --layer-name \ + --statement-id public --version-number 1 --principal '*' \ + --action lambda:GetLayerVersion +``` + diff --git a/datasources/template/docs/example.json b/driver/docs/example.json similarity index 100% rename from datasources/template/docs/example.json rename to driver/docs/example.json diff --git a/driver/handler.py b/driver/handler.py new file mode 100644 index 0000000..3498d05 --- /dev/null +++ b/driver/handler.py @@ -0,0 +1,9 @@ +from datasources import Manifest + +def __TEMPLATENAME__(event, context): + manifest = Manifest() + manifest['__TEMPLATENAME__'].search(**event) + response = manifest.execute() + return response + + diff --git a/datasources/template/requirements-dev.txt b/driver/requirements-dev.txt similarity index 54% rename from datasources/template/requirements-dev.txt rename to driver/requirements-dev.txt index bb7824b..5981961 100644 --- a/datasources/template/requirements-dev.txt +++ b/driver/requirements-dev.txt @@ -1,3 +1,2 @@ -git+https://github.com/geospatial-jeff/cognition-datasources.git git+https://github.com/geospatial-jeff/stac-validator.git shapely==1.6.4.post2 \ No newline at end of file diff --git a/datasources/template/requirements.txt b/driver/requirements.txt similarity index 100% rename from datasources/template/requirements.txt rename to driver/requirements.txt diff --git a/datasources/template/template.py b/driver/template.py similarity index 100% rename from datasources/template/template.py rename to driver/template.py diff --git a/datasources/template/tests.py b/driver/tests.py similarity index 100% rename from datasources/template/tests.py rename to driver/tests.py diff --git a/handler.py b/handler.py new file mode 100644 index 0000000..14f2340 --- /dev/null +++ b/handler.py @@ -0,0 +1,94 @@ +import os +from multiprocessing import Process, Pipe +import json + +from datasources import Manifest +import boto3 
+ +service = os.environ['SERVICE_NAME'] +stage = os.environ['SERVICE_STAGE'] +region = os.environ['SERVICE_REGION'] + +lambda_client = boto3.client('lambda') + +def worker(event, context): + + def lambda_invoke(service, stage, source, args, conn): + response = lambda_client.invoke( + FunctionName=f"{service}-{stage}-{source}", + InvocationType="RequestResponse", + Payload=json.dumps(args) + ) + + conn.send(json.loads(response['Payload'].read())) + conn.close() + + package = json.loads(event['body']) + params = list(package) + args = {} + out_d = {} + + if 'time' in params: + args.update({'temporal': package['time'].split('/')}) + else: + args.update({'temporal': None}) + + if 'intersects' in params: + args.update({'spatial': package['intersects']}) + elif 'bbox' in params: + geoj = { + "type": "Polygon", + "coordinates": [ + [ + [package['bbox'][0], package['bbox'][3]], + [package['bbox'][2], package['bbox'][3]], + [package['bbox'][2], package['bbox'][1]], + [package['bbox'][0], package['bbox'][1]], + [package['bbox'][0], package['bbox'][3]] + ] + ] + } + args.update({'spatial': geoj}) + else: + raise ValueError("Spatial parameter is required") + + if 'properties' in params: + args.update({'properties': package['properties']}) + else: + args.update({'properties': None}) + + if 'limit' in params: + args.update({'kwargs': {'limit': package['limit']}}) + else: + args.update({'kwargs': {'limit': 10}}) + + processes = [] + parent_connections = [] + + for source in package['datasources']: + parent_conn, child_conn = Pipe() + parent_connections.append(parent_conn) + + process = Process(target=lambda_invoke, args=(service, stage, source, args, child_conn)) + processes.append(process) + + for process in processes: + process.start() + + for process in processes: + process.join() + + for parent_connection in parent_connections: + response = parent_connection.recv() + for item in response: + if item not in list(out_d): + out_d.update({item: response[item]}) + else: + 
out_d[item]['features'] += response[item]['features'] + + return { + 'statusCode': 200, + 'body': json.dumps(out_d) + } + + diff --git a/requirements-dev.txt b/requirements-dev.txt index 88e2add..d3923f6 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -2,7 +2,6 @@ Click==7.0 geomet==0.2.0.post2 pyyaml==5.1 requests==2.21.0 -Rtree==0.8.3 schema==0.6.8 Shapely==1.6.4.post2 diff --git a/requirements.txt b/requirements.txt index e8e7531..2cb4938 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,5 +2,4 @@ Click==7.0 geomet==0.2.0.post2 pyyaml==5.1 requests==2.21.0 -Rtree==0.8.3 schema==0.6.8 diff --git a/serverless.yml b/serverless.yml new file mode 100644 index 0000000..2ec2d77 --- /dev/null +++ b/serverless.yml @@ -0,0 +1,46 @@ +######################## +# Edit these variables # +######################## +custom: + service-name: cognition-datasources-deploy + stage: dev + +#################################################################################### +# Don't edit below (unless adding environment variables to `provider.environment`) # +#################################################################################### + +functions: + worker: + handler: handler.worker + events: + - http: + path: /stac/search + method: post + layers: + - arn:aws:lambda:us-east-1:725820063953:layer:cognition-datasources:6 + +provider: + environment: + SERVICE_STAGE: ${self:provider.stage} + SERVICE_NAME: ${self:service} + SERVICE_REGION: ${self:provider.region} + + iamRoleStatementsName: cognition-datasources-role + iamRoleStatements: + - Action: + - lambda:InvokeFunction + - s3:* + Effect: Allow + Resource: + - arn:aws:lambda:* + - arn:aws:s3:::* + + name: aws + region: us-east-1 + runtime: python3.6 + stage: ${self:custom.stage} + +service: ${self:custom.service-name} + +package: + artifact: lambda-service-package.zip \ No newline at end of file diff --git a/setup.py b/setup.py index 808a548..09c12c8 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ 
requirements = [line.rstrip() for line in reqs] setup(name="cognition_datasources", - version='0.2', + version='0.3', author='Jeff Albrecht', author_email='geospatialjeff@gmail.com', packages=find_packages(exclude=['docs']),