From 40442fa4523ada2c6bcb2b47177abadd35e0291e Mon Sep 17 00:00:00 2001 From: Rahul Mahrsee Date: Tue, 16 May 2023 17:04:22 +0000 Subject: [PATCH 01/51] Initial Commit. --- weather_dl_v2/fastapi-server/Dockerfile | 1 + weather_dl_v2/fastapi-server/README.md | 38 + weather_dl_v2/fastapi-server/__init__.py | 0 .../fastapi-server/config_files/example.cfg | 32 + .../config_processing/config.py | 101 +++ .../config_processing/manifest.py | 714 ++++++++++++++++++ .../config_processing/parsers.py | 448 +++++++++++ .../config_processing/partition.py | 102 +++ .../config_processing/pipeline.py | 32 + .../config_processing/stores.py | 108 +++ .../fastapi-server/config_processing/util.py | 187 +++++ weather_dl_v2/fastapi-server/environment.yml | 15 + weather_dl_v2/fastapi-server/example.cfg | 32 + .../fastapi-server/firestore_db/db.py | 19 + weather_dl_v2/fastapi-server/main.py | 27 + .../fastapi-server/routers/download.py | 81 ++ .../fastapi-server/routers/license.py | 83 ++ .../routers/license_priority.py | 51 ++ weather_dl_v2/fastapi-server/server.yaml | 1 + 19 files changed, 2072 insertions(+) create mode 100644 weather_dl_v2/fastapi-server/Dockerfile create mode 100644 weather_dl_v2/fastapi-server/README.md create mode 100644 weather_dl_v2/fastapi-server/__init__.py create mode 100644 weather_dl_v2/fastapi-server/config_files/example.cfg create mode 100644 weather_dl_v2/fastapi-server/config_processing/config.py create mode 100644 weather_dl_v2/fastapi-server/config_processing/manifest.py create mode 100644 weather_dl_v2/fastapi-server/config_processing/parsers.py create mode 100644 weather_dl_v2/fastapi-server/config_processing/partition.py create mode 100644 weather_dl_v2/fastapi-server/config_processing/pipeline.py create mode 100644 weather_dl_v2/fastapi-server/config_processing/stores.py create mode 100644 weather_dl_v2/fastapi-server/config_processing/util.py create mode 100644 weather_dl_v2/fastapi-server/environment.yml create mode 100644 
weather_dl_v2/fastapi-server/example.cfg create mode 100644 weather_dl_v2/fastapi-server/firestore_db/db.py create mode 100644 weather_dl_v2/fastapi-server/main.py create mode 100644 weather_dl_v2/fastapi-server/routers/download.py create mode 100644 weather_dl_v2/fastapi-server/routers/license.py create mode 100644 weather_dl_v2/fastapi-server/routers/license_priority.py create mode 100644 weather_dl_v2/fastapi-server/server.yaml diff --git a/weather_dl_v2/fastapi-server/Dockerfile b/weather_dl_v2/fastapi-server/Dockerfile new file mode 100644 index 00000000..9e2025a7 --- /dev/null +++ b/weather_dl_v2/fastapi-server/Dockerfile @@ -0,0 +1 @@ +# TODO: Write a docker file to create the image of FastAPI server. \ No newline at end of file diff --git a/weather_dl_v2/fastapi-server/README.md b/weather_dl_v2/fastapi-server/README.md new file mode 100644 index 00000000..1d0cfa0a --- /dev/null +++ b/weather_dl_v2/fastapi-server/README.md @@ -0,0 +1,38 @@ +# Deployment Instructions & General Notes + +* **How to create environment:** +``` +conda env create --name weather-dl-v2-server --file=environment.yml + +conda activate weather-dl-v2-server +``` + +* **To run fastapi server:** +``` +uvicorn main:app --reload +``` + +* Open your browser at http://127.0.0.1:8000. + +* **Create docker image for server**: +``` +export PROJECT_ID= +export REPO= eg:weather-tools + +gcloud builds submit . 
--tag "gcr.io/$PROJECT_ID/$REPO:weather-dl-v2-server" --timeout=79200 --machine-type=e2-highcpu-32 +``` + +* **Deploy fastapi server on kubernetes:** +``` +kubectl apply -f server.yaml --force +``` + +## General Commands +* **For viewing the current pods**: +``` +kubectl get pods +``` + +* **For deleting existing deployment**: +``` +kubectl delete -f server.yaml --force \ No newline at end of file diff --git a/weather_dl_v2/fastapi-server/__init__.py b/weather_dl_v2/fastapi-server/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/weather_dl_v2/fastapi-server/config_files/example.cfg b/weather_dl_v2/fastapi-server/config_files/example.cfg new file mode 100644 index 00000000..aa7418ea --- /dev/null +++ b/weather_dl_v2/fastapi-server/config_files/example.cfg @@ -0,0 +1,32 @@ +[parameters] +client=mars + +target_path=gs:// 'Config': + config_instance = cls() + for section_key, section_value in config.items(): + if section_key == "parameters": + for key, value in section_value.items(): + if hasattr(config_instance, key): + setattr(config_instance, key, value) + else: + config_instance.kwargs[key] = value + if section_key == "selection": + config_instance.selection = section_value + return config_instance + + +def optimize_selection_partition(selection: t.Dict) -> t.Dict: + """Compute right-hand-side values for the selection section of a single partition. + + Used to support custom syntax and optimizations, such as 'all'. + """ + selection_ = copy.deepcopy(selection) + + if 'day' in selection_.keys() and selection_['day'] == 'all': + year, month = selection_['year'], selection_['month'] + + multiples_error = "Cannot use keyword 'all' on selections with multiple '{type}'s." 
+ + if isinstance(year, list): + assert len(year) == 1, multiples_error.format(type='year') + year = year[0] + + if isinstance(month, list): + assert len(month) == 1, multiples_error.format(type='month') + month = month[0] + + if isinstance(year, str): + assert '/' not in year, multiples_error.format(type='year') + + if isinstance(month, str): + assert '/' not in month, multiples_error.format(type='month') + + year, month = int(year), int(month) + + _, n_days_in_month = calendar.monthrange(year, month) + + selection_['date'] = f'{year:04d}-{month:02d}-01/to/{year:04d}-{month:02d}-{n_days_in_month:02d}' + del selection_['day'] + del selection_['month'] + del selection_['year'] + + return selection_ diff --git a/weather_dl_v2/fastapi-server/config_processing/manifest.py b/weather_dl_v2/fastapi-server/config_processing/manifest.py new file mode 100644 index 00000000..e0a0a6e4 --- /dev/null +++ b/weather_dl_v2/fastapi-server/config_processing/manifest.py @@ -0,0 +1,714 @@ +"""Client interface for connecting to a manifest.""" + +import abc +import collections +import dataclasses +import datetime +import enum +import json +import logging +import os +import pandas as pd +import threading +import time +import traceback +import typing as t +from urllib.parse import urlparse, parse_qsl + +from .util import ( + to_json_serializable_type, + fetch_geo_polygon, + get_file_size, + get_wait_interval, + generate_md5_hash, + retry_with_exponential_backoff, + GLOBAL_COVERAGE_AREA +) + +import firebase_admin +from firebase_admin import firestore +from google.cloud import bigquery +from google.cloud.firestore_v1 import DocumentReference +from google.cloud.firestore_v1.types import WriteResult + +"""An implementation-dependent Manifest URI.""" +Location = t.NewType('Location', str) + +logger = logging.getLogger(__name__) + + +class ManifestException(Exception): + """Errors that occur in Manifest Clients.""" + pass + + +class Stage(enum.Enum): + """A request can be either in one of the 
following stages at a time: + + fetch : This represents request is currently in fetch stage i.e. request placed on the client's server + & waiting for some result before starting download (eg. MARS client). + download : This represents request is currently in download stage i.e. data is being downloading from client's + server to the worker's local file system. + upload : This represents request is currently in upload stage i.e. data is getting uploaded from worker's local + file system to target location (GCS path). + retrieve : In case of clients where there is no proper separation of fetch & download stages (eg. CDS client), + request will be in the retrieve stage i.e. fetch + download. + """ + RETRIEVE = 'retrieve' + FETCH = 'fetch' + DOWNLOAD = 'download' + UPLOAD = 'upload' + + +class Status(enum.Enum): + """Depicts the request's state status: + + scheduled : A request partition is created & scheduled for processing. + Note: Its corresponding state can be None only. + in-progress : This represents the request state is currently in-progress (i.e. running). + The next status would be "success" or "failure". + success : This represents the request state execution completed successfully without any error. + failure : This represents the request state execution failed. 
+ """ + SCHEDULED = 'scheduled' + IN_PROGRESS = 'in-progress' + SUCCESS = 'success' + FAILURE = 'failure' + + +@dataclasses.dataclass +class DownloadStatus(): + """Data recorded in `Manifest`s reflecting the status of a download.""" + + """The name of the config file associated with the request.""" + config_name: str = "" + + """Represents the dataset field of the configuration.""" + dataset: t.Optional[str] = "" + + """Copy of selection section of the configuration.""" + selection: t.Dict = dataclasses.field(default_factory=dict) + + """Location of the downloaded data.""" + location: str = "" + + """Represents area covered by the shard.""" + area: str = "" + + """Current stage of request : 'fetch', 'download', 'retrieve', 'upload' or None.""" + stage: t.Optional[Stage] = None + + """Download status: 'scheduled', 'in-progress', 'success', or 'failure'.""" + status: t.Optional[Status] = None + + """Cause of error, if any.""" + error: t.Optional[str] = "" + + """Identifier for the user running the download.""" + username: str = "" + + """Shard size in GB.""" + size: t.Optional[float] = 0 + + """A UTC datetime when download was scheduled.""" + scheduled_time: t.Optional[str] = "" + + """A UTC datetime when the retrieve stage starts.""" + retrieve_start_time: t.Optional[str] = "" + + """A UTC datetime when the retrieve state ends.""" + retrieve_end_time: t.Optional[str] = "" + + """A UTC datetime when the fetch state starts.""" + fetch_start_time: t.Optional[str] = "" + + """A UTC datetime when the fetch state ends.""" + fetch_end_time: t.Optional[str] = "" + + """A UTC datetime when the download state starts.""" + download_start_time: t.Optional[str] = "" + + """A UTC datetime when the download state ends.""" + download_end_time: t.Optional[str] = "" + + """A UTC datetime when the upload state starts.""" + upload_start_time: t.Optional[str] = "" + + """A UTC datetime when the upload state ends.""" + upload_end_time: t.Optional[str] = "" + + @classmethod + def 
from_dict(cls, download_status: t.Dict) -> 'DownloadStatus': + """Instantiate DownloadStatus dataclass from dict.""" + download_status_instance = cls() + for key, value in download_status.items(): + if key == 'status': + setattr(download_status_instance, key, Status(value)) + elif key == 'stage' and value is not None: + setattr(download_status_instance, key, Stage(value)) + else: + setattr(download_status_instance, key, value) + return download_status_instance + + @classmethod + def to_dict(cls, instance) -> t.Dict: + """Return the fields of a dataclass instance as a manifest ingestible + dictionary mapping of field names to field values.""" + download_status_dict = {} + for field in dataclasses.fields(instance): + key = field.name + value = getattr(instance, field.name) + if isinstance(value, Status) or isinstance(value, Stage): + download_status_dict[key] = value.value + elif isinstance(value, pd.Timestamp): + download_status_dict[key] = value.isoformat() + elif key == 'selection' and value is not None: + download_status_dict[key] = json.dumps(value) + else: + download_status_dict[key] = value + return download_status_dict + + +@dataclasses.dataclass +class Manifest(abc.ABC): + """Abstract manifest of download statuses. + + Update download statuses to some storage medium. + + This class lets one indicate that a download is `scheduled` or in a transaction process. + In the event of a transaction, a download will be updated with an `in-progress`, `success` + or `failure` status (with accompanying metadata). + + Example: + ``` + my_manifest = parse_manifest_location(Location('fs://some-firestore-collection')) + + # Schedule data for download + my_manifest.schedule({'some': 'metadata'}, 'path/to/downloaded/file', 'my-username') + + # ... + + # Initiate a transaction – it will record that the download is `in-progess` + with my_manifest.transact({'some': 'metadata'}, 'path/to/downloaded/file', 'my-username') as tx: + # download logic here + pass + + # ... 
+ + # on error, will record the download as a `failure` before propagating the error. By default, it will + # record download as a `success`. + ``` + + Attributes: + location: An implementation-specific manifest URI. + status: The current `DownloadStatus` of the Manifest. + """ + + location: Location + # To reduce the impact of _read() and _update() calls + # on the start time of the stage. + prev_stage_precise_start_time: t.Optional[str] = None + status: t.Optional[DownloadStatus] = None + + # This is overridden in subclass. + def __post_init__(self): + """Initialize the manifest.""" + pass + + def schedule(self, config_name: str, dataset: str, selection: t.Dict, location: str, user: str) -> None: + """Indicate that a job has been scheduled for download. + + 'scheduled' jobs occur before 'in-progress', 'success' or 'finished'. + """ + scheduled_time = datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc).isoformat(timespec='seconds') + self.status = DownloadStatus( + config_name=config_name, + dataset=dataset if dataset else None, + selection=selection, + location=location, + area=fetch_geo_polygon(selection.get('area', GLOBAL_COVERAGE_AREA)), + username=user, + stage=None, + status=Status.SCHEDULED, + error=None, + size=None, + scheduled_time=scheduled_time, + retrieve_start_time=None, + retrieve_end_time=None, + fetch_start_time=None, + fetch_end_time=None, + download_start_time=None, + download_end_time=None, + upload_start_time=None, + upload_end_time=None, + ) + self._update(self.status) + + def skip(self, config_name: str, dataset: str, selection: t.Dict, location: str, user: str) -> None: + """Updates the manifest to mark the shards that were skipped in the current job + as 'upload' stage and 'success' status, indicating that they have already been downloaded. 
+ """ + old_status = self._read(location) + # The manifest needs to be updated for a skipped shard if its entry is not present, or + # if the stage is not 'upload', or if the stage is 'upload' but the status is not 'success'. + if old_status.location != location or old_status.stage != Stage.UPLOAD or old_status.status != Status.SUCCESS: + current_utc_time = ( + datetime.datetime.utcnow() + .replace(tzinfo=datetime.timezone.utc) + .isoformat(timespec='seconds') + ) + + size = get_file_size(location) + + status = DownloadStatus( + config_name=config_name, + dataset=dataset if dataset else None, + selection=selection, + location=location, + area=fetch_geo_polygon(selection.get('area', GLOBAL_COVERAGE_AREA)), + username=user, + stage=Stage.UPLOAD, + status=Status.SUCCESS, + error=None, + size=size, + scheduled_time=None, + retrieve_start_time=None, + retrieve_end_time=None, + fetch_start_time=None, + fetch_end_time=None, + download_start_time=None, + download_end_time=None, + upload_start_time=current_utc_time, + upload_end_time=current_utc_time, + ) + self._update(status) + logger.info(f'Manifest updated for skipped shard: {location!r} -- {DownloadStatus.to_dict(status)!r}.') + + def _set_for_transaction(self, config_name: str, dataset: str, selection: t.Dict, location: str, user: str) -> None: + """Reset Manifest state in preparation for a new transaction.""" + self.status = dataclasses.replace(self._read(location)) + self.status.config_name = config_name + self.status.dataset = dataset if dataset else None + self.status.selection = selection + self.status.location = location + self.status.username = user + + def __enter__(self) -> None: + pass + + def __exit__(self, exc_type, exc_inst, exc_tb) -> None: + """Record end status of a transaction as either 'success' or 'failure'.""" + if exc_type is None: + status = Status.SUCCESS + error = None + else: + status = Status.FAILURE + # For explanation, see 
https://docs.python.org/3/library/traceback.html#traceback.format_exception + error = '\n'.join(traceback.format_exception(exc_type, exc_inst, exc_tb)) + + new_status = dataclasses.replace(self.status) + new_status.error = error + new_status.status = status + current_utc_time = ( + datetime.datetime.utcnow() + .replace(tzinfo=datetime.timezone.utc) + .isoformat(timespec='seconds') + ) + + # This is necessary for setting the precise start time of the previous stage + # and end time of the final stage, as well as handling the case of Status.FAILURE. + if new_status.stage == Stage.FETCH: + new_status.fetch_start_time = self.prev_stage_precise_start_time + new_status.fetch_end_time = current_utc_time + elif new_status.stage == Stage.RETRIEVE: + new_status.retrieve_start_time = self.prev_stage_precise_start_time + new_status.retrieve_end_time = current_utc_time + elif new_status.stage == Stage.DOWNLOAD: + new_status.download_start_time = self.prev_stage_precise_start_time + new_status.download_end_time = current_utc_time + else: + new_status.upload_start_time = self.prev_stage_precise_start_time + new_status.upload_end_time = current_utc_time + + new_status.size = get_file_size(new_status.location) + + self.status = new_status + + self._update(self.status) + + def transact(self, config_name: str, dataset: str, selection: t.Dict, location: str, user: str) -> 'Manifest': + """Create a download transaction.""" + self._set_for_transaction(config_name, dataset, selection, location, user) + return self + + def set_stage(self, stage: Stage) -> None: + """Sets the current stage in manifest.""" + prev_stage = self.status.stage + new_status = dataclasses.replace(self.status) + new_status.stage = stage + new_status.status = Status.IN_PROGRESS + current_utc_time = ( + datetime.datetime.utcnow() + .replace(tzinfo=datetime.timezone.utc) + .isoformat(timespec='seconds') + ) + + if stage == Stage.FETCH: + new_status.fetch_start_time = current_utc_time + elif stage == Stage.RETRIEVE: + 
new_status.retrieve_start_time = current_utc_time + elif stage == Stage.DOWNLOAD: + new_status.fetch_start_time = self.prev_stage_precise_start_time + new_status.fetch_end_time = current_utc_time + new_status.download_start_time = current_utc_time + else: + if prev_stage == Stage.DOWNLOAD: + new_status.download_start_time = self.prev_stage_precise_start_time + new_status.download_end_time = current_utc_time + else: + new_status.retrieve_start_time = self.prev_stage_precise_start_time + new_status.retrieve_end_time = current_utc_time + new_status.upload_start_time = current_utc_time + + self.status = new_status + self._update(self.status) + + @abc.abstractmethod + def _read(self, location: str) -> DownloadStatus: + pass + + @abc.abstractmethod + def _update(self, download_status: DownloadStatus) -> None: + pass + + +class ConsoleManifest(Manifest): + + def __post_init__(self): + self.name = urlparse(self.location).hostname + + def _read(self, location: str) -> DownloadStatus: + return DownloadStatus() + + def _update(self, download_status: DownloadStatus) -> None: + logger.info(f'[{self.name}] {DownloadStatus.to_dict(download_status)!r}') + + +class LocalManifest(Manifest): + """Writes a JSON representation of the manifest to local file.""" + + _lock = threading.Lock() + + def __init__(self, location: Location) -> None: + super().__init__(Location(os.path.join(location, 'manifest.json'))) + if location and not os.path.exists(location): + os.makedirs(location) + + # If the file is empty, it should start out as an empty JSON object. + if not os.path.exists(self.location) or os.path.getsize(self.location) == 0: + with open(self.location, 'w') as file: + json.dump({}, file) + + def _read(self, location: str) -> DownloadStatus: + """Reads the JSON data from a manifest.""" + assert os.path.exists(self.location), f'{self.location} must exist!' 
+ with LocalManifest._lock: + with open(self.location, 'r') as file: + manifest = json.load(file) + return DownloadStatus.from_dict(manifest.get(location, {})) + + def _update(self, download_status: DownloadStatus) -> None: + """Writes the JSON data to a manifest.""" + assert os.path.exists(self.location), f'{self.location} must exist!' + with LocalManifest._lock: + with open(self.location, 'r') as file: + manifest = json.load(file) + + status = DownloadStatus.to_dict(download_status) + manifest[status['location']] = status + + with open(self.location, 'w') as file: + json.dump(manifest, file) + logger.debug('Manifest written to.') + logger.debug(download_status) + + +class BQManifest(Manifest): + """Writes a JSON representation of the manifest to BQ file. + + This is an append-only implementation, the latest value in the manifest + represents the current state of a download. + """ + def __init__(self, location: Location) -> None: + super().__init__(Location(location[5:])) + TABLE_SCHEMA = [ + bigquery.SchemaField('config_name', 'STRING', mode='REQUIRED', + description="The name of the config file associated with the request."), + bigquery.SchemaField('dataset', 'STRING', mode='NULLABLE', + description="Represents the dataset field of the configuration."), + bigquery.SchemaField('selection', 'JSON', mode='REQUIRED', + description="Copy of selection section of the configuration."), + bigquery.SchemaField('location', 'STRING', mode='REQUIRED', + description="Location of the downloaded data."), + bigquery.SchemaField('area', 'STRING', mode='NULLABLE', + description="Represents area covered by the shard. " + "ST_GeogFromGeoJson(area): To convert a GeoJSON geometry object into a " + "GEOGRAPHY value. 
" + "ST_COVERS(geography_expression, ST_GEOGPOINT(longitude, latitude)): To check " + "if a point lies in the given area or not."), + bigquery.SchemaField('stage', 'STRING', mode='NULLABLE', + description="Current stage of request : 'fetch', 'download', 'retrieve', 'upload' " + "or None."), + bigquery.SchemaField('status', 'STRING', mode='REQUIRED', + description="Download status: 'scheduled', 'in-progress', 'success', or 'failure'."), + bigquery.SchemaField('error', 'STRING', mode='NULLABLE', + description="Cause of error, if any."), + bigquery.SchemaField('username', 'STRING', mode='REQUIRED', + description="Identifier for the user running the download."), + bigquery.SchemaField('size', 'FLOAT', mode='NULLABLE', + description="Shard size in GB."), + bigquery.SchemaField('scheduled_time', 'TIMESTAMP', mode='NULLABLE', + description="A UTC datetime when download was scheduled."), + bigquery.SchemaField('retrieve_start_time', 'TIMESTAMP', mode='NULLABLE', + description="A UTC datetime when the retrieve stage starts."), + bigquery.SchemaField('retrieve_end_time', 'TIMESTAMP', mode='NULLABLE', + description="A UTC datetime when the retrieve state ends."), + bigquery.SchemaField('fetch_start_time', 'TIMESTAMP', mode='NULLABLE', + description="A UTC datetime when the fetch state starts."), + bigquery.SchemaField('fetch_end_time', 'TIMESTAMP', mode='NULLABLE', + description="A UTC datetime when the fetch state ends."), + bigquery.SchemaField('download_start_time', 'TIMESTAMP', mode='NULLABLE', + description="A UTC datetime when the download state starts."), + bigquery.SchemaField('download_end_time', 'TIMESTAMP', mode='NULLABLE', + description="A UTC datetime when the download state ends."), + bigquery.SchemaField('upload_start_time', 'TIMESTAMP', mode='NULLABLE', + description="A UTC datetime when the upload state starts."), + bigquery.SchemaField('upload_end_time', 'TIMESTAMP', mode='NULLABLE', + description="A UTC datetime when the upload state ends."), + ] + table = 
bigquery.Table(self.location, schema=TABLE_SCHEMA) + with bigquery.Client() as client: + client.create_table(table, exists_ok=True) + + def _read(self, location: str) -> DownloadStatus: + """Reads the JSON data from a manifest.""" + with bigquery.Client() as client: + select_statement = f"SELECT * FROM {self.location} WHERE location = @location" + + # Build the QueryJobConfig object with the parameters. + job_config = bigquery.QueryJobConfig() + job_config.query_parameters = [bigquery.ScalarQueryParameter('location', 'STRING', location)] + + # Execute the merge statement with the parameters. + query_job = client.query(select_statement, job_config=job_config) + + # Wait for the query to execute. + result = query_job.result() + row = {} + if result.total_rows > 0: + records = result.to_dataframe().to_dict('records') + row = {n: to_json_serializable_type(v) for n, v in records[0].items()} + return DownloadStatus.from_dict(row) + + # Added retry here to handle the concurrency issue in BigQuery. + # Eg: 400 Resources exceeded during query execution: Too many DML statements outstanding + # against table , limit is 20 + @retry_with_exponential_backoff + def _update(self, download_status: DownloadStatus) -> None: + """Writes the JSON data to a manifest.""" + with bigquery.Client() as client: + status = DownloadStatus.to_dict(download_status) + table = client.get_table(self.location) + columns = [field.name for field in table.schema] + parameter_type_mapping = {field.name: field.field_type for field in table.schema} + + update_dml = [f"{col} = @{col}" for col in columns] + insert_dml = [f"@{col}" for col in columns] + params = {col: status[col] for col in columns} + + # Build the merge statement as a string with parameter placeholders. 
+ merge_statement = f""" + MERGE {self.location} T + USING ( + SELECT + @location as location + ) S + ON T.location = S.location + WHEN MATCHED THEN + UPDATE SET + {', '.join(update_dml)} + WHEN NOT MATCHED THEN + INSERT + ({", ".join(columns)}) + VALUES + ({', '.join(insert_dml)}) + """ + + logger.debug(merge_statement) + + # Build the QueryJobConfig object with the parameters. + job_config = bigquery.QueryJobConfig() + job_config.query_parameters = [bigquery.ScalarQueryParameter(col, parameter_type_mapping[col], value) + for col, value in params.items()] + + # Execute the merge statement with the parameters. + query_job = client.query(merge_statement, job_config=job_config) + + # Wait for the query to execute. + query_job.result() + + logger.debug('Manifest written to.') + logger.debug(download_status) + + +class FirestoreManifest(Manifest): + """A Firestore Manifest. + This Manifest implementation stores DownloadStatuses in a Firebase document store. + The document hierarchy for the manifest is as follows: + [manifest ] + ├── doc_id (md5 hash of the path) { 'selection': {...}, 'location': ..., 'username': ... } + └── etc... + Where `[]` indicates a collection and ` {...}` indicates a document. + """ + + def _get_db(self) -> firestore.firestore.Client: + """Acquire a firestore client, initializing the firebase app if necessary. + Will attempt to get the db client five times. If it's still unsuccessful, a + `ManifestException` will be raised. + """ + db = None + attempts = 0 + + while db is None: + try: + db = firestore.client() + except ValueError as e: + # The above call will fail with a value error when the firebase app is not initialized. + # Initialize the app here, and try again. 
+ firebase_admin.initialize_app(options=self.get_firestore_config()) + logger.info('Initialized Firebase App.') + + if attempts > 4: + raise ManifestException('Exceeded number of retries to get firestore client.') from e + + time.sleep(get_wait_interval(attempts)) + + attempts += 1 + + return db + + def _read(self, location: str) -> DownloadStatus: + """Reads the JSON data from a manifest.""" + + doc_id = generate_md5_hash(location) + + # Update document with download status + download_doc_ref = ( + self.root_document_for_store(doc_id) + ) + + result = download_doc_ref.get() + row = {} + if result.exists: + records = result.to_dict() + row = {n: to_json_serializable_type(v) for n, v in records.items()} + return DownloadStatus.from_dict(row) + + def _update(self, download_status: DownloadStatus) -> None: + """Update or create a download status record.""" + logger.debug('Updating Firestore Manifest.') + + status = DownloadStatus.to_dict(download_status) + doc_id = generate_md5_hash(status['location']) + + # Update document with download status + download_doc_ref = ( + self.root_document_for_store(doc_id) + ) + + result: WriteResult = download_doc_ref.set(status) + + logger.debug(f'Firestore manifest updated. ' + f'update_time={result.update_time}, ' + f'filename={download_status.location}.') + + def root_document_for_store(self, store_scheme: str) -> DocumentReference: + """Get the root manifest document given the user's config and current document's storage location.""" + # Get user-defined collection for manifest. + root_collection = self.get_firestore_config().get('collection', 'manifest') + return self._get_db().collection(root_collection).document(store_scheme) + + def get_firestore_config(self) -> t.Dict: + """Parse firestore Location format: 'fs://?projectId=' + Users must specify a 'projectId' query parameter in the firestore location. If this argument + isn't passed in, users must set the `GOOGLE_CLOUD_PROJECT` environment variable. 
+ Users may specify options to `firebase_admin.initialize_app()` via query arguments in the URL. + For more information about what options are available, consult this documentation: + https://firebase.google.com/docs/reference/admin/python/firebase_admin#initialize_app + Note: each query key-value pair may only appear once. If there are duplicates, the last pair + will be used. + Optionally, users may configure these options via the `FIREBASE_CONFIG` environment variable, + which is typically a path/to/a/file.json. + Examples: + >>> location = Location("fs://my-collection?projectId=my-project-id&storageBucket=foo") + >>> FirestoreManifest(location).get_firestore_config() + {'collection': 'my-collection', 'projectId': 'my-project-id', 'storageBucket': 'foo'} + Raises: + ValueError: If query parameters are malformed. + AssertionError: If the 'projectId' query parameter is not set. + """ + parsed = urlparse(self.location) + query_params = {} + if parsed.query: + query_params = dict(parse_qsl(parsed.query, strict_parsing=True)) + return {'collection': parsed.netloc, **query_params} + + +class MockManifest(Manifest): + """In-memory mock manifest.""" + + def __init__(self, location: Location) -> None: + super().__init__(location) + self.records = {} + + def _read(self, location: str) -> DownloadStatus: + manifest = self.records + return DownloadStatus.from_dict(manifest.get(location, {})) + + def _update(self, download_status: DownloadStatus) -> None: + status = DownloadStatus.to_dict(download_status) + self.records.update({status.get('location'): status}) + logger.debug('Manifest updated.') + logger.debug(download_status) + + +class NoOpManifest(Manifest): + """A manifest that performs no operations.""" + + def _read(self, location: str) -> DownloadStatus: + return DownloadStatus() + + def _update(self, download_status: DownloadStatus) -> None: + pass + + +"""Exposed manifest implementations. 
def date(candidate: str) -> datetime.date:
    """Converts ECMWF-format date strings into a `datetime.date`.

    Absolute dates may be written as:
      - YYYY-MM-DD  (e.g. 2021-10-31)
      - YYYYMMDD    (e.g. 19700101)
      - YYYY-DDD    (e.g. 1950-007), where DDD is the day of the year

    A value starting with '-' is treated as a relative date: an integer number
    of days offset from today (e.g. '-1' is yesterday).

    See https://confluence.ecmwf.int/pages/viewpage.action?pageId=118817289 for date format spec.
    Note: Name of month is not supported.

    Raises:
        ValueError: If the candidate matches none of the accepted formats.
    """
    # Relative day value: the (negative) integer is added to today's date.
    if candidate.startswith('-'):
        return datetime.date.today() + datetime.timedelta(days=int(candidate))

    # Absolute formats are tried in order; the first one that parses wins.
    for pattern in ("%Y-%m-%d", "%Y%m%d", "%Y-%j"):
        try:
            return datetime.datetime.strptime(candidate, pattern).date()
        except ValueError:
            continue

    raise ValueError(
        f"Not a valid date: '{candidate}'. Please use valid relative or absolute format."
    )
def _read_config_file(file: t.IO) -> t.Dict:
    """Reads `*.json` or `*.cfg` files.

    The stream is first decoded as JSON. If that fails, it is rewound and read
    as an INI-style file, producing one dictionary of options per section.

    Returns:
        The decoded JSON document, or a `{section: {option: value}}` mapping,
        or an empty dict when the INI parser reports a parsing error.
    """
    try:
        decoded = json.load(file)
    except json.JSONDecodeError:
        # Not JSON: rewind so the INI parser sees the stream from the start.
        file.seek(0)
    else:
        return decoded

    try:
        parser = configparser.ConfigParser()
        parser.read_file(file)
        return {section: dict(parser.items(section)) for section in parser.sections()}
    except configparser.ParsingError:
        return {}
def parse_mars_syntax(block: str) -> t.List[str]:
    """Parses MARS list or range into a list of arguments; ranges are inclusive.

    Types for the range and value are inferred.

    Examples:
        >>> parse_mars_syntax("10/to/12")
        ['10', '11', '12']
        >>> parse_mars_syntax("12/to/10/by/-1")
        ['12', '11', '10']
        >>> parse_mars_syntax("0.0/to/0.5/by/0.1")
        ['0.0', '0.1', '0.2', '0.3', '0.4', '0.5']
        >>> parse_mars_syntax("0.1/to/0.3/by/0.1")
        ['0.1', '0.2', '0.3']
        >>> parse_mars_syntax("2020-01-07/to/2020-01-14/by/2")
        ['2020-01-07', '2020-01-09', '2020-01-11', '2020-01-13']
        >>> parse_mars_syntax("2020-01-14/to/2020-01-07/by/-2")
        ['2020-01-14', '2020-01-12', '2020-01-10', '2020-01-08']

    Returns:
        A list of strings representing a range from start to finish, based on the
        type of the values in the range.
        If all range values are integers, it will return a list of strings of integers.
        If range values are floats, it will return a list of strings of floats
        (rounded to 4 decimal places); values never go past the end of the range.
        If the range values are dates, it will return a list of strings of dates in
        YYYY-MM-DD format. (Note: here, the increment value should be an integer).
        If the block has neither a 'to' nor a 'by' operator, the tokens themselves
        are returned.

    Raises:
        SyntaxError: If the 'to' / 'by' operators are missing their operands or an
            operand cannot be parsed.
        ValueError: If the start, end and increment have inconsistent types, or the
            increment is zero.
    """
    _round_places = 4

    # Split into tokens, omitting empty (and whitespace-only) strings.
    tokens = [token for token in (part.strip() for part in block.split('/')) if token]

    # Return list if no range operators are present.
    if 'to' not in tokens and 'by' not in tokens:
        return tokens

    # Parse range values, honoring 'to' and 'by' operators.
    try:
        to_idx = tokens.index('to')
        # Explicit check rather than `assert`, which is stripped under `python -O`
        # (index -1 would then silently pick the last token as the start).
        if to_idx == 0:
            raise ValueError("There must be a start token.")
        start_token, end_token = tokens[to_idx - 1], tokens[to_idx + 1]
        start, end = mars_range_value(start_token), mars_range_value(end_token)

        # Parse increment token, or choose default increment.
        increment_token = '1'
        increment = 1
        if 'by' in tokens:
            increment_token = tokens[tokens.index('by') + 1]
            increment = mars_increment_value(increment_token)
    except (AssertionError, IndexError, ValueError) as err:
        raise SyntaxError(f"Improper range syntax in '{block}'.") from err

    inconsistent_types = ValueError(
        f"Range tokens (start='{start_token}', end='{end_token}', increment='{increment_token}')"
        f" are inconsistent types."
    )
    start_is_date = isinstance(start, datetime.date)
    end_is_date = isinstance(end, datetime.date)

    # Return a range of values with appropriate data type.
    if start_is_date and end_is_date:
        if not isinstance(increment, int):
            raise ValueError(
                f"Increments on a date range must be integer number of days, '{increment_token}' is invalid."
            )
        return [d.strftime("%Y-%m-%d") for d in date_range(start, end, increment)]

    # A date mixed with a number cannot form a range.
    if start_is_date or end_is_date:
        raise inconsistent_types

    if isinstance(start, float) or isinstance(end, float):
        # Increment can be either an int or a float.
        if increment == 0:
            raise ValueError(
                f"Increment on a numeric range must be non-zero, '{increment_token}' is invalid."
            )
        # Count the steps that fit inside [start, end]. Rounding the ratio first
        # absorbs binary floating point noise (e.g. 1.9999999999999998 -> 2), and
        # flooring keeps the last value from overshooting `end`.
        steps = int(np.floor(round((end - start) / increment, 9)))
        return [str(round(float(start) + i * increment, _round_places)).zfill(len(start_token))
                for i in range(steps + 1)]

    if isinstance(increment, int):
        # Honor leading zeros.
        offset = 1 if start <= end else -1
        return [str(x).zfill(len(start_token)) for x in range(start, end + offset, increment)]

    raise inconsistent_types
def require(condition: bool, message: str, error_type: t.Type[Exception] = ValueError) -> None:
    """Raise `error_type` with a dedented `message` unless `condition` holds.

    Args:
        condition: The requirement being checked; nothing happens when it is truthy.
        message: Error text; common leading whitespace is removed with
            `textwrap.dedent`, so indented triple-quoted strings can be used.
        error_type: Exception class to raise. Defaults to `ValueError`.
    """
    if condition:
        return
    raise error_type(textwrap.dedent(message))
+ + Please refer to documentation for creating date-based directory hierarchy : + https://weather-tools.readthedocs.io/en/latest/Configuration.html#""" + """creating-a-date-based-directory-hierarchy.""", + NotImplementedError) + require('target_filename' not in params, + """ + The current version of 'google-weather-tools' no longer supports 'target_filename'! + + Please refer to documentation : + https://weather-tools.readthedocs.io/en/latest/Configuration.html#parameters-section.""", + NotImplementedError) + + partition_keys = params.get('partition_keys', list()) + if isinstance(partition_keys, str): + partition_keys = [partition_keys.strip()] + + selection = config.get('selection', dict()) + require(all((key in selection for key in partition_keys)), + """ + All 'partition_keys' must appear in the 'selection' section. + + 'partition_keys' specify how to split data for workers. Please consult + documentation for more information.""") + + num_template_replacements = _number_of_replacements(params['target_path']) + num_partition_keys = len(partition_keys) + + require(num_template_replacements == num_partition_keys, + """ + 'target_path' has {0} replacements. Expected {1}, since there are {1} + partition keys. + """.format(num_template_replacements, num_partition_keys)) + + if 'day' in partition_keys: + require(selection['day'] != 'all', + """If 'all' is used for a selection value, it cannot appear as a partition key.""") + + # Ensure consistent lookup. + config['parameters']['partition_keys'] = partition_keys + # Add config file name. + config['parameters']['config_name'] = config_name + + # Ensure the cartesian-cross can be taken on singleton values for the partition. 
@dataclasses.dataclass
class PartitionConfig():
    """Partition a config into multiple data requests.

    Partitioning involves three main operations: First, we fan-out shards based on
    partition keys (a cross product of the values). Second, we filter out existing
    downloads (unless we want to force downloads). Last, we assemble each partition
    into a single Config.

    Attributes:
        config: The download config to partition.
        store: A cloud storage system, used for checking the existence of downloads.
            May be None, in which case `new_downloads_only` falls back to `FSStore`.
        manifest: A download manifest to register preparation state.
    """

    config: Config
    store: t.Optional[Store]
    manifest: Manifest

    def _create_partition_config(self, option: t.Tuple) -> Config:
        """Create a config for a single partition option.

        Output a copy of the config, overriding the range of values for
        each partition key with the partition instance in 'selection'.
        Continuing the example from prepare_partitions, the selection section
        would be:
          { 'foo': ..., 'year': ['2020'], 'month': ['01'], ... }
          { 'foo': ..., 'year': ['2020'], 'month': ['02'], ... }
          { 'foo': ..., 'year': ['2020'], 'month': ['03'], ... }

        Args:
            option: A single item in the range of partition_keys; values are in
                the same order as `config.partition_keys`.

        Returns:
            A configuration that selects a single download partition. The
            original `self.config` is left untouched.
        """
        # One deep copy is enough: the copied selection is then narrowed in place.
        out = cp.deepcopy(self.config)
        for key, value in zip(self.config.partition_keys, option):
            out.selection[key] = [value]
        return out

    def skip_partition(self, config: Config) -> bool:
        """Return true if partition should be skipped.

        A partition is skipped when downloads are not forced and its target
        already exists in the store; the skip is recorded in the manifest.
        """
        if config.force_download:
            return False

        target = prepare_target_name(config)
        if self.store.exists(target):
            print(f'file {target} found, skipping.')
            self.manifest.skip(config.config_name, config.dataset, config.selection, target, config.user_id)
            return True

        return False

    def prepare_partitions(self) -> t.Iterator[Config]:
        """Iterate over client parameters, partitioning over `partition_keys`.

        This produces a Cartesian-Cross over the range of keys.

        For example, if the keys were 'year' and 'month', it would produce
        an iterable like:
            ( ('2020', '01'), ('2020', '02'), ('2020', '03'), ...)

        Returns:
            An iterator of `Config`s, one per combination of partition values.
        """
        value_ranges = [self.config.selection[key] for key in self.config.partition_keys]
        for option in itertools.product(*value_ranges):
            yield self._create_partition_config(option)

    def new_downloads_only(self, candidate: Config) -> bool:
        """Predicate function to skip already downloaded partitions.

        Returns:
            True when the partition still needs to be downloaded.
        """
        # Lazily fall back to the Beam-backed store when none was supplied.
        if self.store is None:
            self.store = FSStore()
        should_skip = self.skip_partition(candidate)
        if should_skip:
            print("Skipped.")
        return not should_skip

    def update_manifest_collection(self, partition: Config) -> Config:
        """Registers the partition as scheduled in the manifest (updates the DB).

        Returns:
            The same `partition`, so the call can be chained.
        """
        location = prepare_target_name(partition)
        self.manifest.schedule(partition.config_name, partition.dataset, partition.selection, location,
                               partition.user_id)
        print(f'Created partition {location!r}.')
        return partition
class InMemoryStore(Store):
    """Store file data in memory."""

    def __init__(self):
        # Maps a filename to the buffer most recently opened under that name.
        self.store = {}

    def open(self, filename: str, mode: str = 'r') -> t.IO:
        """Register and return a fresh in-memory buffer for `filename`.

        A bytes buffer is used when `mode` contains 'b', a text buffer otherwise.
        Every call creates a new buffer, replacing any buffer previously stored
        under the same name.
        """
        buffer = io.BytesIO() if 'b' in mode else io.StringIO()
        self.store[filename] = buffer
        return buffer

    def exists(self, filename: str) -> bool:
        """Return true if the 'file' exists in memory."""
        return filename in self.store
class LocalFileStore(Store):
    """Store data into local files."""

    def __init__(self, directory: t.Optional[str] = None) -> None:
        """Optionally specify the directory that contains all downloaded files.

        The directory is created if it does not exist yet. When no directory is
        given, filenames are used as-is (i.e. relative to the working directory).
        """
        self.dir = directory
        if self.dir and not os.path.exists(self.dir):
            os.makedirs(self.dir)

    def _path(self, filename: str) -> str:
        """Return the path of `filename` inside the store directory."""
        # `directory` is optional: joining None raises TypeError, and joining ''
        # would anchor the file at the filesystem root.
        if not self.dir:
            return filename
        return os.sep.join([self.dir, filename])

    def open(self, filename: str, mode: str = 'r') -> t.IO:
        """Open a local file from the store directory."""
        return open(self._path(filename), mode)

    def exists(self, filename: str) -> bool:
        """Returns true if local file exists."""
        return os.path.exists(self._path(filename))
def _retry_if_valid_input_but_server_or_socket_error_and_timeout_filter(exception) -> bool:
    """Retry filter: returns True when `exception` should trigger a retry.

    Socket timeouts, `TimeoutError` and `BadRequest` are always retried; any
    other exception is delegated to Beam's
    `retry_if_valid_input_but_server_error_and_timeout_filter`.
    """
    # BadRequest is included to handle the concurrency issue in BigQuery.
    if isinstance(exception, (socket.timeout, TimeoutError, BadRequest)):
        return True
    return retry.retry_if_valid_input_but_server_error_and_timeout_filter(exception)
# TODO(#245): Group with common utilities (duplicated)
def ichunked(iterable: t.Iterable, n: int) -> t.Iterator[t.Iterable]:
    """Yield successive chunks of at most `n` items from an iterable.

    Each chunk is a lazy iterator over the shared underlying input, so a chunk
    should be consumed before the next one is requested. The last chunk may be
    shorter than `n`; no empty chunk is produced.
    """
    source = iter(iterable)
    while True:
        chunk = itertools.islice(source, n)
        # Pull one item eagerly: an exhausted input means there is no chunk left.
        try:
            head = next(chunk)
        except StopIteration:
            return
        yield itertools.chain([head], chunk)
def fetch_geo_polygon(area: t.Union[list, str]) -> str:
    """Calculates a geography polygon from an input area.

    Args:
        area: Either a sequence of four numbers ordered [north, west, south, east],
            or one of the shorthand strings 'E' (European area) or 'G' (global area).

    Returns:
        A GeoJSON string for the polygon of the bounding box, as a closed ring
        of (longitude, latitude) corners.

    Raises:
        RuntimeError: If `area` is a string other than 'E' or 'G'.
        ValueError: If any bound lies outside LATITUDE_RANGE / LONGITUDE_RANGE.
    """
    # Ref: https://confluence.ecmwf.int/pages/viewpage.action?pageId=151520973
    if isinstance(area, str):
        # European area
        if area == 'E':
            area = [73.5, -27, 33, 45]
        # Global area
        elif area == 'G':
            area = GLOBAL_COVERAGE_AREA
        else:
            raise RuntimeError(f'Not a valid value for area in config: {area}.')

    n, w, s, e = [float(x) for x in area]

    # Every bound is checked against both ends of its range; checking only one
    # side would let values such as north=-120 or east=-500 through.
    lat_min, lat_max = LATITUDE_RANGE
    lon_min, lon_max = LONGITUDE_RANGE
    for label, value in (('south', s), ('north', n)):
        if not lat_min <= value <= lat_max:
            raise ValueError(f"Invalid latitude value for {label}: '{value}'")
    for label, value in (('west', w), ('east', e)):
        if not lon_min <= value <= lon_max:
            raise ValueError(f"Invalid longitude value for {label}: '{value}'")

    # Define the coordinates of the bounding box.
    coords = [[w, n], [w, s], [e, s], [e, n], [w, n]]

    # Create the GeoJSON polygon object.
    polygon = geojson.dumps(geojson.Polygon([coords]))
    return polygon
+[parameters] +client=mars + +target_path=gs:///test-weather-dl-v2/{date}T00z.gb +partition_keys= + date + # step + +# API Keys & Subsections go here... + +[selection] +class=od +type=pf +stream=enfo +expver=0001 +levtype=pl +levelist=100 +# params: +# (z) Geopotential 129, (t) Temperature 130, +# (u) U component of wind 131, (v) V component of wind 132, +# (q) Specific humidity 133, (w) vertical velocity 135, +# (vo) Vorticity (relative) 138, (d) Divergence 155, +# (r) Relative humidity 157 +param=129.128 +# +# next: 2019-01-01/to/existing +# +date=2019-07-18/to/2019-07-20 +time=0000 +step=0/to/2 +number=1/to/2 +grid=F640 diff --git a/weather_dl_v2/fastapi-server/firestore_db/db.py b/weather_dl_v2/fastapi-server/firestore_db/db.py new file mode 100644 index 00000000..0b5d9370 --- /dev/null +++ b/weather_dl_v2/fastapi-server/firestore_db/db.py @@ -0,0 +1,19 @@ +fake_download_db = {} + +fake_license_priority_db = { + "l1": [], + "l2": [], + "l3": [], +} + +fake_manifest_db = {} + + +fake_licenses_db = { + "l1": {"client_name": "MARS", "number_of_requests": 2, "api_key": "XXX", "api_url": "", + "k8s_deployment_id": ""}, + "l2": {"client_name": "MARS", "number_of_requests": 2, "api_key": "XXX", "api_url": "", + "k8s_deployment_id": ""}, + "l3": {"client_name": "CDS", "number_of_requests": 5, "api_key": "XXX", "api_url": "", + "k8s_deployment_id": ""}, +} \ No newline at end of file diff --git a/weather_dl_v2/fastapi-server/main.py b/weather_dl_v2/fastapi-server/main.py new file mode 100644 index 00000000..910f8bd4 --- /dev/null +++ b/weather_dl_v2/fastapi-server/main.py @@ -0,0 +1,27 @@ +from contextlib import asynccontextmanager +from fastapi import FastAPI +from fastapi.responses import HTMLResponse +from routers import license, download, license_priority + +@asynccontextmanager +async def lifespan(app: FastAPI): + # Boot up + print("Create database if not already exists.") + print("Retrieve license information & create license deployment if needed.") + yield + # 
def upload(file: UploadFile):
    """Save an uploaded config file under ./config_files and return its path.

    Args:
        file: The uploaded file; its `filename` is supplied by the client.

    Returns:
        The destination path the content was written to.

    Raises:
        ValueError: If the upload has no usable file name.
    """
    import os  # Local import: this module does not import `os` at file level.

    # The file name is client-controlled. Keep only its final path component so
    # that names such as '../../main.py' cannot escape the config directory.
    safe_name = os.path.basename((file.filename or '').replace('\\', '/'))
    if safe_name in ('', '.', '..'):
        raise ValueError(f"Invalid upload file name: {file.filename!r}")

    dest = f"./config_files/{safe_name}"
    with open(dest, "wb+") as dest_:
        shutil.copyfileobj(file.file, dest_)
    return dest
+# List status for all the downloads + handle filters +@router.get("/") +async def get_downloads(client_name: str | None = None): + # Get this kind of response by querying fake_download_db + fake_manifest_db. + if client_name: + res = { "config_name": "config_3", "client_name": client_name,"total_shards": 10000, "scheduled_shards": 4990, + "downloaded_shards": 5000, "failed_shards": 0 } + else: + res = [ + { "config_name": "config_1", "client_name": "MARS","total_shards": 10000, "scheduled_shards": 4990, + "downloaded_shards": 5000, "failed_shards": 0 }, + { "config_name": "config_2", "client_name": "MARS","total_shards": 10000, "scheduled_shards": 4990, + "downloaded_shards": 5000, "failed_shards": 0 }, + { "config_name": "config_3", "client_name": "CDS","total_shards": 10000, "scheduled_shards": 4990, + "downloaded_shards": 5000, "failed_shards": 0 } + ] + return res + + +# Get status of particular download +@router.get("/{config_name}") +async def get_download(config_name: str): + if config_name not in fake_download_db: + raise HTTPException(status_code=404, detail="Download config not found in weather-dl v2.") + + # Get this kind of response by querying fake_manifest_db. + res = { "config_name": config_name, "client_name": "MARS", "total_shards": 10000, "scheduled_shards": 4990, + "downloaded_shards": 5000, "failed_shards": 0 } + return res + + +# Stop & remove the execution of the config. 
+@router.delete("/{config_name}") +async def delete_download(config_name: str): + if config_name not in fake_download_db: + raise HTTPException(status_code=404, detail="No such download config to stop & remove.") + + del fake_download_db[config_name] + for k, v in fake_license_priority_db.items(): + fake_license_priority_db[k] = [value for value in v if value != config_name] + + return {"config_name": config_name, "message": "Download config stopped & removed successfully."} diff --git a/weather_dl_v2/fastapi-server/routers/license.py b/weather_dl_v2/fastapi-server/routers/license.py new file mode 100644 index 00000000..80d3acee --- /dev/null +++ b/weather_dl_v2/fastapi-server/routers/license.py @@ -0,0 +1,83 @@ +from fastapi import APIRouter, HTTPException +from pydantic import BaseModel +from firestore_db.db import fake_licenses_db, fake_license_priority_db + +class License(BaseModel): + client_name: str + number_of_requests: int + api_key: str + api_url: str + + +class LicenseInternal(License): + k8s_deployment_id: str + + +# Can perform CRUD on license table -- helps in handling API KEY expiry. 
+router = APIRouter( + prefix="/license", + tags=["license"], + responses={404: {"description": "Not found"}}, +) + + +# List all the license + handle filters of {client_name} +@router.get("/") +async def get_licenses(client_name: str | None = None): + if client_name: + licenses = {k: v for k, v in fake_licenses_db.items() if v["client_name"] == client_name} + return licenses + else: + return fake_licenses_db + + +# Get particular license +@router.get("/{license_id}") +async def get_license(license_id: str): + if license_id not in fake_licenses_db: + raise HTTPException(status_code=404, detail="License not found") + return {"license_id": license_id, "client_name": fake_licenses_db[license_id]["client_name"]} + + +# Update existing license +@router.put("/{license_id}") +async def update_license(license_id: str, license: License): + try: + license_dict = license.dict() + fake_licenses_db[license_id].update(license_dict) + return {"license_id": license_id, "name": "License updated successfully."} + except: + return {"license_id": license_id, "message": "No such license to update."} + + +# Add/Update k8s deployment ID for existing license (intenally) +@router.put("/server/{license_id}") +async def update_license_internal(license_id: str, license: LicenseInternal): + try: + license_dict = license.dict() + fake_licenses_db[license_id].update(license_dict) + return {"license_id": license_id, "name": "License updated successfully."} + except: + return {"license_id": license_id, "message": "No such license to update."} + + +# Add new license +@router.post("/") +async def add_license(license: License): + license_dict = license.dict() + license_dict['k8s_deployment_id'] = "" + license_id = "l4" + fake_licenses_db[license_id] = license_dict + fake_license_priority_db[license_id] = {} + return {"license_id": license_id, "message": "License removed successfully."} + + +# Remove license +@router.delete("/{license_id}") +async def delete_license(license_id: str): + if license_id 
not in fake_licenses_db: + raise HTTPException(status_code=404, detail="No such license to delete.") + + del fake_licenses_db[license_id] + del fake_license_priority_db[license_id] + return {"license_id": license_id, "message": "License removed successfully."} \ No newline at end of file diff --git a/weather_dl_v2/fastapi-server/routers/license_priority.py b/weather_dl_v2/fastapi-server/routers/license_priority.py new file mode 100644 index 00000000..b5f2b0ed --- /dev/null +++ b/weather_dl_v2/fastapi-server/routers/license_priority.py @@ -0,0 +1,51 @@ +from fastapi import APIRouter, HTTPException, UploadFile +from firestore_db.db import fake_license_priority_db +import shutil +import tempfile +import json + +router = APIRouter( + prefix="/license-priority", + tags=["license-priority"], + responses={404: {"description": "Not found"}}, +) + +# Users can change the execution order of config per license basis. +# List the licenses priority +@router.get("/") +async def get_all_license_priority(): + return fake_license_priority_db + + +# Get particular license priority +@router.get("/{license_id}") +async def get_license_priority(license_id: str): + if license_id not in fake_license_priority_db: + raise HTTPException(status_code=404, detail="License's priority not found.") + return {"license_id": license_id, "priority": fake_license_priority_db[license_id]} + + +def parse_config(file) -> dict: + """Reads `*.json` files.""" + try: + return json.load(file) + except json.JSONDecodeError: + pass + + +# Change config's priority on particular license +@router.post("/{license_id}") +def submit_download(license_id: str, file: UploadFile | None = None): + if license_id not in fake_license_priority_db: + raise HTTPException(status_code=404, detail="License's priority not found.") + if not file: + return {"message": "No upload file sent."} + else: + try: + with tempfile.NamedTemporaryFile() as dest_file: + shutil.copyfileobj(file.file, dest_file) + priority_dict = 
parse_config(dest_file) + fake_license_priority_db[license_id].update(priority_dict) + return {"message": f"'{license_id}' license priority updated successfully."} + except: + return {"message": f"Failed to update '{license_id}' license priority."} \ No newline at end of file diff --git a/weather_dl_v2/fastapi-server/server.yaml b/weather_dl_v2/fastapi-server/server.yaml new file mode 100644 index 00000000..4a3106a4 --- /dev/null +++ b/weather_dl_v2/fastapi-server/server.yaml @@ -0,0 +1 @@ +# TODO: Write the k8s deployment script. \ No newline at end of file From f9f4d397cd4208fa4cec628c248abdbd4f0191b0 Mon Sep 17 00:00:00 2001 From: Rahul Mahrsee Date: Thu, 18 May 2023 17:22:34 +0000 Subject: [PATCH 02/51] Replaced in-memory db with db layer (FirestoreClient()). --- .../config_processing/pipeline.py | 12 +- .../fastapi-server/db_service/database.py | 240 ++++++++++++++++++ .../fastapi-server/firestore_db/db.py | 19 -- weather_dl_v2/fastapi-server/main.py | 4 +- .../fastapi-server/routers/download.py | 28 +- .../fastapi-server/routers/license.py | 68 ++--- .../routers/license_priority.py | 51 ---- .../fastapi-server/routers/queues.py | 42 +++ 8 files changed, 343 insertions(+), 121 deletions(-) create mode 100644 weather_dl_v2/fastapi-server/db_service/database.py delete mode 100644 weather_dl_v2/fastapi-server/firestore_db/db.py delete mode 100644 weather_dl_v2/fastapi-server/routers/license_priority.py create mode 100644 weather_dl_v2/fastapi-server/routers/queues.py diff --git a/weather_dl_v2/fastapi-server/config_processing/pipeline.py b/weather_dl_v2/fastapi-server/config_processing/pipeline.py index ca26b67b..12f49170 100644 --- a/weather_dl_v2/fastapi-server/config_processing/pipeline.py +++ b/weather_dl_v2/fastapi-server/config_processing/pipeline.py @@ -3,10 +3,13 @@ from .parsers import process_config from .partition import PartitionConfig from .manifest import FirestoreManifest -from firestore_db.db import fake_download_db, fake_license_priority_db +from 
db_service.database import FirestoreClient +db_client = FirestoreClient() + def start_processing_config(config_file, licenses): config = {} + # TODO: Make use of db_service instead of FirestoreManifest. manifest_location = "XXXXXXXXX" manifest = FirestoreManifest(manifest_location) @@ -26,7 +29,6 @@ def start_processing_config(config_file, licenses): if partition_obj.new_downloads_only(partition): partition_obj.update_manifest_collection(partition) - # Make entry in fake_download_db & fake_license_priority_db as mentioned by user. - fake_download_db[config_name] = {'client_name': config.client} - for license in licenses: - fake_license_priority_db[license].append(config_name) + # Make entry in 'download' & 'queues' collection. + db_client._start_download(config_name, config.client) + db_client._update_queues_on_start_download(config_name, licenses) diff --git a/weather_dl_v2/fastapi-server/db_service/database.py b/weather_dl_v2/fastapi-server/db_service/database.py new file mode 100644 index 00000000..f7dbf8e4 --- /dev/null +++ b/weather_dl_v2/fastapi-server/db_service/database.py @@ -0,0 +1,240 @@ +import abc +import time +import firebase_admin +from firebase_admin import firestore +from firebase_admin import credentials +from google.cloud.firestore_v1 import DocumentSnapshot +from google.cloud.firestore_v1.types import WriteResult +from config_processing.util import get_wait_interval + + +class Database(abc.ABC): + @abc.abstractmethod + def _get_db(self): + pass + + @abc.abstractmethod + def _start_download(self, config_name: str, client_name: str) -> None: + pass + + @abc.abstractmethod + def _stop_download(self, config_name: str) -> None: + pass + + @abc.abstractmethod + def _check_download_exists(self, config_name: str) -> bool: + pass + + @abc.abstractmethod + def _add_license(self, license_dict: dict) -> str: + pass + + @abc.abstractmethod + def _delete_license(self, license_id: str) -> str: + pass + + @abc.abstractmethod + def _create_license_queue(self, 
license_id: str, client_name: str) -> None: + pass + + @abc.abstractmethod + def _remove_license_queue(self, license_id: str) -> None: + pass + + @abc.abstractmethod + def _get_queues(self) -> list: + pass + + @abc.abstractmethod + def _get_queue_by_license_id(self, license_id: str) -> dict: + pass + + @abc.abstractmethod + def _get_queue_by_client_name(self, client_name: str) -> list: + pass + + @abc.abstractmethod + def _update_license_queue(self, license_id: str, priority_list: list) -> None: + pass + + @abc.abstractmethod + def _check_license_exists(self, license_id: str) -> bool: + pass + + @abc.abstractmethod + def _get_license_by_license_id(slef, license_id: str) -> dict: + pass + + @abc.abstractmethod + def _get_license_by_client_name(self, client_name: str) -> list: + pass + + @abc.abstractmethod + def _get_licenses(self) -> list: + pass + + @abc.abstractmethod + def _update_license(self, license_id: str, license_dict: dict) -> None: + pass + + # TODO: Find better way to execute these query. + # @abc.abstractmethod + # def _get_download_by_config_name(self, config_name: str) -> dict: + # pass + + # @abc.abstractmethod + # def _get_dowloads(self) -> list: + # pass + + @abc.abstractmethod + def _update_queues_on_start_download(self, config_name: str, licenses: list) -> None: + pass + + @abc.abstractmethod + def _update_queues_on_stop_download(self, config_name: str) -> None: + pass + + + +class FirestoreClient(Database): + def _get_db(self) -> firestore.firestore.Client: + """Acquire a firestore client, initializing the firebase app if necessary. + Will attempt to get the db client five times. If it's still unsuccessful, a + `ManifestException` will be raised. + """ + db = None + attempts = 0 + + while db is None: + try: + db = firestore.client() + except ValueError as e: + # The above call will fail with a value error when the firebase app is not initialized. + # Initialize the app here, and try again. + # Use the application default credentials. 
+ cred = credentials.ApplicationDefault() + + firebase_admin.initialize_app(cred) + print('Initialized Firebase App.') + + if attempts > 4: + raise RuntimeError('Exceeded number of retries to get firestore client.') from e + + time.sleep(get_wait_interval(attempts)) + + attempts += 1 + + return db + + + def _start_download(self, config_name: str, client_name: str) -> None: + result: WriteResult = self._get_db().collection('download').document(config_name).set( + {'client_name': client_name} + ) + + print(f"Added {config_name} in 'download' collection. Update_time: {result.update_time}.") + + def _stop_download(self, config_name: str) -> None: + timestamp = self._get_db().collection('download').document(config_name).delete() + print(f"Removed {config_name} in 'download' collection. Update_time: {timestamp}.") + + def _check_download_exists(self, config_name: str) -> bool: + result: DocumentSnapshot = self._get_db().collection('download').document(config_name).get() + return result.exists + + def _add_license(self, license_dict: dict) -> str: + license_id = f"L{len(self._get_db().collection('license').get()) + 1}" + license_dict["license_id"] = license_id + result: WriteResult = self._get_db().collection('license').document(license_id).set( + license_dict + ) + print(f"Added {license_id} in 'license' collection. Update_time: {result.update_time}.") + return license_id + + def _delete_license(self, license_id: str) -> None: + timestamp = self._get_db().collection('license').document(license_id).delete() + print(f"Removed {license_id} in 'license' collection. Update_time: {timestamp}.") + + def _update_license(self, license_id: str, license_dict: dict) -> None: + result: WriteResult = self._get_db().collection('license').document(license_id).update({ + license_dict + }) + print(f"Updated {license_id} in 'license' collection. 
Update_time: {result.update_time}.") + + def _create_license_queue(self, license_id: str, client_name: str) -> None: + result: WriteResult = self._get_db().collection('queues').document(license_id).set( + {"client_name": client_name,"queue": []} + ) + print(f"Added {license_id} queue in 'queues' collection. Update_time: {result.update_time}.") + + def _remove_license_queue(self, license_id: str) -> None: + timestamp = self._get_db().collection('queues').document(license_id).delete() + print(f"Removed {license_id} queue in 'queues' collection. Update_time: {timestamp}.") + + def _get_queues(self) -> list: + snapshot_list = self._get_db().collection('queues').get() + result = [] + for snapshot in snapshot_list: + result.append(self._get_db().collection('queues').document(snapshot.id).get().to_dict()) + return result + + def _get_queue_by_license_id(self, license_id: str) -> dict: + result: DocumentSnapshot = self._get_db().collection('queues').document(license_id).get() + return result.to_dict() + + def _get_queue_by_client_name(self, client_name: str) -> list: + snapshot_list = self._get_db().collection('queues').where('client_name', '==', client_name).get() + result = [] + for snapshot in snapshot_list: + result.append(snapshot.to_dict()) + return result + + def _update_license_queue(self, license_id: str, priority_list: list) -> None: + result: WriteResult = self._get_db().collection('queues').document(license).update( + {'queue': priority_list} + ) + print(f"Updated {license_id} queue in 'queues' collection. 
Update_time: {result.update_time}.") + + def _check_license_exists(self, license_id: str) -> bool: + result: DocumentSnapshot = self._get_db().collection('license').document(license_id).get() + return result.exists + + def _get_license_by_license_id(self, license_id: str) -> dict: + result: DocumentSnapshot = self._get_db().collection('license').document(license_id).get() + return result.to_dict() + + def _get_license_by_client_name(self, client_name: str) -> list: + snapshot_list = self._get_db().collection('license').where('client_name', '==', client_name).get() + result = [] + for snapshot in snapshot_list: + result.append(snapshot.to_dict()) + return result + + def _get_licenses(self) -> list: + snapshot_list = self._get_db().collection('license').get() + result = [] + for snapshot in snapshot_list: + result.append(self._get_db().collection('license').document(snapshot.id).get().to_dict()) + return result + + # def _get_download_by_config_name(self, config_name: str) -> dict: + # result: DocumentSnapshot = self._get_db().collection('download_status').document(config_name).get() + # return result.to_dict() + + # def _get_dowloads(self) -> list: + # pass + + + def _update_queues_on_start_download(self, config_name: str, licenses: list) -> None: + for license in licenses: + result: WriteResult = self._get_db().collection('queues').document(license).update( + {'queue': firestore.ArrayUnion([config_name])} + ) + print(f"Updated {license} queue in 'queues' collection. Update_time: {result.update_time}.") + + def _update_queues_on_stop_download(self, config_name: str) -> None: + snapshot_list = self._get_db().collection('queues').get() + for snapshot in snapshot_list: + result: WriteResult = self._get_db().collection('queues').document(snapshot.id).update({ + 'queue': firestore.ArrayRemove([config_name])}) + print(f"Updated {snapshot.id} queue in 'queues' collection. 
Update_time: {result.update_time}.") diff --git a/weather_dl_v2/fastapi-server/firestore_db/db.py b/weather_dl_v2/fastapi-server/firestore_db/db.py deleted file mode 100644 index 0b5d9370..00000000 --- a/weather_dl_v2/fastapi-server/firestore_db/db.py +++ /dev/null @@ -1,19 +0,0 @@ -fake_download_db = {} - -fake_license_priority_db = { - "l1": [], - "l2": [], - "l3": [], -} - -fake_manifest_db = {} - - -fake_licenses_db = { - "l1": {"client_name": "MARS", "number_of_requests": 2, "api_key": "XXX", "api_url": "", - "k8s_deployment_id": ""}, - "l2": {"client_name": "MARS", "number_of_requests": 2, "api_key": "XXX", "api_url": "", - "k8s_deployment_id": ""}, - "l3": {"client_name": "CDS", "number_of_requests": 5, "api_key": "XXX", "api_url": "", - "k8s_deployment_id": ""}, -} \ No newline at end of file diff --git a/weather_dl_v2/fastapi-server/main.py b/weather_dl_v2/fastapi-server/main.py index 910f8bd4..638cad88 100644 --- a/weather_dl_v2/fastapi-server/main.py +++ b/weather_dl_v2/fastapi-server/main.py @@ -1,7 +1,7 @@ from contextlib import asynccontextmanager from fastapi import FastAPI from fastapi.responses import HTMLResponse -from routers import license, download, license_priority +from routers import license, download, queues @asynccontextmanager async def lifespan(app: FastAPI): @@ -15,7 +15,7 @@ async def lifespan(app: FastAPI): app.include_router(license.router) app.include_router(download.router) -app.include_router(license_priority.router) +app.include_router(queues.router) @app.get("/") async def main(): diff --git a/weather_dl_v2/fastapi-server/routers/download.py b/weather_dl_v2/fastapi-server/routers/download.py index bcd4a66b..f57c8955 100644 --- a/weather_dl_v2/fastapi-server/routers/download.py +++ b/weather_dl_v2/fastapi-server/routers/download.py @@ -1,8 +1,10 @@ from fastapi import APIRouter, HTTPException, BackgroundTasks, UploadFile -from firestore_db.db import fake_download_db, fake_license_priority_db +from db_service.database import 
FirestoreClient from config_processing.pipeline import start_processing_config import shutil +db_client = FirestoreClient() + router = APIRouter( prefix="/download", tags=["download"], @@ -23,8 +25,8 @@ def submit_download(file: UploadFile | None = None, licenses: list = [], backgro if not file: return {"message": "No upload file sent."} else: - if file.filename in fake_download_db: - raise HTTPException(status_code=409, + if db_client._check_download_exists(file.filename): + raise HTTPException(status_code=400, detail=f"Please stop the ongoing download of the config file '{file.filename}' " "before attempting to start a new download.") try: @@ -42,10 +44,10 @@ def submit_download(file: UploadFile | None = None, licenses: list = [], backgro async def get_downloads(client_name: str | None = None): # Get this kind of response by querying fake_download_db + fake_manifest_db. if client_name: - res = { "config_name": "config_3", "client_name": client_name,"total_shards": 10000, "scheduled_shards": 4990, + result = { "config_name": "config_3", "client_name": client_name,"total_shards": 10000, "scheduled_shards": 4990, "downloaded_shards": 5000, "failed_shards": 0 } else: - res = [ + result = [ { "config_name": "config_1", "client_name": "MARS","total_shards": 10000, "scheduled_shards": 4990, "downloaded_shards": 5000, "failed_shards": 0 }, { "config_name": "config_2", "client_name": "MARS","total_shards": 10000, "scheduled_shards": 4990, @@ -53,29 +55,27 @@ async def get_downloads(client_name: str | None = None): { "config_name": "config_3", "client_name": "CDS","total_shards": 10000, "scheduled_shards": 4990, "downloaded_shards": 5000, "failed_shards": 0 } ] - return res + return result # Get status of particular download @router.get("/{config_name}") async def get_download(config_name: str): - if config_name not in fake_download_db: + if not db_client._check_download_exists(config_name): raise HTTPException(status_code=404, detail="Download config not found in 
weather-dl v2.") # Get this kind of response by querying fake_manifest_db. - res = { "config_name": config_name, "client_name": "MARS", "total_shards": 10000, "scheduled_shards": 4990, + result = { "config_name": config_name, "client_name": "MARS", "total_shards": 10000, "scheduled_shards": 4990, "downloaded_shards": 5000, "failed_shards": 0 } - return res + return result # Stop & remove the execution of the config. @router.delete("/{config_name}") async def delete_download(config_name: str): - if config_name not in fake_download_db: + if not db_client._check_download_exists(config_name): raise HTTPException(status_code=404, detail="No such download config to stop & remove.") - del fake_download_db[config_name] - for k, v in fake_license_priority_db.items(): - fake_license_priority_db[k] = [value for value in v if value != config_name] - + db_client._stop_download(config_name) + db_client._update_queues_on_stop_download(config_name) return {"config_name": config_name, "message": "Download config stopped & removed successfully."} diff --git a/weather_dl_v2/fastapi-server/routers/license.py b/weather_dl_v2/fastapi-server/routers/license.py index 80d3acee..2e4ee019 100644 --- a/weather_dl_v2/fastapi-server/routers/license.py +++ b/weather_dl_v2/fastapi-server/routers/license.py @@ -1,7 +1,9 @@ from fastapi import APIRouter, HTTPException from pydantic import BaseModel -from firestore_db.db import fake_licenses_db, fake_license_priority_db +from db_service.database import FirestoreClient +db_client = FirestoreClient() + class License(BaseModel): client_name: str number_of_requests: int @@ -25,59 +27,65 @@ class LicenseInternal(License): @router.get("/") async def get_licenses(client_name: str | None = None): if client_name: - licenses = {k: v for k, v in fake_licenses_db.items() if v["client_name"] == client_name} - return licenses + result = db_client._get_license_by_client_name(client_name) else: - return fake_licenses_db + result = db_client._get_licenses() + return 
result # Get particular license @router.get("/{license_id}") -async def get_license(license_id: str): - if license_id not in fake_licenses_db: - raise HTTPException(status_code=404, detail="License not found") - return {"license_id": license_id, "client_name": fake_licenses_db[license_id]["client_name"]} +async def get_license_by_license_id(license_id: str): + result = db_client._get_license_by_license_id(license_id) + if not result: + raise HTTPException(status_code=404, detail="License not found.") + return result # Update existing license @router.put("/{license_id}") async def update_license(license_id: str, license: License): - try: - license_dict = license.dict() - fake_licenses_db[license_id].update(license_dict) - return {"license_id": license_id, "name": "License updated successfully."} - except: - return {"license_id": license_id, "message": "No such license to update."} + if not db_client._check_license_exists(license_id): + raise HTTPException(status_code=404, detail="No such license to update.") + + license_dict = license.dict() + db_client._update_license(license_id, license_dict) + # TODO: Add a background task to create k8s deployement for this updated license. + # And update entry of 'k8s_deployment_id' entry in 'license' collection. + return {"license_id": license_id, "name": "License updated successfully."} -# Add/Update k8s deployment ID for existing license (intenally) +# Add/Update k8s deployment ID for existing license (intenally). 
@router.put("/server/{license_id}") -async def update_license_internal(license_id: str, license: LicenseInternal): - try: - license_dict = license.dict() - fake_licenses_db[license_id].update(license_dict) - return {"license_id": license_id, "name": "License updated successfully."} - except: - return {"license_id": license_id, "message": "No such license to update."} +async def update_license_internal(license_id: str, k8s_deployment_id: str): + if not db_client._check_license_exists(license_id): + raise HTTPException(status_code=404, detail="No such license to update.") + license_dict = {"k8s_deployment_id": k8s_deployment_id} + + db_client._update_license(license_id, license_dict) + return {"license_id": license_id, "message": "License updated successfully."} # Add new license @router.post("/") async def add_license(license: License): + print(license) license_dict = license.dict() license_dict['k8s_deployment_id'] = "" - license_id = "l4" - fake_licenses_db[license_id] = license_dict - fake_license_priority_db[license_id] = {} - return {"license_id": license_id, "message": "License removed successfully."} + license_id = db_client._add_license(license_dict) + db_client._create_license_queue(license_id, license_dict['client_name']) + # TODO: Add a background task to create k8s deployement for this newly added license. + # And update entry of 'k8s_deployment_id' entry in 'license' collection. 
+ return {"license_id": license_id, "message": "License added successfully."} # Remove license @router.delete("/{license_id}") async def delete_license(license_id: str): - if license_id not in fake_licenses_db: + if not db_client._check_license_exists(license_id): raise HTTPException(status_code=404, detail="No such license to delete.") - - del fake_licenses_db[license_id] - del fake_license_priority_db[license_id] + db_client._delete_license(license_id) + db_client._remove_license_queue(license_id) + # TODO: Add a background task to delete k8s deployement for this deleted license. + # And update entry of 'k8s_deployment_id' entry in 'license' collection. return {"license_id": license_id, "message": "License removed successfully."} \ No newline at end of file diff --git a/weather_dl_v2/fastapi-server/routers/license_priority.py b/weather_dl_v2/fastapi-server/routers/license_priority.py deleted file mode 100644 index b5f2b0ed..00000000 --- a/weather_dl_v2/fastapi-server/routers/license_priority.py +++ /dev/null @@ -1,51 +0,0 @@ -from fastapi import APIRouter, HTTPException, UploadFile -from firestore_db.db import fake_license_priority_db -import shutil -import tempfile -import json - -router = APIRouter( - prefix="/license-priority", - tags=["license-priority"], - responses={404: {"description": "Not found"}}, -) - -# Users can change the execution order of config per license basis. 
-# List the licenses priority -@router.get("/") -async def get_all_license_priority(): - return fake_license_priority_db - - -# Get particular license priority -@router.get("/{license_id}") -async def get_license_priority(license_id: str): - if license_id not in fake_license_priority_db: - raise HTTPException(status_code=404, detail="License's priority not found.") - return {"license_id": license_id, "priority": fake_license_priority_db[license_id]} - - -def parse_config(file) -> dict: - """Reads `*.json` files.""" - try: - return json.load(file) - except json.JSONDecodeError: - pass - - -# Change config's priority on particular license -@router.post("/{license_id}") -def submit_download(license_id: str, file: UploadFile | None = None): - if license_id not in fake_license_priority_db: - raise HTTPException(status_code=404, detail="License's priority not found.") - if not file: - return {"message": "No upload file sent."} - else: - try: - with tempfile.NamedTemporaryFile() as dest_file: - shutil.copyfileobj(file.file, dest_file) - priority_dict = parse_config(dest_file) - fake_license_priority_db[license_id].update(priority_dict) - return {"message": f"'{license_id}' license priority updated successfully."} - except: - return {"message": f"Failed to update '{license_id}' license priority."} \ No newline at end of file diff --git a/weather_dl_v2/fastapi-server/routers/queues.py b/weather_dl_v2/fastapi-server/routers/queues.py new file mode 100644 index 00000000..f9eeed04 --- /dev/null +++ b/weather_dl_v2/fastapi-server/routers/queues.py @@ -0,0 +1,42 @@ +from fastapi import APIRouter, HTTPException +from db_service.database import FirestoreClient + +db_client = FirestoreClient() + +router = APIRouter( + prefix="/queues", + tags=["queues"], + responses={404: {"description": "Not found"}}, +) + +# Users can change the execution order of config per license basis. 
+ +# List the licenses priority + {client_name} filter +@router.get("/") +async def get_all_license_queue(client_name: str | None = None): + if client_name: + result = db_client._get_queue_by_client_name(client_name) + else: + result = db_client._get_queues() + return result + + +# Get particular license priority +@router.get("/{license_id}") +async def get_license_queue(license_id: str): + result = db_client._get_queue_by_license_id(license_id) + if not result: + raise HTTPException(status_code=404, detail="License's priority not found.") + return result + + +# Change config's priority on particular license +@router.post("/{license_id}") +def modify_license_queue(license_id: str, priority_list: list | None = []): + if not db_client._check_license_exists(license_id): + raise HTTPException(status_code=404, detail="License's priority not found.") + try: + db_client._update_license_queue(license_id, priority_list) + return {"message": f"'{license_id}' license priority updated successfully."} + except: + return {"message": f"Failed to update '{license_id}' license priority."} \ No newline at end of file From 6d817c3e556dbe63698968c621f10ad27c8ad45d Mon Sep 17 00:00:00 2001 From: Rahul Mahrsee Date: Fri, 19 May 2023 09:10:43 +0000 Subject: [PATCH 03/51] Updated api_endpoints & added server k8s dep. conf --- weather_dl_v2/fastapi-server/README.md | 5 ++ .../fastapi-server/db_service/database.py | 5 +- weather_dl_v2/fastapi-server/server.yaml | 87 ++++++++++++++++++- 3 files changed, 93 insertions(+), 4 deletions(-) diff --git a/weather_dl_v2/fastapi-server/README.md b/weather_dl_v2/fastapi-server/README.md index 1d0cfa0a..8faaf249 100644 --- a/weather_dl_v2/fastapi-server/README.md +++ b/weather_dl_v2/fastapi-server/README.md @@ -22,6 +22,11 @@ export REPO= eg:weather-tools gcloud builds submit . 
--tag "gcr.io/$PROJECT_ID/$REPO:weather-dl-v2-server" --timeout=79200 --machine-type=e2-highcpu-32 ``` +* **Add path of created server image in server.yaml**: +``` +Please write down the fetcher's docker image path at Line 42 of server.yaml. +``` + * **Deploy fastapi server on kubernetes:** ``` kubectl apply -f server.yaml --force diff --git a/weather_dl_v2/fastapi-server/db_service/database.py b/weather_dl_v2/fastapi-server/db_service/database.py index f7dbf8e4..bdcd2f61 100644 --- a/weather_dl_v2/fastapi-server/db_service/database.py +++ b/weather_dl_v2/fastapi-server/db_service/database.py @@ -129,7 +129,7 @@ def _get_db(self) -> firestore.firestore.Client: def _start_download(self, config_name: str, client_name: str) -> None: result: WriteResult = self._get_db().collection('download').document(config_name).set( - {'client_name': client_name} + {'config_name': config_name, 'client_name': client_name} ) print(f"Added {config_name} in 'download' collection. Update_time: {result.update_time}.") @@ -163,7 +163,7 @@ def _update_license(self, license_id: str, license_dict: dict) -> None: def _create_license_queue(self, license_id: str, client_name: str) -> None: result: WriteResult = self._get_db().collection('queues').document(license_id).set( - {"client_name": client_name,"queue": []} + {"license_id": license_id, "client_name": client_name,"queue": []} ) print(f"Added {license_id} queue in 'queues' collection. 
Update_time: {result.update_time}.") @@ -224,7 +224,6 @@ def _get_licenses(self) -> list: # def _get_dowloads(self) -> list: # pass - def _update_queues_on_start_download(self, config_name: str, licenses: list) -> None: for license in licenses: result: WriteResult = self._get_db().collection('queues').document(license).update( diff --git a/weather_dl_v2/fastapi-server/server.yaml b/weather_dl_v2/fastapi-server/server.yaml index 4a3106a4..e64d6355 100644 --- a/weather_dl_v2/fastapi-server/server.yaml +++ b/weather_dl_v2/fastapi-server/server.yaml @@ -1 +1,86 @@ -# TODO: Write the k8s deployment script. \ No newline at end of file +# Due to our org level policy we can't expose external-ip. +# In case your project don't have any such restriction a +# then no need to create a nginx-server on VM to access this fastapi server +# instead create the LoadBalancer Service given below. +# +# # weather-dl server LoadBalancer Service +# # Enables the pods in a deployment to be accessible from outside the cluster +# apiVersion: v1 +# kind: Service +# metadata: +# name: weather-dl-v2-server-service +# spec: +# selector: +# app: weather-dl-v2-server-api +# ports: +# - protocol: "TCP" +# port: 8080 +# targetPort: 8080 +# type: LoadBalancer + +--- +# weather-dl-server-api Deployment +# Defines the deployment of the app running in a pod on any worker node +apiVersion: apps/v1 +kind: Deployment +metadata: + name: weather-dl-v2-server-api + labels: + app: weather-dl-v2-server-api +spec: + replicas: 1 + selector: + matchLabels: + app: weather-dl-v2-server-api + template: + metadata: + labels: + app: weather-dl-v2-server-api + spec: + containers: + - name: weather-dl-v2-server-api + image: XXXXXXX + ports: + - containerPort: 8080 + imagePullPolicy: Always + # resources: + # # You must specify requests for CPU to autoscale + # # based on CPU utilization + # requests: + # cpu: "250m" +--- +kind: Role +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: weather-dl-v2-server-api 
+rules: + - apiGroups: + - "" + - "apps" + - "batch" + resources: + - endpoints + - deployments + - pods + - jobs + verbs: + - get + - list + - watch + - create + - delete +--- +kind: RoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: weather-dl-v2-server-api + namespace: default +subjects: + - kind: ServiceAccount + name: default + namespace: default +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: weather-dl-v2-server-api +--- \ No newline at end of file From a76c24e962710765c6dabb822c825ed0c223c3f0 Mon Sep 17 00:00:00 2001 From: Rahul Mahrsee Date: Fri, 19 May 2023 11:10:33 +0000 Subject: [PATCH 04/51] Updated 'manifest' system to uses 'database' layer --- .../config_processing/manifest.py | 284 +----------------- .../config_processing/pipeline.py | 4 +- .../config_processing/stores.py | 1 - .../fastapi-server/config_processing/util.py | 8 +- .../fastapi-server/db_service/database.py | 5 +- weather_dl_v2/fastapi-server/main.py | 1 + 6 files changed, 21 insertions(+), 282 deletions(-) diff --git a/weather_dl_v2/fastapi-server/config_processing/manifest.py b/weather_dl_v2/fastapi-server/config_processing/manifest.py index e0a0a6e4..eec16749 100644 --- a/weather_dl_v2/fastapi-server/config_processing/manifest.py +++ b/weather_dl_v2/fastapi-server/config_processing/manifest.py @@ -1,19 +1,14 @@ """Client interface for connecting to a manifest.""" import abc -import collections import dataclasses import datetime import enum import json -import logging -import os import pandas as pd -import threading import time import traceback import typing as t -from urllib.parse import urlparse, parse_qsl from .util import ( to_json_serializable_type, @@ -21,21 +16,20 @@ get_file_size, get_wait_interval, generate_md5_hash, - retry_with_exponential_backoff, GLOBAL_COVERAGE_AREA ) import firebase_admin +from firebase_admin import credentials from firebase_admin import firestore -from google.cloud import bigquery from 
google.cloud.firestore_v1 import DocumentReference from google.cloud.firestore_v1.types import WriteResult +from db_service.database import Database + """An implementation-dependent Manifest URI.""" Location = t.NewType('Location', str) -logger = logging.getLogger(__name__) - class ManifestException(Exception): """Errors that occur in Manifest Clients.""" @@ -200,11 +194,9 @@ class Manifest(abc.ABC): ``` Attributes: - location: An implementation-specific manifest URI. status: The current `DownloadStatus` of the Manifest. """ - location: Location # To reduce the impact of _read() and _update() calls # on the start time of the stage. prev_stage_precise_start_time: t.Optional[str] = None @@ -282,7 +274,7 @@ def skip(self, config_name: str, dataset: str, selection: t.Dict, location: str, upload_end_time=current_utc_time, ) self._update(status) - logger.info(f'Manifest updated for skipped shard: {location!r} -- {DownloadStatus.to_dict(status)!r}.') + print(f'Manifest updated for skipped shard: {location!r} -- {DownloadStatus.to_dict(status)!r}.') def _set_for_transaction(self, config_name: str, dataset: str, selection: t.Dict, location: str, user: str) -> None: """Reset Manifest state in preparation for a new transaction.""" @@ -382,186 +374,7 @@ def _update(self, download_status: DownloadStatus) -> None: pass -class ConsoleManifest(Manifest): - - def __post_init__(self): - self.name = urlparse(self.location).hostname - - def _read(self, location: str) -> DownloadStatus: - return DownloadStatus() - - def _update(self, download_status: DownloadStatus) -> None: - logger.info(f'[{self.name}] {DownloadStatus.to_dict(download_status)!r}') - - -class LocalManifest(Manifest): - """Writes a JSON representation of the manifest to local file.""" - - _lock = threading.Lock() - - def __init__(self, location: Location) -> None: - super().__init__(Location(os.path.join(location, 'manifest.json'))) - if location and not os.path.exists(location): - os.makedirs(location) - - # If the 
file is empty, it should start out as an empty JSON object. - if not os.path.exists(self.location) or os.path.getsize(self.location) == 0: - with open(self.location, 'w') as file: - json.dump({}, file) - - def _read(self, location: str) -> DownloadStatus: - """Reads the JSON data from a manifest.""" - assert os.path.exists(self.location), f'{self.location} must exist!' - with LocalManifest._lock: - with open(self.location, 'r') as file: - manifest = json.load(file) - return DownloadStatus.from_dict(manifest.get(location, {})) - - def _update(self, download_status: DownloadStatus) -> None: - """Writes the JSON data to a manifest.""" - assert os.path.exists(self.location), f'{self.location} must exist!' - with LocalManifest._lock: - with open(self.location, 'r') as file: - manifest = json.load(file) - - status = DownloadStatus.to_dict(download_status) - manifest[status['location']] = status - - with open(self.location, 'w') as file: - json.dump(manifest, file) - logger.debug('Manifest written to.') - logger.debug(download_status) - - -class BQManifest(Manifest): - """Writes a JSON representation of the manifest to BQ file. - - This is an append-only implementation, the latest value in the manifest - represents the current state of a download. - """ - def __init__(self, location: Location) -> None: - super().__init__(Location(location[5:])) - TABLE_SCHEMA = [ - bigquery.SchemaField('config_name', 'STRING', mode='REQUIRED', - description="The name of the config file associated with the request."), - bigquery.SchemaField('dataset', 'STRING', mode='NULLABLE', - description="Represents the dataset field of the configuration."), - bigquery.SchemaField('selection', 'JSON', mode='REQUIRED', - description="Copy of selection section of the configuration."), - bigquery.SchemaField('location', 'STRING', mode='REQUIRED', - description="Location of the downloaded data."), - bigquery.SchemaField('area', 'STRING', mode='NULLABLE', - description="Represents area covered by the shard. 
" - "ST_GeogFromGeoJson(area): To convert a GeoJSON geometry object into a " - "GEOGRAPHY value. " - "ST_COVERS(geography_expression, ST_GEOGPOINT(longitude, latitude)): To check " - "if a point lies in the given area or not."), - bigquery.SchemaField('stage', 'STRING', mode='NULLABLE', - description="Current stage of request : 'fetch', 'download', 'retrieve', 'upload' " - "or None."), - bigquery.SchemaField('status', 'STRING', mode='REQUIRED', - description="Download status: 'scheduled', 'in-progress', 'success', or 'failure'."), - bigquery.SchemaField('error', 'STRING', mode='NULLABLE', - description="Cause of error, if any."), - bigquery.SchemaField('username', 'STRING', mode='REQUIRED', - description="Identifier for the user running the download."), - bigquery.SchemaField('size', 'FLOAT', mode='NULLABLE', - description="Shard size in GB."), - bigquery.SchemaField('scheduled_time', 'TIMESTAMP', mode='NULLABLE', - description="A UTC datetime when download was scheduled."), - bigquery.SchemaField('retrieve_start_time', 'TIMESTAMP', mode='NULLABLE', - description="A UTC datetime when the retrieve stage starts."), - bigquery.SchemaField('retrieve_end_time', 'TIMESTAMP', mode='NULLABLE', - description="A UTC datetime when the retrieve state ends."), - bigquery.SchemaField('fetch_start_time', 'TIMESTAMP', mode='NULLABLE', - description="A UTC datetime when the fetch state starts."), - bigquery.SchemaField('fetch_end_time', 'TIMESTAMP', mode='NULLABLE', - description="A UTC datetime when the fetch state ends."), - bigquery.SchemaField('download_start_time', 'TIMESTAMP', mode='NULLABLE', - description="A UTC datetime when the download state starts."), - bigquery.SchemaField('download_end_time', 'TIMESTAMP', mode='NULLABLE', - description="A UTC datetime when the download state ends."), - bigquery.SchemaField('upload_start_time', 'TIMESTAMP', mode='NULLABLE', - description="A UTC datetime when the upload state starts."), - bigquery.SchemaField('upload_end_time', 
'TIMESTAMP', mode='NULLABLE', - description="A UTC datetime when the upload state ends."), - ] - table = bigquery.Table(self.location, schema=TABLE_SCHEMA) - with bigquery.Client() as client: - client.create_table(table, exists_ok=True) - - def _read(self, location: str) -> DownloadStatus: - """Reads the JSON data from a manifest.""" - with bigquery.Client() as client: - select_statement = f"SELECT * FROM {self.location} WHERE location = @location" - - # Build the QueryJobConfig object with the parameters. - job_config = bigquery.QueryJobConfig() - job_config.query_parameters = [bigquery.ScalarQueryParameter('location', 'STRING', location)] - - # Execute the merge statement with the parameters. - query_job = client.query(select_statement, job_config=job_config) - - # Wait for the query to execute. - result = query_job.result() - row = {} - if result.total_rows > 0: - records = result.to_dataframe().to_dict('records') - row = {n: to_json_serializable_type(v) for n, v in records[0].items()} - return DownloadStatus.from_dict(row) - - # Added retry here to handle the concurrency issue in BigQuery. - # Eg: 400 Resources exceeded during query execution: Too many DML statements outstanding - # against table , limit is 20 - @retry_with_exponential_backoff - def _update(self, download_status: DownloadStatus) -> None: - """Writes the JSON data to a manifest.""" - with bigquery.Client() as client: - status = DownloadStatus.to_dict(download_status) - table = client.get_table(self.location) - columns = [field.name for field in table.schema] - parameter_type_mapping = {field.name: field.field_type for field in table.schema} - - update_dml = [f"{col} = @{col}" for col in columns] - insert_dml = [f"@{col}" for col in columns] - params = {col: status[col] for col in columns} - - # Build the merge statement as a string with parameter placeholders. 
- merge_statement = f""" - MERGE {self.location} T - USING ( - SELECT - @location as location - ) S - ON T.location = S.location - WHEN MATCHED THEN - UPDATE SET - {', '.join(update_dml)} - WHEN NOT MATCHED THEN - INSERT - ({", ".join(columns)}) - VALUES - ({', '.join(insert_dml)}) - """ - - logger.debug(merge_statement) - - # Build the QueryJobConfig object with the parameters. - job_config = bigquery.QueryJobConfig() - job_config.query_parameters = [bigquery.ScalarQueryParameter(col, parameter_type_mapping[col], value) - for col, value in params.items()] - - # Execute the merge statement with the parameters. - query_job = client.query(merge_statement, job_config=job_config) - - # Wait for the query to execute. - query_job.result() - - logger.debug('Manifest written to.') - logger.debug(download_status) - - -class FirestoreManifest(Manifest): +class FirestoreManifest(Manifest, Database): """A Firestore Manifest. This Manifest implementation stores DownloadStatuses in a Firebase document store. The document hierarchy for the manifest is as follows: @@ -585,8 +398,11 @@ def _get_db(self) -> firestore.firestore.Client: except ValueError as e: # The above call will fail with a value error when the firebase app is not initialized. # Initialize the app here, and try again. - firebase_admin.initialize_app(options=self.get_firestore_config()) - logger.info('Initialized Firebase App.') + # Use the application default credentials. 
+ cred = credentials.ApplicationDefault() + + firebase_admin.initialize_app(cred) + print('Initialized Firebase App.') if attempts > 4: raise ManifestException('Exceeded number of retries to get firestore client.') from e @@ -616,7 +432,7 @@ def _read(self, location: str) -> DownloadStatus: def _update(self, download_status: DownloadStatus) -> None: """Update or create a download status record.""" - logger.debug('Updating Firestore Manifest.') + print('Updating Firestore Manifest.') status = DownloadStatus.to_dict(download_status) doc_id = generate_md5_hash(status['location']) @@ -628,87 +444,13 @@ def _update(self, download_status: DownloadStatus) -> None: result: WriteResult = download_doc_ref.set(status) - logger.debug(f'Firestore manifest updated. ' + print(f'Firestore manifest updated. ' f'update_time={result.update_time}, ' f'filename={download_status.location}.') def root_document_for_store(self, store_scheme: str) -> DocumentReference: """Get the root manifest document given the user's config and current document's storage location.""" # Get user-defined collection for manifest. - root_collection = self.get_firestore_config().get('collection', 'manifest') + root_collection = 'test_manifest' return self._get_db().collection(root_collection).document(store_scheme) - def get_firestore_config(self) -> t.Dict: - """Parse firestore Location format: 'fs://?projectId=' - Users must specify a 'projectId' query parameter in the firestore location. If this argument - isn't passed in, users must set the `GOOGLE_CLOUD_PROJECT` environment variable. - Users may specify options to `firebase_admin.initialize_app()` via query arguments in the URL. - For more information about what options are available, consult this documentation: - https://firebase.google.com/docs/reference/admin/python/firebase_admin#initialize_app - Note: each query key-value pair may only appear once. If there are duplicates, the last pair - will be used. 
- Optionally, users may configure these options via the `FIREBASE_CONFIG` environment variable, - which is typically a path/to/a/file.json. - Examples: - >>> location = Location("fs://my-collection?projectId=my-project-id&storageBucket=foo") - >>> FirestoreManifest(location).get_firestore_config() - {'collection': 'my-collection', 'projectId': 'my-project-id', 'storageBucket': 'foo'} - Raises: - ValueError: If query parameters are malformed. - AssertionError: If the 'projectId' query parameter is not set. - """ - parsed = urlparse(self.location) - query_params = {} - if parsed.query: - query_params = dict(parse_qsl(parsed.query, strict_parsing=True)) - return {'collection': parsed.netloc, **query_params} - - -class MockManifest(Manifest): - """In-memory mock manifest.""" - - def __init__(self, location: Location) -> None: - super().__init__(location) - self.records = {} - - def _read(self, location: str) -> DownloadStatus: - manifest = self.records - return DownloadStatus.from_dict(manifest.get(location, {})) - - def _update(self, download_status: DownloadStatus) -> None: - status = DownloadStatus.to_dict(download_status) - self.records.update({status.get('location'): status}) - logger.debug('Manifest updated.') - logger.debug(download_status) - - -class NoOpManifest(Manifest): - """A manifest that performs no operations.""" - - def _read(self, location: str) -> DownloadStatus: - return DownloadStatus() - - def _update(self, download_status: DownloadStatus) -> None: - pass - - -"""Exposed manifest implementations. - -Users can choose their preferred manifest implementation by via the protocol of the Manifest Location. -The protocol corresponds to the keys of this ordered dictionary. - -If no protocol is specified, we assume the user wants to write to the local file system. -If no key is found, the `NoOpManifest` option will be chosen. See `parsers:parse_manifest_location`. 
-""" -MANIFESTS = collections.OrderedDict({ - 'cli': ConsoleManifest, - 'fs': FirestoreManifest, - 'bq': BQManifest, - '': LocalManifest, -}) - -if __name__ == '__main__': - # Execute doc tests - import doctest - - doctest.testmod() diff --git a/weather_dl_v2/fastapi-server/config_processing/pipeline.py b/weather_dl_v2/fastapi-server/config_processing/pipeline.py index 12f49170..ddaa3d52 100644 --- a/weather_dl_v2/fastapi-server/config_processing/pipeline.py +++ b/weather_dl_v2/fastapi-server/config_processing/pipeline.py @@ -9,9 +9,7 @@ def start_processing_config(config_file, licenses): config = {} - # TODO: Make use of db_service instead of FirestoreManifest. - manifest_location = "XXXXXXXXX" - manifest = FirestoreManifest(manifest_location) + manifest = FirestoreManifest() with open(config_file, 'r', encoding='utf-8') as f: # configs/example.cfg -> example.cfg diff --git a/weather_dl_v2/fastapi-server/config_processing/stores.py b/weather_dl_v2/fastapi-server/config_processing/stores.py index 0ecd36fa..363aa49e 100644 --- a/weather_dl_v2/fastapi-server/config_processing/stores.py +++ b/weather_dl_v2/fastapi-server/config_processing/stores.py @@ -9,7 +9,6 @@ from apache_beam.io.filesystems import FileSystems - class Store(abc.ABC): """A interface to represent where downloads are stored. 
diff --git a/weather_dl_v2/fastapi-server/config_processing/util.py b/weather_dl_v2/fastapi-server/config_processing/util.py index 862dc7eb..7b8b128d 100644 --- a/weather_dl_v2/fastapi-server/config_processing/util.py +++ b/weather_dl_v2/fastapi-server/config_processing/util.py @@ -2,7 +2,6 @@ import geojson import hashlib import itertools -import logging import os import socket import subprocess @@ -17,7 +16,6 @@ from urllib.parse import urlparse from google.api_core.exceptions import BadRequest -logger = logging.getLogger(__name__) LATITUDE_RANGE = (-90, 90) LONGITUDE_RANGE = (-180, 180) @@ -74,7 +72,7 @@ def copy(src: str, dst: str) -> None: try: subprocess.run(['gsutil', 'cp', src, dst], check=True, capture_output=True) except subprocess.CalledProcessError as e: - logger.error(f'Failed to copy file {src!r} to {dst!r} due to {e.stderr.decode("utf-8")}') + print(f'Failed to copy file {src!r} to {dst!r} due to {e.stderr.decode("utf-8")}') raise @@ -82,7 +80,7 @@ def copy(src: str, dst: str) -> None: def to_json_serializable_type(value: t.Any) -> t.Any: """Returns the value with a type serializable to JSON""" # Note: The order of processing is significant. 
- logger.debug('Serializing to JSON') + print('Serializing to JSON') if pd.isna(value) or value is None: return None @@ -183,5 +181,5 @@ def download_with_aria2(url: str, path: str) -> None: check=True, capture_output=True) except subprocess.CalledProcessError as e: - logger.error(f'Failed download from server {url!r} to {path!r} due to {e.stderr.decode("utf-8")}') + print(f'Failed download from server {url!r} to {path!r} due to {e.stderr.decode("utf-8")}') raise diff --git a/weather_dl_v2/fastapi-server/db_service/database.py b/weather_dl_v2/fastapi-server/db_service/database.py index bdcd2f61..4d49fcf4 100644 --- a/weather_dl_v2/fastapi-server/db_service/database.py +++ b/weather_dl_v2/fastapi-server/db_service/database.py @@ -12,7 +12,8 @@ class Database(abc.ABC): @abc.abstractmethod def _get_db(self): pass - + +class CRUDOperations(abc.ABC): @abc.abstractmethod def _start_download(self, config_name: str, client_name: str) -> None: pass @@ -96,7 +97,7 @@ def _update_queues_on_stop_download(self, config_name: str) -> None: -class FirestoreClient(Database): +class FirestoreClient(Database, CRUDOperations): def _get_db(self) -> firestore.firestore.Client: """Acquire a firestore client, initializing the firebase app if necessary. Will attempt to get the db client five times. If it's still unsuccessful, a diff --git a/weather_dl_v2/fastapi-server/main.py b/weather_dl_v2/fastapi-server/main.py index 638cad88..874506fd 100644 --- a/weather_dl_v2/fastapi-server/main.py +++ b/weather_dl_v2/fastapi-server/main.py @@ -6,6 +6,7 @@ @asynccontextmanager async def lifespan(app: FastAPI): # Boot up + # TODO: Replace hard-coded collection name by read a server config. 
print("Create database if not already exists.") print("Retrieve license information & create license deployment if needed.") yield From 600d93a6c7d07781b47975271d7a738d6981b8d8 Mon Sep 17 00:00:00 2001 From: Rahul Mahrsee Date: Fri, 19 May 2023 11:32:50 +0000 Subject: [PATCH 05/51] Updated Dockerfile to create server image. --- weather_dl_v2/fastapi-server/Dockerfile | 26 ++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/weather_dl_v2/fastapi-server/Dockerfile b/weather_dl_v2/fastapi-server/Dockerfile index 9e2025a7..08e0c60c 100644 --- a/weather_dl_v2/fastapi-server/Dockerfile +++ b/weather_dl_v2/fastapi-server/Dockerfile @@ -1 +1,25 @@ -# TODO: Write a docker file to create the image of FastAPI server. \ No newline at end of file +FROM continuumio/miniconda3:latest + +EXPOSE 8080 + +# Update miniconda +RUN conda update conda -y + +# Add the mamba solver for faster builds +RUN conda install -n base conda-libmamba-solver +RUN conda config --set solver libmamba + +COPY . . +# Create conda env using environment.yml +RUN conda env create -f environment.yml --debug + +# Activate the conda env and update the PATH +ARG CONDA_ENV_NAME=weather-dl-v2-server +RUN echo "source activate ${CONDA_ENV_NAME}" >> ~/.bashrc +ENV PATH /opt/conda/envs/${CONDA_ENV_NAME}/bin:$PATH + +# Use the ping endpoint as a healthcheck, +# so Docker knows if the API is still running ok or needs to be restarted +HEALTHCHECK --interval=21s --timeout=3s --start-period=10s CMD curl --fail http://localhost:8080/ping || exit 1 + +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"] From 019574a76addb9d09d3809fe80c45b8cb0025a25 Mon Sep 17 00:00:00 2001 From: Rahul Mahrsee Date: Mon, 22 May 2023 07:40:19 +0000 Subject: [PATCH 06/51] Added nginx server. 
--- weather_dl_v2/nginx-server/Dockerfile | 8 ++++ weather_dl_v2/nginx-server/README.md | 63 +++++++++++++++++++++++++++ weather_dl_v2/nginx-server/nginx.conf | 10 +++++ 3 files changed, 81 insertions(+) create mode 100644 weather_dl_v2/nginx-server/Dockerfile create mode 100644 weather_dl_v2/nginx-server/README.md create mode 100644 weather_dl_v2/nginx-server/nginx.conf diff --git a/weather_dl_v2/nginx-server/Dockerfile b/weather_dl_v2/nginx-server/Dockerfile new file mode 100644 index 00000000..68b2b666 --- /dev/null +++ b/weather_dl_v2/nginx-server/Dockerfile @@ -0,0 +1,8 @@ +# server environment +FROM nginx:alpine +COPY ./nginx.conf /etc/nginx/conf.d/configfile.template + +ENV PORT 8080 +ENV HOST 0.0.0.0 +EXPOSE 8080 +CMD sh -c "envsubst '\$PORT' < /etc/nginx/conf.d/configfile.template > /etc/nginx/conf.d/default.conf && nginx -g 'daemon off;'" \ No newline at end of file diff --git a/weather_dl_v2/nginx-server/README.md b/weather_dl_v2/nginx-server/README.md new file mode 100644 index 00000000..2dda4090 --- /dev/null +++ b/weather_dl_v2/nginx-server/README.md @@ -0,0 +1,63 @@ +## Deployment Instructions: + +Due to our org level policy we can't expose external-ip using LoadBalancer Service +while deploying our FastAPI server. + +In case your project doesn't have any such restriction, +there is no need to create an nginx-server on a VM to access this FastAPI server; +instead, directly hit the external-ip exposed by the LoadBalancer service on kubernetes. + +* **Replace the FastAPI server Pod's IP in nginx.conf**: +``` +Please write down the FastAPI server Pod's IP at Line 8 of nginx.conf. +``` +> Note: Command to get the Pod IP : `kubectl get pods -o wide`. +> +> Note that the Pod IP might change if the Pod restarts, so we need to look +> for a better solution for this. + +* **Create docker image for nginx-server**: +``` +export PROJECT_ID= +export REPO= eg:weather-tools + +gcloud builds submit . 
--tag "gcr.io/$PROJECT_ID/$REPO:weather-dl-v2-nginx-server" --timeout=79200 --machine-type=e2-highcpu-32 +``` + +* **Create a VM using above created docker-image**: +``` +export ZONE= eg: us-central1-a +export SERVICE_ACCOUNT= # Let's keep this as Compute Engine Default Service Account +export IMAGE_PATH= # The above created image-path + +gcloud compute instances create-with-container weather-dl-v2-nginx-server \ + --project=$PROJECT_ID \ + --zone=$ZONE \ + --machine-type=e2-medium \ + --network-interface=network-tier=PREMIUM,stack-type=IPV4_ONLY,subnet=default \ + --maintenance-policy=MIGRATE \ + --provisioning-model=STANDARD \ + --service-account=$SERVICE_ACCOUNT \ + --scopes=https://www.googleapis.com/auth/cloud-platform \ + --tags=http-server,https-server \ + --image=projects/cos-cloud/global/images/cos-stable-105-17412-101-4 \ + --boot-disk-size=10GB \ + --boot-disk-type=pd-balanced \ + --boot-disk-device-name=weather-dl-v2-nginx-server \ + --container-image=$IMAGE_PATH \ + --container-restart-policy=on-failure \ + --no-shielded-secure-boot \ + --shielded-vtpm \ + --shielded-integrity-monitoring \ + --labels=goog-ec-src=vm_add-gcloud,container-vm=cos-stable-105-17412-101-4 +``` + +* **Hit our fastapi server after doing ssh into the above created VM**: +``` +curl localhost:8080 +``` + +* **Upload config file to fastapi server from VM**: +``` +curl -F file=@example.cfg localhost:8080/uploadfile/ +``` diff --git a/weather_dl_v2/nginx-server/nginx.conf b/weather_dl_v2/nginx-server/nginx.conf new file mode 100644 index 00000000..ca7f5ad1 --- /dev/null +++ b/weather_dl_v2/nginx-server/nginx.conf @@ -0,0 +1,10 @@ +server { + listen 8080; + listen [::]:8080; + + server_name localhost; + + location / { + proxy_pass http://:8080; + } +} \ No newline at end of file From e7101aed9b911d4399a9f1949e24edee17492047 Mon Sep 17 00:00:00 2001 From: Rahul Mahrsee Date: Mon, 22 May 2023 11:10:32 +0000 Subject: [PATCH 07/51] fix linter issues. 
--- .../config_processing/manifest.py | 7 +-- .../config_processing/parsers.py | 4 +- .../config_processing/partition.py | 12 ++-- .../config_processing/pipeline.py | 9 +-- .../fastapi-server/db_service/database.py | 59 +++++++++---------- weather_dl_v2/fastapi-server/main.py | 4 +- .../fastapi-server/routers/download.py | 35 +++++------ .../fastapi-server/routers/license.py | 14 +++-- .../fastapi-server/routers/queues.py | 8 +-- 9 files changed, 76 insertions(+), 76 deletions(-) diff --git a/weather_dl_v2/fastapi-server/config_processing/manifest.py b/weather_dl_v2/fastapi-server/config_processing/manifest.py index eec16749..5a405816 100644 --- a/weather_dl_v2/fastapi-server/config_processing/manifest.py +++ b/weather_dl_v2/fastapi-server/config_processing/manifest.py @@ -445,12 +445,11 @@ def _update(self, download_status: DownloadStatus) -> None: result: WriteResult = download_doc_ref.set(status) print(f'Firestore manifest updated. ' - f'update_time={result.update_time}, ' - f'filename={download_status.location}.') + f'update_time={result.update_time}, ' + f'filename={download_status.location}.') def root_document_for_store(self, store_scheme: str) -> DocumentReference: """Get the root manifest document given the user's config and current document's storage location.""" - # Get user-defined collection for manifest. + # TODO: Get user-defined collection for manifest. 
root_collection = 'test_manifest' return self._get_db().collection(root_collection).document(store_scheme) - diff --git a/weather_dl_v2/fastapi-server/config_processing/parsers.py b/weather_dl_v2/fastapi-server/config_processing/parsers.py index 0eaadc4b..08c0b956 100644 --- a/weather_dl_v2/fastapi-server/config_processing/parsers.py +++ b/weather_dl_v2/fastapi-server/config_processing/parsers.py @@ -10,9 +10,10 @@ import typing as t import numpy as np from collections import OrderedDict +from .config import Config CLIENTS = ['cds', 'mars', 'ecpublic'] -from .config import Config + def date(candidate: str) -> datetime.date: """Converts ECMWF-format date strings into a `datetime.date`. @@ -445,4 +446,3 @@ def get_subsections(config: Config) -> t.List[t.Tuple[str, t.Dict]]: """ return [(name, params) for name, params in config.kwargs.items() if isinstance(params, dict)] or [('default', {})] - diff --git a/weather_dl_v2/fastapi-server/config_processing/partition.py b/weather_dl_v2/fastapi-server/config_processing/partition.py index 08225cd3..bba76e41 100644 --- a/weather_dl_v2/fastapi-server/config_processing/partition.py +++ b/weather_dl_v2/fastapi-server/config_processing/partition.py @@ -53,7 +53,6 @@ def _create_partition_config(self, option: t.Tuple) -> Config: out.selection = copy return out - def skip_partition(self, config: Config) -> bool: """Return true if partition should be skipped.""" @@ -68,7 +67,6 @@ def skip_partition(self, config: Config) -> bool: return False - def prepare_partitions(self) -> t.Iterator[Config]: """Iterate over client parameters, partitioning over `partition_keys`. 
@@ -84,19 +82,17 @@ def prepare_partitions(self) -> t.Iterator[Config]: for option in itertools.product(*[self.config.selection[key] for key in self.config.partition_keys]): yield self._create_partition_config(option) - def new_downloads_only(self, candidate: Config) -> bool: """Predicate function to skip already downloaded partitions.""" if self.store is None: self.store = FSStore() should_skip = self.skip_partition(candidate) - if should_skip: - print("Skipped.") - return not should_skip + return not should_skip def update_manifest_collection(self, partition: Config) -> Config: """Updates the DB.""" location = prepare_target_name(partition) - self.manifest.schedule(partition.config_name, partition.dataset, partition.selection, location, partition.user_id) - print(f'Created partition {location!r}.') \ No newline at end of file + self.manifest.schedule(partition.config_name, partition.dataset, + partition.selection, location, partition.user_id) + print(f'Created partition {location!r}.') diff --git a/weather_dl_v2/fastapi-server/config_processing/pipeline.py b/weather_dl_v2/fastapi-server/config_processing/pipeline.py index ddaa3d52..fcb3c402 100644 --- a/weather_dl_v2/fastapi-server/config_processing/pipeline.py +++ b/weather_dl_v2/fastapi-server/config_processing/pipeline.py @@ -6,7 +6,8 @@ from db_service.database import FirestoreClient db_client = FirestoreClient() - + + def start_processing_config(config_file, licenses): config = {} manifest = FirestoreManifest() @@ -15,10 +16,10 @@ def start_processing_config(config_file, licenses): # configs/example.cfg -> example.cfg config_name = os.path.split(config_file)[1] config = process_config(f, config_name) - + config.force_download = True config.user_id = getpass.getuser() - + partition_obj = PartitionConfig(config, None, manifest) # Prepare partitions @@ -26,7 +27,7 @@ def start_processing_config(config_file, licenses): # Skip existing downloads if partition_obj.new_downloads_only(partition): 
partition_obj.update_manifest_collection(partition) - + # Make entry in 'download' & 'queues' collection. db_client._start_download(config_name, config.client) db_client._update_queues_on_start_download(config_name, licenses) diff --git a/weather_dl_v2/fastapi-server/db_service/database.py b/weather_dl_v2/fastapi-server/db_service/database.py index 4d49fcf4..0765f63e 100644 --- a/weather_dl_v2/fastapi-server/db_service/database.py +++ b/weather_dl_v2/fastapi-server/db_service/database.py @@ -13,35 +13,36 @@ class Database(abc.ABC): def _get_db(self): pass + class CRUDOperations(abc.ABC): @abc.abstractmethod def _start_download(self, config_name: str, client_name: str) -> None: pass - + @abc.abstractmethod def _stop_download(self, config_name: str) -> None: pass - + @abc.abstractmethod def _check_download_exists(self, config_name: str) -> bool: pass - + @abc.abstractmethod def _add_license(self, license_dict: dict) -> str: pass - + @abc.abstractmethod def _delete_license(self, license_id: str) -> str: pass - + @abc.abstractmethod def _create_license_queue(self, license_id: str, client_name: str) -> None: pass - + @abc.abstractmethod def _remove_license_queue(self, license_id: str) -> None: pass - + @abc.abstractmethod def _get_queues(self) -> list: pass @@ -49,11 +50,11 @@ def _get_queues(self) -> list: @abc.abstractmethod def _get_queue_by_license_id(self, license_id: str) -> dict: pass - + @abc.abstractmethod def _get_queue_by_client_name(self, client_name: str) -> list: pass - + @abc.abstractmethod def _update_license_queue(self, license_id: str, priority_list: list) -> None: pass @@ -61,7 +62,7 @@ def _update_license_queue(self, license_id: str, priority_list: list) -> None: @abc.abstractmethod def _check_license_exists(self, license_id: str) -> bool: pass - + @abc.abstractmethod def _get_license_by_license_id(slef, license_id: str) -> dict: pass @@ -69,7 +70,7 @@ def _get_license_by_license_id(slef, license_id: str) -> dict: @abc.abstractmethod def 
_get_license_by_client_name(self, client_name: str) -> list: pass - + @abc.abstractmethod def _get_licenses(self) -> list: pass @@ -78,7 +79,7 @@ def _get_licenses(self) -> list: def _update_license(self, license_id: str, license_dict: dict) -> None: pass - # TODO: Find better way to execute these query. + # TODO: Find better way to execute these query. # @abc.abstractmethod # def _get_download_by_config_name(self, config_name: str) -> dict: # pass @@ -86,7 +87,7 @@ def _update_license(self, license_id: str, license_dict: dict) -> None: # @abc.abstractmethod # def _get_dowloads(self) -> list: # pass - + @abc.abstractmethod def _update_queues_on_start_download(self, config_name: str, licenses: list) -> None: pass @@ -95,7 +96,6 @@ def _update_queues_on_start_download(self, config_name: str, licenses: list) -> def _update_queues_on_stop_download(self, config_name: str) -> None: pass - class FirestoreClient(Database, CRUDOperations): def _get_db(self) -> firestore.firestore.Client: @@ -126,7 +126,6 @@ def _get_db(self) -> firestore.firestore.Client: attempts += 1 return db - def _start_download(self, config_name: str, client_name: str) -> None: result: WriteResult = self._get_db().collection('download').document(config_name).set( @@ -134,15 +133,15 @@ def _start_download(self, config_name: str, client_name: str) -> None: ) print(f"Added {config_name} in 'download' collection. Update_time: {result.update_time}.") - + def _stop_download(self, config_name: str) -> None: timestamp = self._get_db().collection('download').document(config_name).delete() print(f"Removed {config_name} in 'download' collection. 
Update_time: {timestamp}.") - + def _check_download_exists(self, config_name: str) -> bool: result: DocumentSnapshot = self._get_db().collection('download').document(config_name).get() return result.exists - + def _add_license(self, license_dict: dict) -> str: license_id = f"L{len(self._get_db().collection('license').get()) + 1}" license_dict["license_id"] = license_id @@ -151,45 +150,45 @@ def _add_license(self, license_dict: dict) -> str: ) print(f"Added {license_id} in 'license' collection. Update_time: {result.update_time}.") return license_id - + def _delete_license(self, license_id: str) -> None: timestamp = self._get_db().collection('license').document(license_id).delete() print(f"Removed {license_id} in 'license' collection. Update_time: {timestamp}.") - + def _update_license(self, license_id: str, license_dict: dict) -> None: result: WriteResult = self._get_db().collection('license').document(license_id).update({ license_dict }) print(f"Updated {license_id} in 'license' collection. Update_time: {result.update_time}.") - + def _create_license_queue(self, license_id: str, client_name: str) -> None: result: WriteResult = self._get_db().collection('queues').document(license_id).set( - {"license_id": license_id, "client_name": client_name,"queue": []} + {"license_id": license_id, "client_name": client_name, "queue": []} ) print(f"Added {license_id} queue in 'queues' collection. Update_time: {result.update_time}.") - + def _remove_license_queue(self, license_id: str) -> None: timestamp = self._get_db().collection('queues').document(license_id).delete() print(f"Removed {license_id} queue in 'queues' collection. 
Update_time: {timestamp}.") - + def _get_queues(self) -> list: snapshot_list = self._get_db().collection('queues').get() result = [] for snapshot in snapshot_list: result.append(self._get_db().collection('queues').document(snapshot.id).get().to_dict()) return result - + def _get_queue_by_license_id(self, license_id: str) -> dict: result: DocumentSnapshot = self._get_db().collection('queues').document(license_id).get() return result.to_dict() - + def _get_queue_by_client_name(self, client_name: str) -> list: snapshot_list = self._get_db().collection('queues').where('client_name', '==', client_name).get() result = [] for snapshot in snapshot_list: result.append(snapshot.to_dict()) return result - + def _update_license_queue(self, license_id: str, priority_list: list) -> None: result: WriteResult = self._get_db().collection('queues').document(license).update( {'queue': priority_list} @@ -199,7 +198,7 @@ def _update_license_queue(self, license_id: str, priority_list: list) -> None: def _check_license_exists(self, license_id: str) -> bool: result: DocumentSnapshot = self._get_db().collection('license').document(license_id).get() return result.exists - + def _get_license_by_license_id(self, license_id: str) -> dict: result: DocumentSnapshot = self._get_db().collection('license').document(license_id).get() return result.to_dict() @@ -210,7 +209,7 @@ def _get_license_by_client_name(self, client_name: str) -> list: for snapshot in snapshot_list: result.append(snapshot.to_dict()) return result - + def _get_licenses(self) -> list: snapshot_list = self._get_db().collection('license').get() result = [] @@ -226,7 +225,7 @@ def _get_licenses(self) -> list: # pass def _update_queues_on_start_download(self, config_name: str, licenses: list) -> None: - for license in licenses: + for license in licenses: result: WriteResult = self._get_db().collection('queues').document(license).update( {'queue': firestore.ArrayUnion([config_name])} ) diff --git a/weather_dl_v2/fastapi-server/main.py 
b/weather_dl_v2/fastapi-server/main.py index 874506fd..33abdf79 100644 --- a/weather_dl_v2/fastapi-server/main.py +++ b/weather_dl_v2/fastapi-server/main.py @@ -3,6 +3,7 @@ from fastapi.responses import HTMLResponse from routers import license, download, queues + @asynccontextmanager async def lifespan(app: FastAPI): # Boot up @@ -18,6 +19,7 @@ async def lifespan(app: FastAPI): app.include_router(download.router) app.include_router(queues.router) + @app.get("/") async def main(): content = """ @@ -25,4 +27,4 @@ async def main(): Greetings from weather-dl v2 !! """ - return HTMLResponse(content=content) \ No newline at end of file + return HTMLResponse(content=content) diff --git a/weather_dl_v2/fastapi-server/routers/download.py b/weather_dl_v2/fastapi-server/routers/download.py index f57c8955..3e173c14 100644 --- a/weather_dl_v2/fastapi-server/routers/download.py +++ b/weather_dl_v2/fastapi-server/routers/download.py @@ -21,39 +21,40 @@ def upload(file: UploadFile): # Can submit a config to the server. @router.post("/") -def submit_download(file: UploadFile | None = None, licenses: list = [], background_tasks: BackgroundTasks = BackgroundTasks()): +def submit_download(file: UploadFile | None = None, licenses: list = [], + background_tasks: BackgroundTasks = BackgroundTasks()): if not file: return {"message": "No upload file sent."} else: if db_client._check_download_exists(file.filename): - raise HTTPException(status_code=400, - detail=f"Please stop the ongoing download of the config file '{file.filename}' " - "before attempting to start a new download.") + raise HTTPException(status_code=400, + detail=f"Please stop the ongoing download of the config file '{file.filename}' " + "before attempting to start a new download.") try: dest = upload(file) # Start processing config. 
background_tasks.add_task(start_processing_config, dest, licenses) return {"message": f"file '{file.filename}' saved at '{dest}' successfully."} - except: + except Exception: return {"message": f"Failed to save file '{file.filename}'."} - + # Can check the current status of the submitted config. # List status for all the downloads + handle filters @router.get("/") async def get_downloads(client_name: str | None = None): - # Get this kind of response by querying fake_download_db + fake_manifest_db. + # Get this kind of response by querying download collection + manifest collection. if client_name: - result = { "config_name": "config_3", "client_name": client_name,"total_shards": 10000, "scheduled_shards": 4990, - "downloaded_shards": 5000, "failed_shards": 0 } + result = {"config_name": "config_3", "client_name": client_name, "total_shards": 10000, + "scheduled_shards": 4990, "downloaded_shards": 5000, "failed_shards": 0} else: result = [ - { "config_name": "config_1", "client_name": "MARS","total_shards": 10000, "scheduled_shards": 4990, - "downloaded_shards": 5000, "failed_shards": 0 }, - { "config_name": "config_2", "client_name": "MARS","total_shards": 10000, "scheduled_shards": 4990, - "downloaded_shards": 5000, "failed_shards": 0 }, - { "config_name": "config_3", "client_name": "CDS","total_shards": 10000, "scheduled_shards": 4990, - "downloaded_shards": 5000, "failed_shards": 0 } + {"config_name": "config_1", "client_name": "MARS", "total_shards": 10000, "scheduled_shards": 4990, + "downloaded_shards": 5000, "failed_shards": 0}, + {"config_name": "config_2", "client_name": "MARS", "total_shards": 10000, "scheduled_shards": 4990, + "downloaded_shards": 5000, "failed_shards": 0}, + {"config_name": "config_3", "client_name": "CDS", "total_shards": 10000, "scheduled_shards": 4990, + "downloaded_shards": 5000, "failed_shards": 0} ] return result @@ -65,8 +66,8 @@ async def get_download(config_name: str): raise HTTPException(status_code=404, detail="Download config 
not found in weather-dl v2.") # Get this kind of response by querying fake_manifest_db. - result = { "config_name": config_name, "client_name": "MARS", "total_shards": 10000, "scheduled_shards": 4990, - "downloaded_shards": 5000, "failed_shards": 0 } + result = {"config_name": config_name, "client_name": "MARS", "total_shards": 10000, "scheduled_shards": 4990, + "downloaded_shards": 5000, "failed_shards": 0} return result diff --git a/weather_dl_v2/fastapi-server/routers/license.py b/weather_dl_v2/fastapi-server/routers/license.py index 2e4ee019..7575ec9e 100644 --- a/weather_dl_v2/fastapi-server/routers/license.py +++ b/weather_dl_v2/fastapi-server/routers/license.py @@ -3,7 +3,10 @@ from db_service.database import FirestoreClient db_client = FirestoreClient() - + + +# TODO: Make use of google secret manager. +# REF: https://cloud.google.com/secret-manager. class License(BaseModel): client_name: str number_of_requests: int @@ -47,7 +50,7 @@ async def get_license_by_license_id(license_id: str): async def update_license(license_id: str, license: License): if not db_client._check_license_exists(license_id): raise HTTPException(status_code=404, detail="No such license to update.") - + license_dict = license.dict() db_client._update_license(license_id, license_dict) # TODO: Add a background task to create k8s deployement for this updated license. @@ -64,12 +67,11 @@ async def update_license_internal(license_id: str, k8s_deployment_id: str): db_client._update_license(license_id, license_dict) return {"license_id": license_id, "message": "License updated successfully."} - - + + # Add new license @router.post("/") async def add_license(license: License): - print(license) license_dict = license.dict() license_dict['k8s_deployment_id'] = "" license_id = db_client._add_license(license_dict) @@ -88,4 +90,4 @@ async def delete_license(license_id: str): db_client._remove_license_queue(license_id) # TODO: Add a background task to delete k8s deployement for this deleted license. 
# And update entry of 'k8s_deployment_id' entry in 'license' collection. - return {"license_id": license_id, "message": "License removed successfully."} \ No newline at end of file + return {"license_id": license_id, "message": "License removed successfully."} diff --git a/weather_dl_v2/fastapi-server/routers/queues.py b/weather_dl_v2/fastapi-server/routers/queues.py index f9eeed04..9b8eebee 100644 --- a/weather_dl_v2/fastapi-server/routers/queues.py +++ b/weather_dl_v2/fastapi-server/routers/queues.py @@ -9,8 +9,8 @@ responses={404: {"description": "Not found"}}, ) -# Users can change the execution order of config per license basis. +# Users can change the execution order of config per license basis. # List the licenses priority + {client_name} filter @router.get("/") async def get_all_license_queue(client_name: str | None = None): @@ -28,7 +28,7 @@ async def get_license_queue(license_id: str): if not result: raise HTTPException(status_code=404, detail="License's priority not found.") return result - + # Change config's priority on particular license @router.post("/{license_id}") @@ -38,5 +38,5 @@ def modify_license_queue(license_id: str, priority_list: list | None = []): try: db_client._update_license_queue(license_id, priority_list) return {"message": f"'{license_id}' license priority updated successfully."} - except: - return {"message": f"Failed to update '{license_id}' license priority."} \ No newline at end of file + except Exception: + return {"message": f"Failed to update '{license_id}' license priority."} From 3b3eb69072826cc284205ea512e97791d6bca746 Mon Sep 17 00:00:00 2001 From: Rahul Mahrsee Date: Thu, 8 Jun 2023 04:25:05 +0000 Subject: [PATCH 08/51] Inital commit for license deployment. 
--- weather_dl_v2/README.md | 7 + weather_dl_v2/__init__.py | 0 .../downloader_kubernetes/Dockerfile | 31 ++ weather_dl_v2/downloader_kubernetes/README.md | 34 ++ .../downloader_kubernetes/downloader.py | 56 +++ .../downloader_kubernetes/downloader.yaml | 26 + .../downloader_kubernetes/environment.yml | 17 + .../downloader_kubernetes/manifest.py | 442 +++++++++++++++++ weather_dl_v2/downloader_kubernetes/util.py | 185 +++++++ weather_dl_v2/fastapi-server/Dockerfile | 1 + .../fastapi-server/routers/license.py | 1 + weather_dl_v2/license_deployment/Dockerfile | 20 + weather_dl_v2/license_deployment/README.md | 37 ++ weather_dl_v2/license_deployment/__init__.py | 0 weather_dl_v2/license_deployment/clients.py | 396 +++++++++++++++ weather_dl_v2/license_deployment/config.py | 101 ++++ weather_dl_v2/license_deployment/database.py | 106 ++++ .../license_deployment/environment.yml | 16 + weather_dl_v2/license_deployment/fetch.py | 92 ++++ .../license_deployment/job_creator.py | 23 + .../license_deployment.yaml | 64 +++ weather_dl_v2/license_deployment/manifest.py | 457 ++++++++++++++++++ weather_dl_v2/license_deployment/util.py | 185 +++++++ 23 files changed, 2297 insertions(+) create mode 100644 weather_dl_v2/README.md create mode 100644 weather_dl_v2/__init__.py create mode 100644 weather_dl_v2/downloader_kubernetes/Dockerfile create mode 100644 weather_dl_v2/downloader_kubernetes/README.md create mode 100644 weather_dl_v2/downloader_kubernetes/downloader.py create mode 100644 weather_dl_v2/downloader_kubernetes/downloader.yaml create mode 100644 weather_dl_v2/downloader_kubernetes/environment.yml create mode 100644 weather_dl_v2/downloader_kubernetes/manifest.py create mode 100644 weather_dl_v2/downloader_kubernetes/util.py create mode 100644 weather_dl_v2/license_deployment/Dockerfile create mode 100644 weather_dl_v2/license_deployment/README.md create mode 100644 weather_dl_v2/license_deployment/__init__.py create mode 100644 
weather_dl_v2/license_deployment/clients.py create mode 100644 weather_dl_v2/license_deployment/config.py create mode 100644 weather_dl_v2/license_deployment/database.py create mode 100644 weather_dl_v2/license_deployment/environment.yml create mode 100644 weather_dl_v2/license_deployment/fetch.py create mode 100644 weather_dl_v2/license_deployment/job_creator.py create mode 100644 weather_dl_v2/license_deployment/license_deployment.yaml create mode 100644 weather_dl_v2/license_deployment/manifest.py create mode 100644 weather_dl_v2/license_deployment/util.py diff --git a/weather_dl_v2/README.md b/weather_dl_v2/README.md new file mode 100644 index 00000000..2b0649b9 --- /dev/null +++ b/weather_dl_v2/README.md @@ -0,0 +1,7 @@ +## weather-dl-v2 + +* **Sequence of steps:** +1) Refer to downloader_kubernetes/README.md +2) Refer to license_deployment/README.md +3) Refer to fastapi-server/README.md + diff --git a/weather_dl_v2/__init__.py b/weather_dl_v2/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/weather_dl_v2/downloader_kubernetes/Dockerfile b/weather_dl_v2/downloader_kubernetes/Dockerfile new file mode 100644 index 00000000..d96353f7 --- /dev/null +++ b/weather_dl_v2/downloader_kubernetes/Dockerfile @@ -0,0 +1,31 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +FROM continuumio/miniconda3:latest + +# Update miniconda +RUN conda update conda -y + +# Add the mamba solver for faster builds +RUN conda install -n base conda-libmamba-solver +RUN conda config --set solver libmamba + +# Create conda env using environment.yml +COPY . . +RUN conda env create -f environment.yml --debug + +# Activate the conda env and update the PATH +ARG CONDA_ENV_NAME=weather-dl-v2-downloader +RUN echo "source activate ${CONDA_ENV_NAME}" >> ~/.bashrc +ENV PATH /opt/conda/envs/${CONDA_ENV_NAME}/bin:$PATH diff --git a/weather_dl_v2/downloader_kubernetes/README.md b/weather_dl_v2/downloader_kubernetes/README.md new file mode 100644 index 00000000..2fa1cc76 --- /dev/null +++ b/weather_dl_v2/downloader_kubernetes/README.md @@ -0,0 +1,34 @@ +# Deployment / Usage Instruction + +* **User authorization required to set up the environment**: +* roles/container.admin + +* **Authorization needed for the tool to operate**: +We are not configuring any service account here hence make sure that compute engine default service account have roles: +* roles/storage.admin +* roles/bigquery.dataEditor +* roles/bigquery.jobUser + +* **Write the manifest location path** +``` +Please write down the manifest path at Line 43 of downloader.py. +Eg: "fs://test_manifest?projectId=XXX" +``` + +* **Create docker image for downloader**: +``` +export REPO= eg:weather-tools + +gcloud builds submit Dockerfile --tag "gcr.io/$PROJECT_ID/$REPO:weather-dl-v2-downloader" --timeout=79200 --machine-type=e2-highcpu-32 +``` + +* **Add path of created downloader image in downloader.yaml**: +``` +Please write down the downloader's docker image path at Line 11 of downloader.yaml. 
+``` + +## General Commands +* **For viewing the current pods**: +``` +kubectl get pods +``` diff --git a/weather_dl_v2/downloader_kubernetes/downloader.py b/weather_dl_v2/downloader_kubernetes/downloader.py new file mode 100644 index 00000000..d81fcc43 --- /dev/null +++ b/weather_dl_v2/downloader_kubernetes/downloader.py @@ -0,0 +1,56 @@ +""" +This program downloads ECMWF data & upload it into GCS. +""" +import tempfile +import os +import sys +from manifest import FirestoreManifest, Stage +from util import copy, download_with_aria2 +import datetime + +def download(url: str, path: str) -> None: + """Download data from client, with retries.""" + if path: + if os.path.exists(path): + # Empty the target file, if it already exists, otherwise the + # transfer below might be fooled into thinking we're resuming + # an interrupted download. + open(path, "w").close() + download_with_aria2(url, path) + + +def main(config_name, dataset, selection, user_id, url, target_path) -> None: + """Download data from a client to a temp file.""" + + manifest_location = "XXXXXXXXXX" + manifest = FirestoreManifest(manifest_location) + temp_name = "" + with manifest.transact(config_name, dataset, selection, target_path, user_id): + with tempfile.NamedTemporaryFile(delete=False) as temp: + temp_name = temp.name + manifest.set_stage(Stage.DOWNLOAD) + precise_download_start_time = ( + datetime.datetime.utcnow() + .replace(tzinfo=datetime.timezone.utc) + .isoformat(timespec='seconds') + ) + manifest.prev_stage_precise_start_time = precise_download_start_time + print(f'Downloading data for {target_path!r}.') + download(url, temp_name) + print(f'Download completed for {target_path!r}.') + + manifest.set_stage(Stage.UPLOAD) + precise_upload_start_time = ( + datetime.datetime.utcnow() + .replace(tzinfo=datetime.timezone.utc) + .isoformat(timespec='seconds') + ) + manifest.prev_stage_precise_start_time = precise_upload_start_time + print(f'Uploading to store for {target_path!r}.') + copy(temp_name, 
target_path) + print(f'Upload to store complete for {target_path!r}.') + os.unlink(temp_name) + +if __name__ == '__main__': + temp_args = sys.argv + main(temp_args[1], temp_args[2], temp_args[3], temp_args[4], temp_args[5], temp_args[6]) diff --git a/weather_dl_v2/downloader_kubernetes/downloader.yaml b/weather_dl_v2/downloader_kubernetes/downloader.yaml new file mode 100644 index 00000000..9b7252f8 --- /dev/null +++ b/weather_dl_v2/downloader_kubernetes/downloader.yaml @@ -0,0 +1,26 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: downloader-with-ttl +spec: + ttlSecondsAfterFinished: 0 + template: + spec: + containers: + - name: downloader + image: XXXXXXX + imagePullPolicy: Always + command: [] + resources: + requests: + cpu: "1000m" # CPU: 1 vCPU + memory: "2Gi" # RAM: 2 GiB + ephemeral-storage: "100Gi" # Storage: 100 GiB + volumeMounts: + - name: data + mountPath: /data + restartPolicy: Never + volumes: + - name: data + emptyDir: + sizeLimit: 100Gi \ No newline at end of file diff --git a/weather_dl_v2/downloader_kubernetes/environment.yml b/weather_dl_v2/downloader_kubernetes/environment.yml new file mode 100644 index 00000000..79e75565 --- /dev/null +++ b/weather_dl_v2/downloader_kubernetes/environment.yml @@ -0,0 +1,17 @@ +name: weather-dl-v2-downloader +channels: + - conda-forge +dependencies: + - python=3.10 + - google-cloud-sdk=410.0.0 + - aria2=1.36.0 + - geojson=2.5.0=py_0 + - xarray=2022.11.0 + - google-apitools + - pip=22.3 + - pip: + - apache_beam[gcp]==2.40.0 + - firebase-admin + - google-cloud-pubsub + - kubernetes + - psutil diff --git a/weather_dl_v2/downloader_kubernetes/manifest.py b/weather_dl_v2/downloader_kubernetes/manifest.py new file mode 100644 index 00000000..e082bf8e --- /dev/null +++ b/weather_dl_v2/downloader_kubernetes/manifest.py @@ -0,0 +1,442 @@ +"""Client interface for connecting to a manifest.""" + +import abc +import dataclasses +import datetime +import enum +import json +import pandas as pd +import time +import 
traceback +import typing as t + +from util import ( + to_json_serializable_type, + fetch_geo_polygon, + get_file_size, + get_wait_interval, + generate_md5_hash, + GLOBAL_COVERAGE_AREA +) + +import firebase_admin +from firebase_admin import credentials +from firebase_admin import firestore +from google.cloud.firestore_v1 import DocumentReference +from google.cloud.firestore_v1.types import WriteResult + +"""An implementation-dependent Manifest URI.""" +Location = t.NewType('Location', str) + + +class ManifestException(Exception): + """Errors that occur in Manifest Clients.""" + pass + + +class Stage(enum.Enum): + """A request can be either in one of the following stages at a time: + + fetch : This represents request is currently in fetch stage i.e. request placed on the client's server + & waiting for some result before starting download (eg. MARS client). + download : This represents request is currently in download stage i.e. data is being downloading from client's + server to the worker's local file system. + upload : This represents request is currently in upload stage i.e. data is getting uploaded from worker's local + file system to target location (GCS path). + retrieve : In case of clients where there is no proper separation of fetch & download stages (eg. CDS client), + request will be in the retrieve stage i.e. fetch + download. + """ + RETRIEVE = 'retrieve' + FETCH = 'fetch' + DOWNLOAD = 'download' + UPLOAD = 'upload' + + +class Status(enum.Enum): + """Depicts the request's state status: + + scheduled : A request partition is created & scheduled for processing. + Note: Its corresponding state can be None only. + in-progress : This represents the request state is currently in-progress (i.e. running). + The next status would be "success" or "failure". + success : This represents the request state execution completed successfully without any error. + failure : This represents the request state execution failed. 
+ """ + SCHEDULED = 'scheduled' + IN_PROGRESS = 'in-progress' + SUCCESS = 'success' + FAILURE = 'failure' + + +@dataclasses.dataclass +class DownloadStatus(): + """Data recorded in `Manifest`s reflecting the status of a download.""" + + """The name of the config file associated with the request.""" + config_name: str = "" + + """Represents the dataset field of the configuration.""" + dataset: t.Optional[str] = "" + + """Copy of selection section of the configuration.""" + selection: t.Dict = dataclasses.field(default_factory=dict) + + """Location of the downloaded data.""" + location: str = "" + + """Represents area covered by the shard.""" + area: str = "" + + """Current stage of request : 'fetch', 'download', 'retrieve', 'upload' or None.""" + stage: t.Optional[Stage] = None + + """Download status: 'scheduled', 'in-progress', 'success', or 'failure'.""" + status: t.Optional[Status] = None + + """Cause of error, if any.""" + error: t.Optional[str] = "" + + """Identifier for the user running the download.""" + username: str = "" + + """Shard size in GB.""" + size: t.Optional[float] = 0 + + """A UTC datetime when download was scheduled.""" + scheduled_time: t.Optional[str] = "" + + """A UTC datetime when the retrieve stage starts.""" + retrieve_start_time: t.Optional[str] = "" + + """A UTC datetime when the retrieve state ends.""" + retrieve_end_time: t.Optional[str] = "" + + """A UTC datetime when the fetch state starts.""" + fetch_start_time: t.Optional[str] = "" + + """A UTC datetime when the fetch state ends.""" + fetch_end_time: t.Optional[str] = "" + + """A UTC datetime when the download state starts.""" + download_start_time: t.Optional[str] = "" + + """A UTC datetime when the download state ends.""" + download_end_time: t.Optional[str] = "" + + """A UTC datetime when the upload state starts.""" + upload_start_time: t.Optional[str] = "" + + """A UTC datetime when the upload state ends.""" + upload_end_time: t.Optional[str] = "" + + @classmethod + def 
from_dict(cls, download_status: t.Dict) -> 'DownloadStatus': + """Instantiate DownloadStatus dataclass from dict.""" + download_status_instance = cls() + for key, value in download_status.items(): + if key == 'status': + setattr(download_status_instance, key, Status(value)) + elif key == 'stage' and value is not None: + setattr(download_status_instance, key, Stage(value)) + else: + setattr(download_status_instance, key, value) + return download_status_instance + + @classmethod + def to_dict(cls, instance) -> t.Dict: + """Return the fields of a dataclass instance as a manifest ingestible + dictionary mapping of field names to field values.""" + download_status_dict = {} + for field in dataclasses.fields(instance): + key = field.name + value = getattr(instance, field.name) + if isinstance(value, Status) or isinstance(value, Stage): + download_status_dict[key] = value.value + elif isinstance(value, pd.Timestamp): + download_status_dict[key] = value.isoformat() + elif key == 'selection' and value is not None: + download_status_dict[key] = json.dumps(value) + else: + download_status_dict[key] = value + return download_status_dict + + +@dataclasses.dataclass +class Manifest(abc.ABC): + """Abstract manifest of download statuses. + + Update download statuses to some storage medium. + + This class lets one indicate that a download is `scheduled` or in a transaction process. + In the event of a transaction, a download will be updated with an `in-progress`, `success` + or `failure` status (with accompanying metadata). + + Example: + ``` + my_manifest = parse_manifest_location(Location('fs://some-firestore-collection')) + + # Schedule data for download + my_manifest.schedule({'some': 'metadata'}, 'path/to/downloaded/file', 'my-username') + + # ... + + # Initiate a transaction – it will record that the download is `in-progess` + with my_manifest.transact({'some': 'metadata'}, 'path/to/downloaded/file', 'my-username') as tx: + # download logic here + pass + + # ... 
def schedule(self, config_name: str, dataset: str, selection: t.Dict, location: str, user: str) -> None:
    """Record that a shard has been scheduled for download.

    Builds a fresh `DownloadStatus` with status 'scheduled', no stage, no
    error, no size and no stage timestamps, stores it on `self.status` and
    writes it through `_update`. A 'scheduled' record precedes any
    'in-progress', 'success' or 'failure' record for the same location.
    """
    now_utc = datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc)

    # None of the stages has started yet, so all of their timestamps are unset.
    unset_stage_times = dict.fromkeys((
        'retrieve_start_time',
        'retrieve_end_time',
        'fetch_start_time',
        'fetch_end_time',
        'download_start_time',
        'download_end_time',
        'upload_start_time',
        'upload_end_time',
    ))

    self.status = DownloadStatus(
        config_name=config_name,
        # An empty dataset name is stored as None.
        dataset=dataset or None,
        selection=selection,
        location=location,
        area=fetch_geo_polygon(selection.get('area', GLOBAL_COVERAGE_AREA)),
        username=user,
        stage=None,
        status=Status.SCHEDULED,
        error=None,
        size=None,
        scheduled_time=now_utc.isoformat(timespec='seconds'),
        **unset_stage_times,
    )
    self._update(self.status)
def __exit__(self, exc_type, exc_inst, exc_tb) -> None:
    """Record end status of a transaction as either 'success' or 'failure'.

    On a clean exit the status becomes 'success' with no error; otherwise it
    becomes 'failure' and the formatted traceback is stored in `error`. The
    start/end timestamps of the stage that was active are filled in, the file
    size at `location` is refreshed, and the record is written via `_update`.

    Returns None, so an exception raised inside the `with` body still
    propagates to the caller after being recorded.
    """
    if exc_type is None:
        status = Status.SUCCESS
        error = None
    else:
        status = Status.FAILURE
        # format_exception() returns strings that already end in a newline, so
        # they are concatenated as-is; joining with '\n' would insert a blank
        # line after every traceback line.
        # See https://docs.python.org/3/library/traceback.html#traceback.format_exception
        error = ''.join(traceback.format_exception(exc_type, exc_inst, exc_tb))

    new_status = dataclasses.replace(self.status)
    new_status.error = error
    new_status.status = status
    current_utc_time = (
        datetime.datetime.utcnow()
        .replace(tzinfo=datetime.timezone.utc)
        .isoformat(timespec='seconds')
    )

    # This is necessary for setting the precise start time of the previous stage
    # and end time of the final stage, as well as handling the case of Status.FAILURE.
    # Any stage other than fetch/retrieve/download (including None) is booked as upload.
    stage_prefix = {
        Stage.FETCH: 'fetch',
        Stage.RETRIEVE: 'retrieve',
        Stage.DOWNLOAD: 'download',
    }.get(new_status.stage, 'upload')
    setattr(new_status, f'{stage_prefix}_start_time', self.prev_stage_precise_start_time)
    setattr(new_status, f'{stage_prefix}_end_time', current_utc_time)

    new_status.size = get_file_size(new_status.location)

    self.status = new_status

    self._update(self.status)
def _get_db(self) -> firestore.firestore.Client:
    """Acquire a firestore client, initializing the firebase app if necessary.

    Each failed attempt tries to initialize the default Firebase app with the
    application default credentials and then backs off before retrying.

    Raises:
        ManifestException: if the client still cannot be obtained once the
            retry budget is used up; the last `ValueError` is chained as cause.
    """
    db = None
    attempts = 0

    while db is None:
        try:
            db = firestore.client()
        except ValueError as e:
            # The above call will fail with a value error when the firebase app is not initialized.
            # Initialize the app here, and try again.
            # Use the application default credentials.
            try:
                cred = credentials.ApplicationDefault()
                firebase_admin.initialize_app(cred)
                print('Initialized Firebase App.')
            except ValueError:
                # initialize_app() itself raises ValueError when the default app
                # already exists (i.e. an earlier attempt initialized it). That
                # must not escape from here, otherwise the retry limit below is
                # never reached and callers see a bare ValueError.
                pass

            if attempts > 4:
                raise ManifestException('Exceeded number of retries to get firestore client.') from e

            # Back off only after a failure; there is nothing to wait for on success.
            time.sleep(get_wait_interval(attempts))

        attempts += 1

    return db
+ root_collection = 'test_manifest' + return self._get_db().collection(root_collection).document(store_scheme) diff --git a/weather_dl_v2/downloader_kubernetes/util.py b/weather_dl_v2/downloader_kubernetes/util.py new file mode 100644 index 00000000..7b8b128d --- /dev/null +++ b/weather_dl_v2/downloader_kubernetes/util.py @@ -0,0 +1,185 @@ +import datetime +import geojson +import hashlib +import itertools +import os +import socket +import subprocess +import sys +import typing as t + +import numpy as np +import pandas as pd +from apache_beam.io.gcp import gcsio +from apache_beam.utils import retry +from xarray.core.utils import ensure_us_time_resolution +from urllib.parse import urlparse +from google.api_core.exceptions import BadRequest + + +LATITUDE_RANGE = (-90, 90) +LONGITUDE_RANGE = (-180, 180) +GLOBAL_COVERAGE_AREA = [90, -180, -90, 180] + + +def _retry_if_valid_input_but_server_or_socket_error_and_timeout_filter(exception) -> bool: + if isinstance(exception, socket.timeout): + return True + if isinstance(exception, TimeoutError): + return True + # To handle the concurrency issue in BigQuery. + if isinstance(exception, BadRequest): + return True + return retry.retry_if_valid_input_but_server_error_and_timeout_filter(exception) + + +class _FakeClock: + def sleep(self, value): + pass + + +def retry_with_exponential_backoff(fun): + """A retry decorator that doesn't apply during test time.""" + clock = retry.Clock() + + # Use a fake clock only during test time... 
def to_json_serializable_type(value: t.Any) -> t.Any:
    """Returns the value with a type serializable to JSON.

    Conversions, in the order they are tried:
      * `None` and missing scalars (NaN, NaT, ...) -> `None`
      * floating point numbers -> `float`
      * `np.ndarray` -> `list` (or a scalar for a 0-d array)
      * `datetime`, `np.datetime64` and ISO-format strings -> ISO-8601 string
        with a UTC offset (naive timestamps are assumed to be UTC);
        strings that are not ISO timestamps are returned unchanged
      * `np.timedelta64` -> seconds as `float`
      * integers -> `int`
    Anything else is returned unchanged.
    """
    # Note: The order of processing is significant.
    print('Serializing to JSON')

    if value is None:
        return None

    # pd.isna() answers element-wise for lists and arrays. Using that array
    # directly in a boolean context raises "truth value is ambiguous" for more
    # than one element, so only a single-element answer is treated as a flag.
    missing = pd.isna(value)
    if np.size(missing) == 1 and bool(np.all(missing)):
        return None
    elif np.issubdtype(type(value), np.floating):
        return float(value)
    elif type(value) == np.ndarray:
        # Will return a scaler if array is of size 1, else will return a list.
        return value.tolist()
    elif type(value) == datetime.datetime or type(value) == str or type(value) == np.datetime64:
        # Assume strings are ISO format timestamps...
        try:
            value = datetime.datetime.fromisoformat(value)
        except ValueError:
            # ... if they are not, assume serialization is already correct.
            return value
        except TypeError:
            # ... maybe value is a numpy datetime ...
            try:
                value = ensure_us_time_resolution(value).astype(datetime.datetime)
            except AttributeError:
                # ... value is a datetime object, continue.
                pass

        # We use a string timestamp representation.
        if value.tzname():
            return value.isoformat()

        # We assume here that naive timestamps are in UTC timezone.
        return value.replace(tzinfo=datetime.timezone.utc).isoformat()
    elif type(value) == np.timedelta64:
        # Return time delta in seconds.
        return float(value / np.timedelta64(1, 's'))
    # This check must happen after processing np.timedelta64 and np.datetime64.
    elif np.issubdtype(type(value), np.integer):
        return int(value)

    return value
def get_file_size(path: str) -> float:
    """Return the size of the file at `path` in GB (bytes / 1024**3).

    A path of the form `gs://<bucket>/...` is looked up in Google Cloud
    Storage; any other path is treated as a local file. Returns 0 when the
    file does not exist.
    """
    parsed_gcs_path = urlparse(path)
    if parsed_gcs_path.scheme != 'gs' or parsed_gcs_path.netloc == '':
        return os.stat(path).st_size / (1024 ** 3) if os.path.exists(path) else 0

    # Build the GCS client once and reuse it for both the existence check and
    # the size lookup instead of constructing a second client.
    gcs = gcsio.GcsIO()
    return gcs.size(path) / (1024 ** 3) if gcs.exists(path) else 0
# Create conda env using environment.yml RUN conda env create -f environment.yml --debug diff --git a/weather_dl_v2/fastapi-server/routers/license.py b/weather_dl_v2/fastapi-server/routers/license.py index 7575ec9e..fbbe923a 100644 --- a/weather_dl_v2/fastapi-server/routers/license.py +++ b/weather_dl_v2/fastapi-server/routers/license.py @@ -12,6 +12,7 @@ class License(BaseModel): number_of_requests: int api_key: str api_url: str + api_email: str class LicenseInternal(License): diff --git a/weather_dl_v2/license_deployment/Dockerfile b/weather_dl_v2/license_deployment/Dockerfile new file mode 100644 index 00000000..579237dc --- /dev/null +++ b/weather_dl_v2/license_deployment/Dockerfile @@ -0,0 +1,20 @@ +FROM continuumio/miniconda3:latest + +# Update miniconda +RUN conda update conda -y + +# Add the mamba solver for faster builds +RUN conda install -n base conda-libmamba-solver +RUN conda config --set solver libmamba + +COPY . . +COPY ../downloader/downloader.yaml . +# Create conda env using environment.yml +RUN conda env create -f environment.yml --debug + +# Activate the conda env and update the PATH +ARG CONDA_ENV_NAME=weather-dl-v2-license-dep +RUN echo "source activate ${CONDA_ENV_NAME}" >> ~/.bashrc +ENV PATH /opt/conda/envs/${CONDA_ENV_NAME}/bin:$PATH + +ENTRYPOINT ["python", "-u", "fetch.py"] diff --git a/weather_dl_v2/license_deployment/README.md b/weather_dl_v2/license_deployment/README.md new file mode 100644 index 00000000..97f32647 --- /dev/null +++ b/weather_dl_v2/license_deployment/README.md @@ -0,0 +1,37 @@ +# Deployment Instructions & General Notes + +* **How to create environment:** +``` +conda env create --name weather-dl-v2-license-dep --file=environment.yml + +conda activate weather-dl-v2-license-dep +``` + +* **Create docker image for license deployment**: +``` +export PROJECT_ID= +export REPO= eg:weather-tools + +gcloud builds submit . 
class Client(abc.ABC):
    """Weather data provider client interface.

    Defines methods and properties required to efficiently interact with weather
    data providers.

    Attributes:
        dataset: Name of the dataset this client was created for.
        logger: Logger named after the concrete client class, set to `level`.
    """

    def __init__(self, dataset: str, level: int = logging.INFO) -> None:
        """Store the dataset name and set up a per-class logger at `level`."""
        self.dataset = dataset
        self.logger = logging.getLogger(f'{__name__}.{type(self).__name__}')
        self.logger.setLevel(level)

    @abc.abstractmethod
    def retrieve(self, dataset: str, selection: t.Dict, manifest: 'Manifest') -> t.Optional[t.Dict]:
        """Request `selection` of `dataset` from the data source.

        The signature matches the concrete clients in this module, none of
        which takes an output path. Implementations record their progress on
        `manifest` and return whatever the provider's fetch call returned, or
        None when there is no result.
        """
        pass

    @classmethod
    @abc.abstractmethod
    def num_requests_per_key(cls, dataset: str) -> int:
        """Specifies the number of workers to be used per api key for the dataset."""
        pass

    @property
    @abc.abstractmethod
    def license_url(self):
        """Specifies the License URL."""
        pass
+ + Datasets on CDS can be found at: + https://cds.climate.copernicus.eu/cdsapp#!/search?type=dataset + + The parameters section of the input `config` requires two values: `api_url` and + `api_key`. Or, these values can be set as the environment variables: `CDSAPI_URL` + and `CDSAPI_KEY`. These can be acquired from the following URL, which requires + creating a free account: https://cds.climate.copernicus.eu/api-how-to + + The CDS global queues for data access has dynamic rate limits. These can be viewed + live here: https://cds.climate.copernicus.eu/live/limits. + + Attributes: + config: A config that contains pipeline parameters, such as API keys. + level: Default log level for the client. + """ + + """Name patterns of datasets that are hosted internally on CDS servers.""" + cds_hosted_datasets = {'reanalysis-era'} + + def retrieve(self, dataset: str, selection: t.Dict, manifest: Manifest) -> None: + c = CDSClientExtended( + url=os.environ.get('CLIENT_URL'), + key=os.environ.get('CLIENT_KEY'), + debug_callback=self.logger.debug, + info_callback=self.logger.info, + warning_callback=self.logger.warning, + error_callback=self.logger.error, + ) + selection_ = optimize_selection_partition(selection) + with StdoutLogger(self.logger, level=logging.DEBUG): + manifest.set_stage(Stage.FETCH) + precise_fetch_start_time = ( + datetime.datetime.utcnow() + .replace(tzinfo=datetime.timezone.utc) + .isoformat(timespec='seconds') + ) + manifest.prev_stage_precise_start_time = precise_fetch_start_time + result = c.fetch(selection_, dataset) + return result + + @property + def license_url(self): + return 'https://cds.climate.copernicus.eu/api/v2/terms/static/licence-to-use-copernicus-products.pdf' + + @classmethod + def num_requests_per_key(cls, dataset: str) -> int: + """Number of requests per key from the CDS API. 
class StdoutLogger(io.StringIO):
    """File-like object that forwards everything printed to stdout into a logger.

    Used as a context manager: while the `with` block is active, `sys.stdout`
    is redirected to this object and each non-blank write becomes one log
    record at the configured level.
    """

    def __init__(self, logger_: logging.Logger, level: int = logging.INFO):
        super().__init__()
        self.logger = logger_
        self.level = level
        self._redirector = contextlib.redirect_stdout(self)

    def log(self, msg) -> None:
        """Emit `msg` on the wrapped logger at this object's level."""
        self.logger.log(self.level, msg)

    def write(self, msg):
        """Log `msg` unless it is empty or consists only of whitespace."""
        if not msg or msg.isspace():
            # Skip the bare newlines that print() writes separately.
            return
        self.logger.log(self.level, msg)

    def __enter__(self):
        """Start redirecting stdout to this object."""
        self._redirector.__enter__()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Restore stdout; exceptions from the block are not suppressed."""
        # let contextlib do any exception handling here
        self._redirector.__exit__(exc_type, exc_value, traceback)
class SplitRequestMixin:
    """Mixin that forwards `fetch` and `download` to a wrapped request object.

    The wrapped object lives in `c`; it is `None` on the mixin itself and is
    expected to be assigned by the class that uses the mixin.
    """

    c = None

    def fetch(self, req: t.Dict, dataset: t.Optional[str] = None) -> t.Dict:
        """Hand `req` and `dataset` to the wrapped object's `fetch` and return its result."""
        delegate = self.c
        outcome = delegate.fetch(req, dataset)
        return outcome

    def download(self, res: t.Dict, target: str) -> None:
        """Hand `res` and `target` to the wrapped object's `download`."""
        delegate = self.c
        delegate.download(res, target)
class MarsClient(Client):
    """A client to access data from the Meteorological Archival and Retrieval System (MARS).

    See https://www.ecmwf.int/en/forecasts/datasets for a summary of datasets available
    on MARS. Most notably, MARS provides access to ECMWF's Operational Archive
    https://www.ecmwf.int/en/forecasts/dataset/operational-archive.

    Credentials are read from three environment variables: `CLIENT_KEY`,
    `CLIENT_URL` and `CLIENT_EMAIL`. They can be looked up after registering for
    an ECMWF account (https://apps.ecmwf.int/registration/) and visiting
    https://api.ecmwf.int/v1/key/.

    MARS server activity can be observed at https://apps.ecmwf.int/mars-activity/.
    """

    def retrieve(self, dataset: str, selection: t.Dict, manifest: Manifest) -> None:
        """Submit the selection to MARS and return the result of the fetch stage.

        The manifest is moved to the FETCH stage and given a precise start
        timestamp before the request is submitted; anything written to stdout
        in the meantime is logged at DEBUG level.
        """
        service = MARSECMWFServiceExtended(
            "mars",
            key=os.environ.get("CLIENT_KEY"),
            url=os.environ.get("CLIENT_URL"),
            email=os.environ.get("CLIENT_EMAIL"),
            log=self.logger.debug,
            verbose=True,
        )
        request = optimize_selection_partition(selection)
        with StdoutLogger(self.logger, level=logging.DEBUG):
            manifest.set_stage(Stage.FETCH)
            # Taken after set_stage() so the manifest update is not counted
            # as part of the fetch itself.
            manifest.prev_stage_precise_start_time = (
                datetime.datetime.utcnow()
                .replace(tzinfo=datetime.timezone.utc)
                .isoformat(timespec='seconds')
            )
            return service.fetch(req=request)

    @property
    def license_url(self):
        """URL of the general ECMWF dataset licence."""
        return 'https://apps.ecmwf.int/datasets/licences/general/'

    @classmethod
    def num_requests_per_key(cls, dataset: str) -> int:
        """Number of requests per key (or user) for the Mars API.

        Mars allows 2 active requests per user and 20 queued requests per user, as of Sept 27, 2021.
        To ensure we never hit a rate limit error during download, we only make use of the active
        requests.
        See: https://confluence.ecmwf.int/display/UDOC/Total+number+of+requests+a+user+can+submit+-+Web+API+FAQ

        Queued requests can _only_ be canceled manually from a web dashboard. If the
        `ERROR 101 (USER_QUEUED_LIMIT_EXCEEDED)` error occurs, go to
        http://apps.ecmwf.int/webmars/joblist/ and cancel queued jobs.
        """
        return 2
class FakeClient(Client):
    """A stand-in client that contacts no data provider.

    `retrieve` only moves the manifest to the RETRIEVE stage, records the
    stage's start time and logs the dataset name.
    """

    def retrieve(self, dataset: str, selection: t.Dict, manifest: Manifest) -> None:
        """Mark the RETRIEVE stage as started on `manifest` and log the dataset."""
        manifest.set_stage(Stage.RETRIEVE)
        started_at = datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc)
        manifest.prev_stage_precise_start_time = started_at.isoformat(timespec='seconds')
        self.logger.debug(f'Downloading {dataset}.')

    @property
    def license_url(self):
        """Placeholder licence text."""
        return 'lorem ipsum'

    @classmethod
    def num_requests_per_key(cls, dataset: str) -> int:
        """The fake client uses a single request per key."""
        return 1
Can make use of Python's standard string formatting. + It can contain format symbols to be replaced by partition keys; + if this is used, the total number of format symbols must match the number of partition keys. + subsection_name: + Name of the particular subsection. 'default' if there is no subsection. + force_download: + Force redownload of partitions that were previously downloaded. + user_id: + Username from the environment variables. + kwargs (optional): + For representing subsections or any other parameters. + selection: + Contains parameters used to select desired data. + """ + + config_name: str = "" + client: str = "" + dataset: t.Optional[str] = "" + target_path: str = "" + partition_keys: t.Optional[t.List[str]] = dataclasses.field(default_factory=list) + subsection_name: str = "default" + force_download: bool = False + user_id: str = "unknown" + kwargs: t.Optional[t.Dict[str, Values]] = dataclasses.field(default_factory=dict) + selection: t.Dict[str, Values] = dataclasses.field(default_factory=dict) + + @classmethod + def from_dict(cls, config: t.Dict) -> 'Config': + config_instance = cls() + for section_key, section_value in config.items(): + if section_key == "parameters": + for key, value in section_value.items(): + if hasattr(config_instance, key): + setattr(config_instance, key, value) + else: + config_instance.kwargs[key] = value + if section_key == "selection": + config_instance.selection = section_value + return config_instance + + +def optimize_selection_partition(selection: t.Dict) -> t.Dict: + """Compute right-hand-side values for the selection section of a single partition. + + Used to support custom syntax and optimizations, such as 'all'. + """ + selection_ = copy.deepcopy(selection) + + if 'day' in selection_.keys() and selection_['day'] == 'all': + year, month = selection_['year'], selection_['month'] + + multiples_error = "Cannot use keyword 'all' on selections with multiple '{type}'s." 
+ + if isinstance(year, list): + assert len(year) == 1, multiples_error.format(type='year') + year = year[0] + + if isinstance(month, list): + assert len(month) == 1, multiples_error.format(type='month') + month = month[0] + + if isinstance(year, str): + assert '/' not in year, multiples_error.format(type='year') + + if isinstance(month, str): + assert '/' not in month, multiples_error.format(type='month') + + year, month = int(year), int(month) + + _, n_days_in_month = calendar.monthrange(year, month) + + selection_['date'] = f'{year:04d}-{month:02d}-01/to/{year:04d}-{month:02d}-{n_days_in_month:02d}' + del selection_['day'] + del selection_['month'] + del selection_['year'] + + return selection_ diff --git a/weather_dl_v2/license_deployment/database.py b/weather_dl_v2/license_deployment/database.py new file mode 100644 index 00000000..a165199d --- /dev/null +++ b/weather_dl_v2/license_deployment/database.py @@ -0,0 +1,106 @@ +import abc +import time +import firebase_admin +from firebase_admin import firestore +from firebase_admin import credentials +from google.cloud.firestore_v1 import DocumentSnapshot, DocumentReference +from google.cloud.firestore_v1.types import WriteResult +from google.cloud.firestore_v1.base_query import FieldFilter, And +from util import get_wait_interval + + +class Database(abc.ABC): + @abc.abstractmethod + def _get_db(self): + pass + + +class CRUDOperations(abc.ABC): + @abc.abstractmethod + def _initialize_license_deployment(self, license_id: str) -> dict: + pass + + @abc.abstractmethod + def _get_config_from_queue_by_license_id(self, license_id: str) -> dict: + pass + + @abc.abstractmethod + def _remove_config_from_license_queue(self, license_id: str, config_name: str) -> None: + pass + + @abc.abstractmethod + def _get_partition_from_manifest(self, config_name: str) -> str: + pass + +class FirestoreClient(Database, CRUDOperations): + def _get_db(self) -> firestore.firestore.Client: + """Acquire a firestore client, initializing the 
firebase app if necessary. + Will attempt to get the db client five times. If it's still unsuccessful, a + `ManifestException` will be raised. + """ + db = None + attempts = 0 + + while db is None: + try: + db = firestore.client() + except ValueError as e: + # The above call will fail with a value error when the firebase app is not initialized. + # Initialize the app here, and try again. + # Use the application default credentials. + cred = credentials.ApplicationDefault() + + firebase_admin.initialize_app(cred) + print('Initialized Firebase App.') + + if attempts > 4: + raise RuntimeError('Exceeded number of retries to get firestore client.') from e + + time.sleep(get_wait_interval(attempts)) + + attempts += 1 + + return db + + def _initialize_license_deployment(self, license_id: str) -> dict: + result: DocumentSnapshot = self._get_db().collection('license').document(license_id).get() + return result.to_dict() + + def _get_config_from_queue_by_license_id(self, license_id: str) -> str | None: + result: DocumentSnapshot = self._get_db().collection('queues').document(license_id).get(['queue']) + if result.exists: + queue = result.to_dict()['queue'] + if len(queue) > 0: + return queue[0] + return None + + def _get_partition_from_manifest(self, config_name: str) -> str | None: + transaction = self._get_db().transaction() + return get_partition_from_manifest(transaction, config_name) + + def _remove_config_from_license_queue(self, license_id: str, config_name: str) -> None: + result: WriteResult = self._get_db().collection('queues').document(license_id).update({ + 'queue': firestore.ArrayRemove([config_name])}) + print(f"Updated {license_id} queue in 'queues' collection. 
Update_time: {result.update_time}.") + + +@firestore.transactional +def get_partition_from_manifest(transaction, config_name: str) -> str | None: + db_client = FirestoreClient() + filter_1 = FieldFilter("config_name", "==", config_name) + filter_2 = FieldFilter("status", "==", 'scheduled') + and_filter = And(filters=[filter_1, filter_2]) + + snapshot = db_client._get_db().collection('test_manifest').where(filter=and_filter)\ + .limit(1).get(transaction=transaction) + if len(snapshot) > 0: + snapshot = snapshot[0] + else: + return None + + ref: DocumentReference = db_client._get_db().collection(u'test_manifest').document(snapshot.id) + transaction.update(ref, { + u'status': u'processing' + }) + + return snapshot.to_dict() diff --git a/weather_dl_v2/license_deployment/environment.yml b/weather_dl_v2/license_deployment/environment.yml new file mode 100644 index 00000000..966ec213 --- /dev/null +++ b/weather_dl_v2/license_deployment/environment.yml @@ -0,0 +1,16 @@ +name: weather-dl-v2-license-dep +channels: + - conda-forge +dependencies: + - python=3.10 + - geojson + - cdsapi=0.5.1 + - ecmwf-api-client=1.6.3 + - pip=22.3 + - pip: + - kubernetes + - aiohttp + - numpy + - xarray + - apache-beam[gcp] + - firebase-admin diff --git a/weather_dl_v2/license_deployment/fetch.py b/weather_dl_v2/license_deployment/fetch.py new file mode 100644 index 00000000..393a027b --- /dev/null +++ b/weather_dl_v2/license_deployment/fetch.py @@ -0,0 +1,92 @@ +import json +import threading +import time +import sys +import os + +from database import FirestoreClient +from job_creator import create_download_job +from clients import CLIENTS +from manifest import FirestoreManifest + +db_client = FirestoreClient() + +def create_job(request, result): + res = { + 'config_name': request['config_name'], + 'dataset': request['dataset'], + 'selection': json.loads(request['selection']), + 'user_id': request['username'], + 'url': result['href'], + 'target_path': request['location'] + } + + data_str = 
json.dumps(res) + create_download_job(data_str) + + +def make_fetch_request(request): + with semaphore: + client = CLIENTS[client_name](request['dataset']) + manifest = FirestoreManifest() + print(f'By using {client_name} datasets, ' + f'users agree to the terms and conditions specified in {client.license_url!r}') + + target = request['location'] + selection = json.loads(request['selection']) + + print(f'Fetching data for {target!r}.') + with manifest.transact(request['config_name'], request['dataset'], selection, target, request['username']): + result = client.retrieve(request['dataset'], selection, manifest) + print(f"Result fetched {result} for request {request}.") + create_job(request, result) + + +def fetch_request_from_db(): + request = None + config_name = db_client._get_config_from_queue_by_license_id(license_id) + if config_name: + request = db_client._get_partition_from_manifest(config_name) + if not request: + db_client._remove_config_from_license_queue(license_id, config_name) + return request + + +def main(): + print("Started looking at the request.") + while True: + # Fetch a request from the database + request = fetch_request_from_db() + + if request is not None: + # Create a thread to process the request + thread = threading.Thread(target=make_fetch_request, args=(request,)) + thread.start() + else: + print("No request available. 
Waiting...") + time.sleep(5) + + # Check if the maximum concurrency level has been reached + # If so, wait for a slot to become available + with semaphore: + pass + + +def boot_up(license: str) -> None: + global license_id, client_name, concurrency_limit, semaphore + + result = db_client._initialize_license_deployment(license) + license_id = license + client_name = result['client_name'] + concurrency_limit = result['number_of_requests'] + os.environ.setdefault('CLIENT_URL', result['api_url']) + os.environ.setdefault('CLIENT_KEY', result['api_key']) + os.environ.setdefault('CLIENT_EMAIL', result['api_email']) + semaphore = threading.Semaphore(concurrency_limit) + + +if __name__ == "__main__": + license = sys.argv[2] + print(f"Deployment for license: {license}.") + boot_up(license) + main() diff --git a/weather_dl_v2/license_deployment/job_creator.py b/weather_dl_v2/license_deployment/job_creator.py new file mode 100644 index 00000000..b5f936ca --- /dev/null +++ b/weather_dl_v2/license_deployment/job_creator.py @@ -0,0 +1,23 @@ +from os import path +import yaml +import json +import uuid +from kubernetes import client, config + + +def create_download_job(message): + """Creates a kubernetes workflow of type Job for downloading the data.""" + parsed_message = json.loads(message) + config_name, dataset, selection, user_id, url, target_path = parsed_message.values() + selection = str(selection).replace(" ", "") + config.load_config() + + with open(path.join(path.dirname(__file__), "downloader.yaml")) as f: + dep = yaml.safe_load(f) + uid = uuid.uuid4() + dep['metadata']['name'] = f'downloader-job-id-{uid}' + # d = target_path.rsplit('/')[-1] + # dep['metadata']['name'] = f'a{d}a' + dep['spec']['template']['spec']['containers'][0]['command'] = ['python', 'downloader.py', config_name, dataset, selection, user_id, url, target_path] + batch_api = client.BatchV1Api() + batch_api.create_namespaced_job(body=dep, namespace='default') \ No newline at end of file diff --git 
a/weather_dl_v2/license_deployment/license_deployment.yaml b/weather_dl_v2/license_deployment/license_deployment.yaml new file mode 100644 index 00000000..b5d24207 --- /dev/null +++ b/weather_dl_v2/license_deployment/license_deployment.yaml @@ -0,0 +1,64 @@ +--- +# weather-dl-v2-license-dep Deployment +# Defines the deployment of the app running in a pod on any worker node +apiVersion: apps/v1 +kind: Deployment +metadata: + name: weather-dl-v2-license-dep + labels: + app: weather-dl-v2-license-dep +spec: + replicas: 1 + selector: + matchLabels: + app: weather-dl-v2-license-dep + template: + metadata: + labels: + app: weather-dl-v2-license-dep + spec: + containers: + - name: weather-dl-v2-license-dep + image: XXXXXXX + imagePullPolicy: Always + # resources: + # # You must specify requests for CPU to autoscale + # # based on CPU utilization + # requests: + # cpu: "250m" +--- +kind: Role +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: weather-dl-v2-license-dep +rules: + - apiGroups: + - "" + - "apps" + - "batch" + resources: + - endpoints + - deployments + - pods + - jobs + verbs: + - get + - list + - watch + - create + - delete +--- +kind: RoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: weather-dl-v2-license-dep + namespace: default +subjects: + - kind: ServiceAccount + name: default + namespace: default +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: weather-dl-v2-license-dep +--- \ No newline at end of file diff --git a/weather_dl_v2/license_deployment/manifest.py b/weather_dl_v2/license_deployment/manifest.py new file mode 100644 index 00000000..8d2e2c2b --- /dev/null +++ b/weather_dl_v2/license_deployment/manifest.py @@ -0,0 +1,457 @@ +"""Client interface for connecting to a manifest.""" + +import abc +import dataclasses +import datetime +import enum +import json +import pandas as pd +import time +import traceback +import typing as t + +from util import ( + to_json_serializable_type, + fetch_geo_polygon, 
+ get_file_size, + get_wait_interval, + generate_md5_hash, + GLOBAL_COVERAGE_AREA +) + +import firebase_admin +from firebase_admin import credentials +from firebase_admin import firestore +from google.cloud.firestore_v1 import DocumentReference +from google.cloud.firestore_v1.types import WriteResult + +from database import Database + +"""An implementation-dependent Manifest URI.""" +Location = t.NewType('Location', str) + + +class ManifestException(Exception): + """Errors that occur in Manifest Clients.""" + pass + + +class Stage(enum.Enum): + """A request can be either in one of the following stages at a time: + + fetch : This represents request is currently in fetch stage i.e. request placed on the client's server + & waiting for some result before starting download (eg. MARS client). + download : This represents request is currently in download stage i.e. data is being downloading from client's + server to the worker's local file system. + upload : This represents request is currently in upload stage i.e. data is getting uploaded from worker's local + file system to target location (GCS path). + retrieve : In case of clients where there is no proper separation of fetch & download stages (eg. CDS client), + request will be in the retrieve stage i.e. fetch + download. + """ + RETRIEVE = 'retrieve' + FETCH = 'fetch' + DOWNLOAD = 'download' + UPLOAD = 'upload' + + +class Status(enum.Enum): + """Depicts the request's state status: + + scheduled : A request partition is created & scheduled for processing. + Note: Its corresponding state can be None only. + processing: This represents that the request picked by license deployment. + in-progress : This represents the request state is currently in-progress (i.e. running). + The next status would be "success" or "failure". + success : This represents the request state execution completed successfully without any error. + failure : This represents the request state execution failed. 
+ """ + PROCESSING = 'processing' + SCHEDULED = 'scheduled' + IN_PROGRESS = 'in-progress' + SUCCESS = 'success' + FAILURE = 'failure' + + +@dataclasses.dataclass +class DownloadStatus(): + """Data recorded in `Manifest`s reflecting the status of a download.""" + + """The name of the config file associated with the request.""" + config_name: str = "" + + """Represents the dataset field of the configuration.""" + dataset: t.Optional[str] = "" + + """Copy of selection section of the configuration.""" + selection: t.Dict = dataclasses.field(default_factory=dict) + + """Location of the downloaded data.""" + location: str = "" + + """Represents area covered by the shard.""" + area: str = "" + + """Current stage of request : 'fetch', 'download', 'retrieve', 'upload' or None.""" + stage: t.Optional[Stage] = None + + """Download status: 'scheduled', 'in-progress', 'success', or 'failure'.""" + status: t.Optional[Status] = None + + """Cause of error, if any.""" + error: t.Optional[str] = "" + + """Identifier for the user running the download.""" + username: str = "" + + """Shard size in GB.""" + size: t.Optional[float] = 0 + + """A UTC datetime when download was scheduled.""" + scheduled_time: t.Optional[str] = "" + + """A UTC datetime when the retrieve stage starts.""" + retrieve_start_time: t.Optional[str] = "" + + """A UTC datetime when the retrieve state ends.""" + retrieve_end_time: t.Optional[str] = "" + + """A UTC datetime when the fetch state starts.""" + fetch_start_time: t.Optional[str] = "" + + """A UTC datetime when the fetch state ends.""" + fetch_end_time: t.Optional[str] = "" + + """A UTC datetime when the download state starts.""" + download_start_time: t.Optional[str] = "" + + """A UTC datetime when the download state ends.""" + download_end_time: t.Optional[str] = "" + + """A UTC datetime when the upload state starts.""" + upload_start_time: t.Optional[str] = "" + + """A UTC datetime when the upload state ends.""" + upload_end_time: t.Optional[str] = "" + + 
@classmethod + def from_dict(cls, download_status: t.Dict) -> 'DownloadStatus': + """Instantiate DownloadStatus dataclass from dict.""" + download_status_instance = cls() + for key, value in download_status.items(): + if key == 'status': + setattr(download_status_instance, key, Status(value)) + elif key == 'stage' and value is not None: + setattr(download_status_instance, key, Stage(value)) + else: + setattr(download_status_instance, key, value) + return download_status_instance + + @classmethod + def to_dict(cls, instance) -> t.Dict: + """Return the fields of a dataclass instance as a manifest ingestible + dictionary mapping of field names to field values.""" + download_status_dict = {} + for field in dataclasses.fields(instance): + key = field.name + value = getattr(instance, field.name) + if isinstance(value, Status) or isinstance(value, Stage): + download_status_dict[key] = value.value + elif isinstance(value, pd.Timestamp): + download_status_dict[key] = value.isoformat() + elif key == 'selection' and value is not None: + download_status_dict[key] = json.dumps(value) + else: + download_status_dict[key] = value + return download_status_dict + + +@dataclasses.dataclass +class Manifest(abc.ABC): + """Abstract manifest of download statuses. + + Update download statuses to some storage medium. + + This class lets one indicate that a download is `scheduled` or in a transaction process. + In the event of a transaction, a download will be updated with an `in-progress`, `success` + or `failure` status (with accompanying metadata). + + Example: + ``` + my_manifest = parse_manifest_location(Location('fs://some-firestore-collection')) + + # Schedule data for download + my_manifest.schedule({'some': 'metadata'}, 'path/to/downloaded/file', 'my-username') + + # ... 
+ + # Initiate a transaction – it will record that the download is `in-progess` + with my_manifest.transact({'some': 'metadata'}, 'path/to/downloaded/file', 'my-username') as tx: + # download logic here + pass + + # ... + + # on error, will record the download as a `failure` before propagating the error. By default, it will + # record download as a `success`. + ``` + + Attributes: + status: The current `DownloadStatus` of the Manifest. + """ + + # To reduce the impact of _read() and _update() calls + # on the start time of the stage. + prev_stage_precise_start_time: t.Optional[str] = None + status: t.Optional[DownloadStatus] = None + + # This is overridden in subclass. + def __post_init__(self): + """Initialize the manifest.""" + pass + + def schedule(self, config_name: str, dataset: str, selection: t.Dict, location: str, user: str) -> None: + """Indicate that a job has been scheduled for download. + + 'scheduled' jobs occur before 'in-progress', 'success' or 'finished'. + """ + scheduled_time = datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc).isoformat(timespec='seconds') + self.status = DownloadStatus( + config_name=config_name, + dataset=dataset if dataset else None, + selection=selection, + location=location, + area=fetch_geo_polygon(selection.get('area', GLOBAL_COVERAGE_AREA)), + username=user, + stage=None, + status=Status.SCHEDULED, + error=None, + size=None, + scheduled_time=scheduled_time, + retrieve_start_time=None, + retrieve_end_time=None, + fetch_start_time=None, + fetch_end_time=None, + download_start_time=None, + download_end_time=None, + upload_start_time=None, + upload_end_time=None, + ) + self._update(self.status) + + def skip(self, config_name: str, dataset: str, selection: t.Dict, location: str, user: str) -> None: + """Updates the manifest to mark the shards that were skipped in the current job + as 'upload' stage and 'success' status, indicating that they have already been downloaded. 
+ """ + old_status = self._read(location) + # The manifest needs to be updated for a skipped shard if its entry is not present, or + # if the stage is not 'upload', or if the stage is 'upload' but the status is not 'success'. + if old_status.location != location or old_status.stage != Stage.UPLOAD or old_status.status != Status.SUCCESS: + current_utc_time = ( + datetime.datetime.utcnow() + .replace(tzinfo=datetime.timezone.utc) + .isoformat(timespec='seconds') + ) + + size = get_file_size(location) + + status = DownloadStatus( + config_name=config_name, + dataset=dataset if dataset else None, + selection=selection, + location=location, + area=fetch_geo_polygon(selection.get('area', GLOBAL_COVERAGE_AREA)), + username=user, + stage=Stage.UPLOAD, + status=Status.SUCCESS, + error=None, + size=size, + scheduled_time=None, + retrieve_start_time=None, + retrieve_end_time=None, + fetch_start_time=None, + fetch_end_time=None, + download_start_time=None, + download_end_time=None, + upload_start_time=current_utc_time, + upload_end_time=current_utc_time, + ) + self._update(status) + print(f'Manifest updated for skipped shard: {location!r} -- {DownloadStatus.to_dict(status)!r}.') + + def _set_for_transaction(self, config_name: str, dataset: str, selection: t.Dict, location: str, user: str) -> None: + """Reset Manifest state in preparation for a new transaction.""" + self.status = dataclasses.replace(self._read(location)) + self.status.config_name = config_name + self.status.dataset = dataset if dataset else None + self.status.selection = selection + self.status.location = location + self.status.username = user + + def __enter__(self) -> None: + pass + + def __exit__(self, exc_type, exc_inst, exc_tb) -> None: + """Record end status of a transaction as either 'success' or 'failure'.""" + if exc_type is None: + status = Status.SUCCESS + error = None + else: + status = Status.FAILURE + # For explanation, see 
https://docs.python.org/3/library/traceback.html#traceback.format_exception + error = '\n'.join(traceback.format_exception(exc_type, exc_inst, exc_tb)) + + new_status = dataclasses.replace(self.status) + new_status.error = error + new_status.status = status + current_utc_time = ( + datetime.datetime.utcnow() + .replace(tzinfo=datetime.timezone.utc) + .isoformat(timespec='seconds') + ) + + # This is necessary for setting the precise start time of the previous stage + # and end time of the final stage, as well as handling the case of Status.FAILURE. + if new_status.stage == Stage.FETCH: + new_status.fetch_start_time = self.prev_stage_precise_start_time + new_status.fetch_end_time = current_utc_time + elif new_status.stage == Stage.RETRIEVE: + new_status.retrieve_start_time = self.prev_stage_precise_start_time + new_status.retrieve_end_time = current_utc_time + elif new_status.stage == Stage.DOWNLOAD: + new_status.download_start_time = self.prev_stage_precise_start_time + new_status.download_end_time = current_utc_time + else: + new_status.upload_start_time = self.prev_stage_precise_start_time + new_status.upload_end_time = current_utc_time + + new_status.size = get_file_size(new_status.location) + + self.status = new_status + + self._update(self.status) + + def transact(self, config_name: str, dataset: str, selection: t.Dict, location: str, user: str) -> 'Manifest': + """Create a download transaction.""" + self._set_for_transaction(config_name, dataset, selection, location, user) + return self + + def set_stage(self, stage: Stage) -> None: + """Sets the current stage in manifest.""" + prev_stage = self.status.stage + new_status = dataclasses.replace(self.status) + new_status.stage = stage + new_status.status = Status.IN_PROGRESS + current_utc_time = ( + datetime.datetime.utcnow() + .replace(tzinfo=datetime.timezone.utc) + .isoformat(timespec='seconds') + ) + + if stage == Stage.FETCH: + new_status.fetch_start_time = current_utc_time + elif stage == Stage.RETRIEVE: + 
new_status.retrieve_start_time = current_utc_time + elif stage == Stage.DOWNLOAD: + new_status.fetch_start_time = self.prev_stage_precise_start_time + new_status.fetch_end_time = current_utc_time + new_status.download_start_time = current_utc_time + else: + if prev_stage == Stage.DOWNLOAD: + new_status.download_start_time = self.prev_stage_precise_start_time + new_status.download_end_time = current_utc_time + else: + new_status.retrieve_start_time = self.prev_stage_precise_start_time + new_status.retrieve_end_time = current_utc_time + new_status.upload_start_time = current_utc_time + + self.status = new_status + self._update(self.status) + + @abc.abstractmethod + def _read(self, location: str) -> DownloadStatus: + pass + + @abc.abstractmethod + def _update(self, download_status: DownloadStatus) -> None: + pass + + +class FirestoreManifest(Manifest, Database): + """A Firestore Manifest. + This Manifest implementation stores DownloadStatuses in a Firebase document store. + The document hierarchy for the manifest is as follows: + [manifest ] + ├── doc_id (md5 hash of the path) { 'selection': {...}, 'location': ..., 'username': ... } + └── etc... + Where `[]` indicates a collection and ` {...}` indicates a document. + """ + + def _get_db(self) -> firestore.firestore.Client: + """Acquire a firestore client, initializing the firebase app if necessary. + Will attempt to get the db client five times. If it's still unsuccessful, a + `ManifestException` will be raised. + """ + db = None + attempts = 0 + + while db is None: + try: + db = firestore.client() + except ValueError as e: + # The above call will fail with a value error when the firebase app is not initialized. + # Initialize the app here, and try again. + # Use the application default credentials. 
+ cred = credentials.ApplicationDefault() + + firebase_admin.initialize_app(cred) + print('Initialized Firebase App.') + + if attempts > 4: + raise ManifestException('Exceeded number of retries to get firestore client.') from e + + time.sleep(get_wait_interval(attempts)) + + attempts += 1 + + return db + + def _read(self, location: str) -> DownloadStatus: + """Reads the JSON data from a manifest.""" + + doc_id = generate_md5_hash(location) + + # Update document with download status + download_doc_ref = ( + self.root_document_for_store(doc_id) + ) + + result = download_doc_ref.get() + row = {} + if result.exists: + records = result.to_dict() + row = {n: to_json_serializable_type(v) for n, v in records.items()} + return DownloadStatus.from_dict(row) + + def _update(self, download_status: DownloadStatus) -> None: + """Update or create a download status record.""" + print('Updating Firestore Manifest.') + + status = DownloadStatus.to_dict(download_status) + doc_id = generate_md5_hash(status['location']) + + # Update document with download status + download_doc_ref = ( + self.root_document_for_store(doc_id) + ) + + result: WriteResult = download_doc_ref.set(status) + + print(f'Firestore manifest updated. ' + f'update_time={result.update_time}, ' + f'filename={download_status.location}.') + + def root_document_for_store(self, store_scheme: str) -> DocumentReference: + """Get the root manifest document given the user's config and current document's storage location.""" + # TODO: Get user-defined collection for manifest. 
+ root_collection = 'test_manifest' + return self._get_db().collection(root_collection).document(store_scheme) diff --git a/weather_dl_v2/license_deployment/util.py b/weather_dl_v2/license_deployment/util.py new file mode 100644 index 00000000..7b8b128d --- /dev/null +++ b/weather_dl_v2/license_deployment/util.py @@ -0,0 +1,185 @@ +import datetime +import geojson +import hashlib +import itertools +import os +import socket +import subprocess +import sys +import typing as t + +import numpy as np +import pandas as pd +from apache_beam.io.gcp import gcsio +from apache_beam.utils import retry +from xarray.core.utils import ensure_us_time_resolution +from urllib.parse import urlparse +from google.api_core.exceptions import BadRequest + + +LATITUDE_RANGE = (-90, 90) +LONGITUDE_RANGE = (-180, 180) +GLOBAL_COVERAGE_AREA = [90, -180, -90, 180] + + +def _retry_if_valid_input_but_server_or_socket_error_and_timeout_filter(exception) -> bool: + if isinstance(exception, socket.timeout): + return True + if isinstance(exception, TimeoutError): + return True + # To handle the concurrency issue in BigQuery. + if isinstance(exception, BadRequest): + return True + return retry.retry_if_valid_input_but_server_error_and_timeout_filter(exception) + + +class _FakeClock: + def sleep(self, value): + pass + + +def retry_with_exponential_backoff(fun): + """A retry decorator that doesn't apply during test time.""" + clock = retry.Clock() + + # Use a fake clock only during test time... + if 'unittest' in sys.modules.keys(): + clock = _FakeClock() + + return retry.with_exponential_backoff( + retry_filter=_retry_if_valid_input_but_server_or_socket_error_and_timeout_filter, + clock=clock, + )(fun) + + +# TODO(#245): Group with common utilities (duplicated) +def ichunked(iterable: t.Iterable, n: int) -> t.Iterator[t.Iterable]: + """Yield evenly-sized chunks from an iterable.""" + input_ = iter(iterable) + try: + while True: + it = itertools.islice(input_, n) + # peek to check if 'it' has next item. 
+ first = next(it) + yield itertools.chain([first], it) + except StopIteration: + pass + + +# TODO(#245): Group with common utilities (duplicated) +def copy(src: str, dst: str) -> None: + """Copy data via `gsutil cp`.""" + try: + subprocess.run(['gsutil', 'cp', src, dst], check=True, capture_output=True) + except subprocess.CalledProcessError as e: + print(f'Failed to copy file {src!r} to {dst!r} due to {e.stderr.decode("utf-8")}') + raise + + +# TODO(#245): Group with common utilities (duplicated) +def to_json_serializable_type(value: t.Any) -> t.Any: + """Returns the value with a type serializable to JSON""" + # Note: The order of processing is significant. + print('Serializing to JSON') + + if pd.isna(value) or value is None: + return None + elif np.issubdtype(type(value), np.floating): + return float(value) + elif type(value) == np.ndarray: + # Will return a scaler if array is of size 1, else will return a list. + return value.tolist() + elif type(value) == datetime.datetime or type(value) == str or type(value) == np.datetime64: + # Assume strings are ISO format timestamps... + try: + value = datetime.datetime.fromisoformat(value) + except ValueError: + # ... if they are not, assume serialization is already correct. + return value + except TypeError: + # ... maybe value is a numpy datetime ... + try: + value = ensure_us_time_resolution(value).astype(datetime.datetime) + except AttributeError: + # ... value is a datetime object, continue. + pass + + # We use a string timestamp representation. + if value.tzname(): + return value.isoformat() + + # We assume here that naive timestamps are in UTC timezone. + return value.replace(tzinfo=datetime.timezone.utc).isoformat() + elif type(value) == np.timedelta64: + # Return time delta in seconds. + return float(value / np.timedelta64(1, 's')) + # This check must happen after processing np.timedelta64 and np.datetime64. 
+ elif np.issubdtype(type(value), np.integer): + return int(value) + + return value + + +def fetch_geo_polygon(area: t.Union[list, str]) -> str: + """Calculates a geography polygon from an input area.""" + # Ref: https://confluence.ecmwf.int/pages/viewpage.action?pageId=151520973 + if isinstance(area, str): + # European area + if area == 'E': + area = [73.5, -27, 33, 45] + # Global area + elif area == 'G': + area = GLOBAL_COVERAGE_AREA + else: + raise RuntimeError(f'Not a valid value for area in config: {area}.') + + n, w, s, e = [float(x) for x in area] + if s < LATITUDE_RANGE[0]: + raise ValueError(f"Invalid latitude value for south: '{s}'") + if n > LATITUDE_RANGE[1]: + raise ValueError(f"Invalid latitude value for north: '{n}'") + if w < LONGITUDE_RANGE[0]: + raise ValueError(f"Invalid longitude value for west: '{w}'") + if e > LONGITUDE_RANGE[1]: + raise ValueError(f"Invalid longitude value for east: '{e}'") + + # Define the coordinates of the bounding box. + coords = [[w, n], [w, s], [e, s], [e, n], [w, n]] + + # Create the GeoJSON polygon object. 
+ polygon = geojson.dumps(geojson.Polygon([coords])) + return polygon + + +def get_file_size(path: str) -> float: + parsed_gcs_path = urlparse(path) + if parsed_gcs_path.scheme != 'gs' or parsed_gcs_path.netloc == '': + return os.stat(path).st_size / (1024 ** 3) if os.path.exists(path) else 0 + else: + return gcsio.GcsIO().size(path) / (1024 ** 3) if gcsio.GcsIO().exists(path) else 0 + + +def get_wait_interval(num_retries: int = 0) -> float: + """Returns next wait interval in seconds, using an exponential backoff algorithm.""" + if 0 == num_retries: + return 0 + return 2 ** num_retries + + +def generate_md5_hash(input: str) -> str: + """Generates md5 hash for the input string.""" + return hashlib.md5(input.encode('utf-8')).hexdigest() + + +def download_with_aria2(url: str, path: str) -> None: + """Downloads a file from the given URL using the `aria2c` command-line utility, + with options set to improve download speed and reliability.""" + dir_path, file_name = os.path.split(path) + try: + subprocess.run( + ['aria2c', '-x', '16', '-s', '16', url, '-d', dir_path, '-o', file_name, '--allow-overwrite'], + check=True, + capture_output=True) + except subprocess.CalledProcessError as e: + print(f'Failed download from server {url!r} to {path!r} due to {e.stderr.decode("utf-8")}') + raise From 091d18b234c8c8a54e295b0532f1f1be9d3ede74 Mon Sep 17 00:00:00 2001 From: Rahul Mahrsee Date: Fri, 9 Jun 2023 09:55:44 +0000 Subject: [PATCH 09/51] Fix license deployment --- weather_dl_v2/license_deployment/fetch.py | 63 ++++++++++++----------- 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/weather_dl_v2/license_deployment/fetch.py b/weather_dl_v2/license_deployment/fetch.py index 393a027b..5fec64b2 100644 --- a/weather_dl_v2/license_deployment/fetch.py +++ b/weather_dl_v2/license_deployment/fetch.py @@ -1,5 +1,6 @@ +from concurrent.futures import ThreadPoolExecutor import json -import threading +import logging import time import sys import os @@ -11,6 +12,8 @@ 
db_client = FirestoreClient() +logger = logging.getLogger(__name__) + def create_job(request, result): res = { 'config_name': request['config_name'], @@ -22,24 +25,24 @@ def create_job(request, result): } data_str = json.dumps(res) + logger.info(f"Creating download job for res: {data_str}") create_download_job(data_str) def make_fetch_request(request): - with semaphore: - client = CLIENTS[client_name](request['dataset']) - manifest = FirestoreManifest() - print(f'By using {client_name} datasets, ' - f'users agree to the terms and conditions specified in {client.license_url!r}') + client = CLIENTS[client_name](request['dataset']) + manifest = FirestoreManifest() + logger.info(f'By using {client_name} datasets, ' + f'users agree to the terms and conditions specified in {client.license_url!r}') + + target = request['location'] + selection = json.loads(request['selection']) - target = request['location'] - selection = json.loads(request['selection']) + logger.info(f'Fetching data for {target!r}.') + with manifest.transact(request['config_name'], request['dataset'], selection, target, request['username']): + result = client.retrieve(request['dataset'], selection, manifest) - print(f'Fetching data for {target!r}.') - with manifest.transact(request['config_name'], request['dataset'], selection, target, request['username']): - result = client.retrieve(request['dataset'], selection, manifest) - print(f"Result fetched {result} for request {request}.") - create_job(request, result) + create_job(request, result) def fetch_request_from_db(): @@ -53,27 +56,26 @@ def fetch_request_from_db(): def main(): - print("Started looking at the request.") - while True: - # Fetch a request from the database - request = fetch_request_from_db() + logger.info("Started looking at the request.") + with ThreadPoolExecutor(concurrency_limit) as executor: + while True: + # Fetch a request from the database + request = fetch_request_from_db() - if request is not None: - # Create a thread to process 
the request - thread = threading.Thread(target=make_fetch_request, args=(request,)) - thread.start() - else: - print("No request available. Waiting...") - time.sleep(5) + if request is not None: + executor.submit(make_fetch_request, request) + else: + logger.info("No request available. Waiting...") + time.sleep(5) - # Check if the maximum concurrency level has been reached - # If so, wait for a slot to become available - with semaphore: - pass + # Check if the maximum concurrency level has been reached + # If so, wait for a slot to become available + while executor._work_queue.qsize()>=concurrency_limit: + time.sleep(1) def boot_up(license: str) -> None: - global license_id, client_name, concurrency_limit, semaphore + global license_id, client_name, concurrency_limit result = db_client._initialize_license_deployment(license) license_id = license @@ -82,11 +84,10 @@ def boot_up(license: str) -> None: os.environ.setdefault('CLIENT_URL', result['api_url']) os.environ.setdefault('CLIENT_KEY', result['api_key']) os.environ.setdefault('CLIENT_EMAIL', result['api_email']) - semaphore = threading.Semaphore(concurrency_limit) if __name__ == "__main__": license = sys.argv[2] - print(f"Deployment for license: {license}.") + logger.info(f"Deployment for license: {license}.") boot_up(license) main() From ad2fa24f4f3314e2f26637c92522a9125eb63fa2 Mon Sep 17 00:00:00 2001 From: Rahul Mahrsee Date: Fri, 9 Jun 2023 13:01:15 +0000 Subject: [PATCH 10/51] Made changes to deploy license from fastapi server. 
--- weather_dl_v2/fastapi-server/environment.yml | 1 + .../license_dep/deployment_creator.py | 41 +++++++++++++++++++ .../license_dep}/license_deployment.yaml | 1 + .../fastapi-server/routers/license.py | 26 +++++++----- 4 files changed, 58 insertions(+), 11 deletions(-) create mode 100644 weather_dl_v2/fastapi-server/license_dep/deployment_creator.py rename weather_dl_v2/{license_deployment => fastapi-server/license_dep}/license_deployment.yaml (98%) diff --git a/weather_dl_v2/fastapi-server/environment.yml b/weather_dl_v2/fastapi-server/environment.yml index 44e9fbe1..3b18f525 100644 --- a/weather_dl_v2/fastapi-server/environment.yml +++ b/weather_dl_v2/fastapi-server/environment.yml @@ -7,6 +7,7 @@ dependencies: - geojson - pip=22.3 - pip: + - kubernetes - fastapi[all] - python-multipart - numpy diff --git a/weather_dl_v2/fastapi-server/license_dep/deployment_creator.py b/weather_dl_v2/fastapi-server/license_dep/deployment_creator.py new file mode 100644 index 00000000..61f3e6d5 --- /dev/null +++ b/weather_dl_v2/fastapi-server/license_dep/deployment_creator.py @@ -0,0 +1,41 @@ +from os import path +import yaml +import uuid +from kubernetes import client, config + + +def create_license_deployment(license_id: str) -> str: + """Creates a kubernetes workflow of type Job for downloading the data.""" + config.load_config() + + with open(path.join(path.dirname(__file__), "license_deployment.yaml")) as f: + deployment_manifest = yaml.safe_load(f) + deployment_name = f"weather-dl-v2-license-dep-{license_id}" + + # Update the deployment name with a unique identifier + deployment_manifest["metadata"]["name"] = deployment_name + deployment_manifest["spec"]["template"]["spec"]["containers"][0]["args"] = ["--license", license_id] + + # Create an instance of the Kubernetes API client + api_instance = client.AppsV1Api() + # Create the deployment in the specified namespace + response = api_instance.create_namespaced_deployment(body=deployment_manifest, namespace='default') + + 
print("Deployment created successfully:", response.metadata.name) + return deployment_name + + +def terminate_license_deployment(license_id: str) -> None: + # Load Kubernetes configuration + config.load_kube_config() + + # Create an instance of the Kubernetes API client + api_instance = client.AppsV1Api() + + # Specify the name and namespace of the deployment to delete + deployment_name = f"weather-dl-v2-license-dep-{license_id}" + + # Delete the deployment + api_instance.delete_namespaced_deployment(name=deployment_name, namespace="default") + + print(f"Deployment '{deployment_name}' deleted successfully.") \ No newline at end of file diff --git a/weather_dl_v2/license_deployment/license_deployment.yaml b/weather_dl_v2/fastapi-server/license_dep/license_deployment.yaml similarity index 98% rename from weather_dl_v2/license_deployment/license_deployment.yaml rename to weather_dl_v2/fastapi-server/license_dep/license_deployment.yaml index b5d24207..995bdfc2 100644 --- a/weather_dl_v2/license_deployment/license_deployment.yaml +++ b/weather_dl_v2/fastapi-server/license_dep/license_deployment.yaml @@ -21,6 +21,7 @@ spec: - name: weather-dl-v2-license-dep image: XXXXXXX imagePullPolicy: Always + args: [] # resources: # # You must specify requests for CPU to autoscale # # based on CPU utilization diff --git a/weather_dl_v2/fastapi-server/routers/license.py b/weather_dl_v2/fastapi-server/routers/license.py index fbbe923a..f2317384 100644 --- a/weather_dl_v2/fastapi-server/routers/license.py +++ b/weather_dl_v2/fastapi-server/routers/license.py @@ -1,6 +1,7 @@ -from fastapi import APIRouter, HTTPException +from fastapi import APIRouter, HTTPException, BackgroundTasks from pydantic import BaseModel from db_service.database import FirestoreClient +from license_dep.deployment_creator import create_license_deployment, terminate_license_deployment db_client = FirestoreClient() @@ -54,14 +55,14 @@ async def update_license(license_id: str, license: License): license_dict = 
license.dict() db_client._update_license(license_id, license_dict) - # TODO: Add a background task to create k8s deployement for this updated license. - # And update entry of 'k8s_deployment_id' entry in 'license' collection. + + terminate_license_deployment(license_id) + create_deployment(license_id) return {"license_id": license_id, "name": "License updated successfully."} # Add/Update k8s deployment ID for existing license (intenally). -@router.put("/server/{license_id}") -async def update_license_internal(license_id: str, k8s_deployment_id: str): +def update_license_internal(license_id: str, k8s_deployment_id: str): if not db_client._check_license_exists(license_id): raise HTTPException(status_code=404, detail="No such license to update.") license_dict = {"k8s_deployment_id": k8s_deployment_id} @@ -70,25 +71,28 @@ async def update_license_internal(license_id: str, k8s_deployment_id: str): return {"license_id": license_id, "message": "License updated successfully."} +def create_deployment(license_id: str): + k8s_deployment_id = create_license_deployment(license_id) + update_license_internal(license_id, k8s_deployment_id) + + # Add new license @router.post("/") -async def add_license(license: License): +async def add_license(license: License, background_tasks: BackgroundTasks = BackgroundTasks()): license_dict = license.dict() license_dict['k8s_deployment_id'] = "" license_id = db_client._add_license(license_dict) db_client._create_license_queue(license_id, license_dict['client_name']) - # TODO: Add a background task to create k8s deployement for this newly added license. - # And update entry of 'k8s_deployment_id' entry in 'license' collection. 
+ background_tasks.add_task(create_deployment, license_id) return {"license_id": license_id, "message": "License added successfully."} # Remove license @router.delete("/{license_id}") -async def delete_license(license_id: str): +async def delete_license(license_id: str, background_tasks: BackgroundTasks = BackgroundTasks()): if not db_client._check_license_exists(license_id): raise HTTPException(status_code=404, detail="No such license to delete.") db_client._delete_license(license_id) db_client._remove_license_queue(license_id) - # TODO: Add a background task to delete k8s deployement for this deleted license. - # And update entry of 'k8s_deployment_id' entry in 'license' collection. + background_tasks.add_task(terminate_license_deployment, license_id) return {"license_id": license_id, "message": "License removed successfully."} From 2bd33660e6fe342683ec900b23c9b5562537850a Mon Sep 17 00:00:00 2001 From: Rahul Mahrsee Date: Mon, 12 Jun 2023 08:23:24 +0000 Subject: [PATCH 11/51] Minor directory restructuring. 
--- weather_dl_v2/downloader_kubernetes/README.md | 11 ------- weather_dl_v2/fastapi-server/Dockerfile | 1 - weather_dl_v2/fastapi-server/README.md | 7 +++++ weather_dl_v2/license_deployment/Dockerfile | 1 - weather_dl_v2/license_deployment/README.md | 29 +++++-------------- .../downloader.yaml | 0 6 files changed, 14 insertions(+), 35 deletions(-) rename weather_dl_v2/{downloader_kubernetes => license_deployment}/downloader.yaml (100%) diff --git a/weather_dl_v2/downloader_kubernetes/README.md b/weather_dl_v2/downloader_kubernetes/README.md index 2fa1cc76..0783740d 100644 --- a/weather_dl_v2/downloader_kubernetes/README.md +++ b/weather_dl_v2/downloader_kubernetes/README.md @@ -21,14 +21,3 @@ export REPO= eg:weather-tools gcloud builds submit Dockerfile --tag "gcr.io/$PROJECT_ID/$REPO:weather-dl-v2-downloader" --timeout=79200 --machine-type=e2-highcpu-32 ``` - -* **Add path of created downloader image in downloader.yaml**: -``` -Please write down the downloader's docker image path at Line 11 of downloader.yaml. -``` - -## General Commands -* **For viewing the current pods**: -``` -kubectl get pods -``` diff --git a/weather_dl_v2/fastapi-server/Dockerfile b/weather_dl_v2/fastapi-server/Dockerfile index e39e47a8..08e0c60c 100644 --- a/weather_dl_v2/fastapi-server/Dockerfile +++ b/weather_dl_v2/fastapi-server/Dockerfile @@ -10,7 +10,6 @@ RUN conda install -n base conda-libmamba-solver RUN conda config --set solver libmamba COPY . . -COPY ../license_deployment/license_deployment.yaml . # Create conda env using environment.yml RUN conda env create -f environment.yml --debug diff --git a/weather_dl_v2/fastapi-server/README.md b/weather_dl_v2/fastapi-server/README.md index 8faaf249..10004215 100644 --- a/weather_dl_v2/fastapi-server/README.md +++ b/weather_dl_v2/fastapi-server/README.md @@ -14,6 +14,13 @@ uvicorn main:app --reload * Open your browser at http://127.0.0.1:8000. 
+ +* **Add path of created license deployment image in license_dep/license_deployment.yaml**: +``` +Please write down the license deployment's docker image path at Line 22 of license_deployment.yaml. +``` + + * **Create docker image for server**: ``` export PROJECT_ID= diff --git a/weather_dl_v2/license_deployment/Dockerfile b/weather_dl_v2/license_deployment/Dockerfile index 579237dc..2a02d59d 100644 --- a/weather_dl_v2/license_deployment/Dockerfile +++ b/weather_dl_v2/license_deployment/Dockerfile @@ -8,7 +8,6 @@ RUN conda install -n base conda-libmamba-solver RUN conda config --set solver libmamba COPY . . -COPY ../downloader/downloader.yaml . # Create conda env using environment.yml RUN conda env create -f environment.yml --debug diff --git a/weather_dl_v2/license_deployment/README.md b/weather_dl_v2/license_deployment/README.md index 97f32647..d33628b6 100644 --- a/weather_dl_v2/license_deployment/README.md +++ b/weather_dl_v2/license_deployment/README.md @@ -7,31 +7,16 @@ conda env create --name weather-dl-v2-license-dep --file=environment.yml conda activate weather-dl-v2-license-dep ``` -* **Create docker image for license deployment**: -``` -export PROJECT_ID= -export REPO= eg:weather-tools - -gcloud builds submit . --tag "gcr.io/$PROJECT_ID/$REPO:weather-dl-v2-license-dep" --timeout=79200 --machine-type=e2-highcpu-32 -``` - -* **Add path of created license deployment image in license_deployment.yaml**: +* **Add path of created downloader image in downloader.yaml**: ``` -Please write down the license deployment's docker image path at Line 22 of license_deployment.yaml. +Please write down the downloader's docker image path at Line 11 of downloader.yaml. ``` -* **Deploy license deployment's on kubernetes:** -We are not required to this step manually, this will be done by fastapi-server. 
-``` -kubectl apply -f license_deployment.yaml -- --license -``` -## General Commands -* **For viewing the current pods**: -``` -kubectl get pods +* **Create docker image for license deployment**: ``` +export PROJECT_ID= +export REPO= eg:weather-tools -* **For deleting existing deployment**: -``` -kubectl delete -f ./license_deployment.yaml --force \ No newline at end of file +gcloud builds submit . --tag "gcr.io/$PROJECT_ID/$REPO:weather-dl-v2-license-dep" --timeout=79200 --machine-type=e2-highcpu-32 +``` \ No newline at end of file diff --git a/weather_dl_v2/downloader_kubernetes/downloader.yaml b/weather_dl_v2/license_deployment/downloader.yaml similarity index 100% rename from weather_dl_v2/downloader_kubernetes/downloader.yaml rename to weather_dl_v2/license_deployment/downloader.yaml From 32b98a2f4f81a0b0b4592593b1095930f18a1fe6 Mon Sep 17 00:00:00 2001 From: Rahul Mahrsee Date: Mon, 12 Jun 2023 13:00:21 +0000 Subject: [PATCH 12/51] Minor fixes related to license-deployment. --- .../fastapi-server/db_service/database.py | 4 +- .../license_dep/deployment_creator.py | 6 +-- .../license_dep/license_deployment.yaml | 39 +------------------ weather_dl_v2/fastapi-server/main.py | 4 +- 4 files changed, 7 insertions(+), 46 deletions(-) diff --git a/weather_dl_v2/fastapi-server/db_service/database.py b/weather_dl_v2/fastapi-server/db_service/database.py index 0765f63e..144408a7 100644 --- a/weather_dl_v2/fastapi-server/db_service/database.py +++ b/weather_dl_v2/fastapi-server/db_service/database.py @@ -156,9 +156,7 @@ def _delete_license(self, license_id: str) -> None: print(f"Removed {license_id} in 'license' collection. 
Update_time: {timestamp}.") def _update_license(self, license_id: str, license_dict: dict) -> None: - result: WriteResult = self._get_db().collection('license').document(license_id).update({ - license_dict - }) + result: WriteResult = self._get_db().collection('license').document(license_id).update(license_dict) print(f"Updated {license_id} in 'license' collection. Update_time: {result.update_time}.") def _create_license_queue(self, license_id: str, client_name: str) -> None: diff --git a/weather_dl_v2/fastapi-server/license_dep/deployment_creator.py b/weather_dl_v2/fastapi-server/license_dep/deployment_creator.py index 61f3e6d5..1928d8f9 100644 --- a/weather_dl_v2/fastapi-server/license_dep/deployment_creator.py +++ b/weather_dl_v2/fastapi-server/license_dep/deployment_creator.py @@ -10,7 +10,7 @@ def create_license_deployment(license_id: str) -> str: with open(path.join(path.dirname(__file__), "license_deployment.yaml")) as f: deployment_manifest = yaml.safe_load(f) - deployment_name = f"weather-dl-v2-license-dep-{license_id}" + deployment_name = f"weather-dl-v2-license-dep-{license_id}".lower() # Update the deployment name with a unique identifier deployment_manifest["metadata"]["name"] = deployment_name @@ -27,13 +27,13 @@ def create_license_deployment(license_id: str) -> str: def terminate_license_deployment(license_id: str) -> None: # Load Kubernetes configuration - config.load_kube_config() + config.load_config() # Create an instance of the Kubernetes API client api_instance = client.AppsV1Api() # Specify the name and namespace of the deployment to delete - deployment_name = f"weather-dl-v2-license-dep-{license_id}" + deployment_name = f"weather-dl-v2-license-dep-{license_id}".lower() # Delete the deployment api_instance.delete_namespaced_deployment(name=deployment_name, namespace="default") diff --git a/weather_dl_v2/fastapi-server/license_dep/license_deployment.yaml b/weather_dl_v2/fastapi-server/license_dep/license_deployment.yaml index 995bdfc2..db27e2b1 
100644 --- a/weather_dl_v2/fastapi-server/license_dep/license_deployment.yaml +++ b/weather_dl_v2/fastapi-server/license_dep/license_deployment.yaml @@ -1,4 +1,3 @@ ---- # weather-dl-v2-license-dep Deployment # Defines the deployment of the app running in a pod on any worker node apiVersion: apps/v1 @@ -26,40 +25,4 @@ spec: # # You must specify requests for CPU to autoscale # # based on CPU utilization # requests: - # cpu: "250m" ---- -kind: Role -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: weather-dl-v2-license-dep -rules: - - apiGroups: - - "" - - "apps" - - "batch" - resources: - - endpoints - - deployments - - pods - - jobs - verbs: - - get - - list - - watch - - create - - delete ---- -kind: RoleBinding -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: weather-dl-v2-license-dep - namespace: default -subjects: - - kind: ServiceAccount - name: default - namespace: default -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: weather-dl-v2-license-dep ---- \ No newline at end of file + # cpu: "250m" \ No newline at end of file diff --git a/weather_dl_v2/fastapi-server/main.py b/weather_dl_v2/fastapi-server/main.py index 33abdf79..d7a0df85 100644 --- a/weather_dl_v2/fastapi-server/main.py +++ b/weather_dl_v2/fastapi-server/main.py @@ -8,8 +8,8 @@ async def lifespan(app: FastAPI): # Boot up # TODO: Replace hard-coded collection name by read a server config. - print("Create database if not already exists.") - print("Retrieve license information & create license deployment if needed.") + print("TODO: Create database if not already exists.") + print("TODO: Retrieve license information & create license deployment if needed.") yield # Clean up From ee8de36742b61eb0b071e4bf70df4a3078fa3edd Mon Sep 17 00:00:00 2001 From: aniketinfocusp <122869307+aniketinfocusp@users.noreply.github.com> Date: Tue, 13 Jun 2023 18:36:17 +0530 Subject: [PATCH 13/51] weather-dl-v2 `cli` (#346) * Cli Init * Added network service. 
* Added Download subcommand and service. * Added Queue subcommand and service. * Added License subcommand and service. * Updated readme and dockerfile. * Update README.md * Update Dockerfile * updated readme * added missing logger * added logs before exiting in network service --- weather_dl_v2/cli/.gitignore | 162 ++++++++++++++++++ weather_dl_v2/cli/Dockerfile | 14 ++ weather_dl_v2/cli/README.md | 55 ++++++ weather_dl_v2/cli/app/__init__.py | 0 weather_dl_v2/cli/app/config.py | 8 + weather_dl_v2/cli/app/main.py | 30 ++++ weather_dl_v2/cli/app/services/__init__.py | 0 .../cli/app/services/download_service.py | 85 +++++++++ .../cli/app/services/license_service.py | 89 ++++++++++ .../cli/app/services/network_service.py | 67 ++++++++ .../cli/app/services/queue_service.py | 74 ++++++++ weather_dl_v2/cli/app/subcommands/__init__.py | 0 weather_dl_v2/cli/app/subcommands/download.py | 53 ++++++ weather_dl_v2/cli/app/subcommands/license.py | 88 ++++++++++ weather_dl_v2/cli/app/subcommands/queue.py | 53 ++++++ weather_dl_v2/cli/app/utils.py | 63 +++++++ weather_dl_v2/cli/environment.yml | 13 ++ weather_dl_v2/cli/setup.py | 17 ++ 18 files changed, 871 insertions(+) create mode 100644 weather_dl_v2/cli/.gitignore create mode 100644 weather_dl_v2/cli/Dockerfile create mode 100644 weather_dl_v2/cli/README.md create mode 100644 weather_dl_v2/cli/app/__init__.py create mode 100644 weather_dl_v2/cli/app/config.py create mode 100644 weather_dl_v2/cli/app/main.py create mode 100644 weather_dl_v2/cli/app/services/__init__.py create mode 100644 weather_dl_v2/cli/app/services/download_service.py create mode 100644 weather_dl_v2/cli/app/services/license_service.py create mode 100644 weather_dl_v2/cli/app/services/network_service.py create mode 100644 weather_dl_v2/cli/app/services/queue_service.py create mode 100644 weather_dl_v2/cli/app/subcommands/__init__.py create mode 100644 weather_dl_v2/cli/app/subcommands/download.py create mode 100644 
weather_dl_v2/cli/app/subcommands/license.py create mode 100644 weather_dl_v2/cli/app/subcommands/queue.py create mode 100644 weather_dl_v2/cli/app/utils.py create mode 100644 weather_dl_v2/cli/environment.yml create mode 100644 weather_dl_v2/cli/setup.py diff --git a/weather_dl_v2/cli/.gitignore b/weather_dl_v2/cli/.gitignore new file mode 100644 index 00000000..251ecb52 --- /dev/null +++ b/weather_dl_v2/cli/.gitignore @@ -0,0 +1,162 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
+# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +.ruff_cache/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/ \ No newline at end of file diff --git a/weather_dl_v2/cli/Dockerfile b/weather_dl_v2/cli/Dockerfile new file mode 100644 index 00000000..f8e0b336 --- /dev/null +++ b/weather_dl_v2/cli/Dockerfile @@ -0,0 +1,14 @@ +FROM continuumio/miniconda3:latest + +COPY . . + +# Create conda env using environment.yml +RUN conda update conda -y +RUN conda env create --name weather-dl-v2-cli --file=environment.yml + +# Activate the conda env and update the PATH +ARG CONDA_ENV_NAME=weather-dl-v2-cli +RUN echo "source activate ${CONDA_ENV_NAME}" >> ~/.bashrc +ENV PATH /opt/conda/envs/${CONDA_ENV_NAME}/bin:$PATH + +ENV BASE_URI=http://:8080 diff --git a/weather_dl_v2/cli/README.md b/weather_dl_v2/cli/README.md new file mode 100644 index 00000000..5fe0e943 --- /dev/null +++ b/weather_dl_v2/cli/README.md @@ -0,0 +1,55 @@ +# weather-dl-cli +This is a command line interface for talking to the weather-dl-v2 FastAPI server. + +- Due to our org level policy we can't expose external-ip using LoadBalancer Service +while deploying our FastAPI server. + +Replace the FastAPI server pod's IP in Dockerfile (at line 8). +``` +ENV BASE_URI=http://:8080 +``` +> Note: Command to get the Pod IP : `kubectl get pods -o wide`. +> +> Though note that in case of Pod restart IP might get change. So we need to look +> for better solution for the same. + +## Create docker image for weather-dl-cli + +``` +export PROJECT_ID= +export REPO= eg:weather-tools + +gcloud builds submit . 
--tag "gcr.io/$PROJECT_ID/$REPO:weather-dl-cli" --timeout=79200 --machine-type=e2-highcpu-32 + +``` +## Create a VM using above created docker-image +``` +export ZONE= eg: us-cental1-a +export SERVICE_ACCOUNT= # Let's keep this as Compute Engine Default Service Account +export IMAGE_PATH= # The above created image-path + +gcloud compute instances create-with-container weather-dl-cli \ + --project=$PROJECT_ID \ + --zone=$ZONE \ + --machine-type=e2-medium \ + --network-interface=network-tier=PREMIUM,stack-type=IPV4_ONLY,subnet=default \ + --maintenance-policy=MIGRATE \ + --provisioning-model=STANDARD \ + --service-account=$SERVICE_ACCOUNT \ + --scopes=https://www.googleapis.com/auth/cloud-platform \ + --tags=http-server,https-server \ + --image=projects/cos-cloud/global/images/cos-stable-105-17412-101-4 \ + --boot-disk-size=10GB \ + --boot-disk-type=pd-balanced \ + --boot-disk-device-name=weather-dl-cli-server \ + --container-image=$IMAGE_PATH \ + --container-restart-policy=on-failure \ + --no-shielded-secure-boot \ + --shielded-vtpm \ + --shielded-integrity-monitoring \ + --labels=goog-ec-src=vm_add-gcloud,container-vm=cos-stable-105-17412-101-4 +``` +## Use the cli after doing ssh in the above created VM +``` +weather-dl-v2 --help +``` diff --git a/weather_dl_v2/cli/app/__init__.py b/weather_dl_v2/cli/app/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/weather_dl_v2/cli/app/config.py b/weather_dl_v2/cli/app/config.py new file mode 100644 index 00000000..390d8611 --- /dev/null +++ b/weather_dl_v2/cli/app/config.py @@ -0,0 +1,8 @@ +import os + +class Config: + def __init__(self): + if "BASE_URI" in os.environ: + self.BASE_URI = os.environ["BASE_URI"] + else: + raise KeyError("BASE_URI not in enviroment.") \ No newline at end of file diff --git a/weather_dl_v2/cli/app/main.py b/weather_dl_v2/cli/app/main.py new file mode 100644 index 00000000..0b2435e0 --- /dev/null +++ b/weather_dl_v2/cli/app/main.py @@ -0,0 +1,30 @@ +import typer +import 
logging +from app.config import Config +import requests +from app.subcommands import download, queue, license + +logger = logging.getLogger(__name__) + +app = typer.Typer(help="weather-dl-v2 is a cli tool for communicating with FastAPI server.") + +app.add_typer(download.app, name="download", help="Manage downloads.") +app.add_typer(queue.app, name="queue", help="Manage queues.") +app.add_typer(license.app, name="license", help="Manage licenses.") + + + +@app.command(help="Check if FastAPI server is live and rechable") +def ping(): + uri = f"{Config().BASE_URI}" + + try: + x = requests.get(uri) + except requests.exceptions.RequestException as e: + raise SystemExit(e) + + + logger.info(x.text) + +if __name__ == "__main__": + app() \ No newline at end of file diff --git a/weather_dl_v2/cli/app/services/__init__.py b/weather_dl_v2/cli/app/services/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/weather_dl_v2/cli/app/services/download_service.py b/weather_dl_v2/cli/app/services/download_service.py new file mode 100644 index 00000000..8db8f0c2 --- /dev/null +++ b/weather_dl_v2/cli/app/services/download_service.py @@ -0,0 +1,85 @@ +import abc +import logging +import typing as t +from app.services.network_service import network_service +from app.config import Config + + +logger = logging.getLogger(__name__) + + + +class DownloadService(abc.ABC): + + @abc.abstractmethod + def _list_all_downloads(self): + pass + + @abc.abstractmethod + def _list_all_downloads_by_client_name(self, client_name: str): + pass + + @abc.abstractmethod + def _get_download_by_config(self, config_name: str): + pass + + @abc.abstractmethod + def _add_new_download(self, file_path: str, licenses: t.List[str]): + pass + + @abc.abstractmethod + def _remove_download(self, config_name: str): + pass + +class DownloadServiceNetwork(DownloadService): + def __init__(self): + self.endpoint = f"{Config().BASE_URI}/download" + + def _list_all_downloads(self): + return network_service.get( + 
uri = self.endpoint, + header = {"accept": "application/json"} + ) + + def _list_all_downloads_by_client_name(self, client_name: str): + return network_service.get( + uri = self.endpoint, + header = {"accept": "application/json"}, + query = {"client_name": client_name} + ) + + def _get_download_by_config(self, config_name: str): + return network_service.get( + uri = f"{self.endpoint}/download{config_name}", + header = {"accept": "application/json"} + ) + + def _add_new_download(self, file_path: str, licenses: t.List[str]): + try: + file = {"file" : open(file_path, 'rb')} + except FileNotFoundError: + return "File not found." + + return network_service.post( + uri=self.endpoint, + header = {"accept": "application/json"}, + file = file, + payload = {"licenses": licenses} + ) + + def _remove_download(self, config_name: str): + return network_service.delete( + uri=f"{self.endpoint}/{config_name}", + header = {"accept": "application/json"} + ) + +class DownloadServiceMock(DownloadService): + pass + +def get_download_service(test: bool = False): + if test: + return DownloadServiceMock() + else: + return DownloadServiceNetwork() + +download_service = get_download_service() \ No newline at end of file diff --git a/weather_dl_v2/cli/app/services/license_service.py b/weather_dl_v2/cli/app/services/license_service.py new file mode 100644 index 00000000..918bf2c3 --- /dev/null +++ b/weather_dl_v2/cli/app/services/license_service.py @@ -0,0 +1,89 @@ +import abc +import logging +import json +import typing as t +from app.services.network_service import network_service +from app.config import Config + +logger = logging.getLogger(__name__) + +class LicenseService(abc.ABC): + @abc.abstractmethod + def _get_all_license(self): + pass + + @abc.abstractmethod + def _get_all_license_by_client_name(self, client_name: str): + pass + + @abc.abstractmethod + def _get_license_by_license_id(self, license_id: str): + pass + + @abc.abstractmethod + def _add_license(self, license_dict: dict): + 
pass + + @abc.abstractmethod + def _remove_license(self, license_id: str): + pass + + @abc.abstractmethod + def _update_license(self, license_id: str, license_dict: dict): + pass + + +class LicneseServiceNetwork(LicenseService): + def __init__(self): + self.endpoint = f"{Config().BASE_URI}/license" + + def _get_all_license(self): + return network_service.get( + uri = self.endpoint, + header = {"accept": "application/json"} + ) + + def _get_all_license_by_client_name(self, client_name: str): + return network_service.get( + uri = self.endpoint, + header = {"accept": "application/json"}, + query = {"client_name": client_name} + ) + + def _get_license_by_license_id(self, license_id: str): + return network_service.get( + uri = f"{self.endpoint}/{license_id}", + header = {"accept": "application/json"}, + ) + + def _add_license(self, license_dict: dict): + return network_service.post( + uri = self.endpoint, + header = {"accept": "application/json"}, + payload = json.dumps(license_dict) + ) + + def _remove_license(self, license_id: str): + return network_service.delete( + uri = f"{self.endpoint}/{license_id}", + header = {"accept": "application/json"}, + ) + + def _update_license(self, license_id: str, license_dict: dict): + return network_service.put( + uri = f"{self.endpoint}/{license_id}", + header = {"accept": "application/json"}, + payload = json.dumps(license_dict) + ) + + +class LicenseServiceMock(LicenseService): + pass + +def get_license_service(test: bool = False): + if test: + return LicenseServiceMock() + else: + return LicneseServiceNetwork() + +license_service = get_license_service() diff --git a/weather_dl_v2/cli/app/services/network_service.py b/weather_dl_v2/cli/app/services/network_service.py new file mode 100644 index 00000000..dbce8961 --- /dev/null +++ b/weather_dl_v2/cli/app/services/network_service.py @@ -0,0 +1,67 @@ +import requests +import json +import logging +from time import time + +logger = logging.getLogger(__name__) + +def timeit(func): + 
def wrap_func(*args, **kwargs): + t1 = time() + result = func(*args, **kwargs) + t2 = time() + print(f'[executed in {(t2-t1):.4f}s.]') + return result + return wrap_func + + +class NetworkService: + def parse_response(self, response: requests.Response): + try: + parsed = json.loads(response.text) + except Exception as e: + logger.info(f"Parsing error: {e}.") + logger.info(f"Status code {response.status_code}") + logger.info(f"Response {response.text}") + return + + return json.dumps(parsed, indent=3) + + @timeit + def get(self, uri, header, query=None, payload=None): + try: + x = requests.get(uri, params=query, headers=header, data=payload) + return self.parse_response(x) + except requests.exceptions.RequestException as e: + logger.error(f"request error: {e}") + raise SystemExit(e) + + @timeit + def post(self, uri, header, query=None, payload=None, file=None): + try: + x = requests.post(uri, params=query, headers=header, data=payload, files=file) + return self.parse_response(x) + except requests.exceptions.RequestException as e: + logger.error(f"request error: {e}") + raise SystemExit(e) + + @timeit + def put(self, uri, header, query=None, payload=None, file=None): + try: + x = requests.put(uri, params=query, headers=header, data=payload, files=file) + + return self.parse_response(x) + except requests.exceptions.RequestException as e: + logger.error(f"request error: {e}") + raise SystemExit(e) + + @timeit + def delete(self, uri, header, query=None): + try: + x = requests.delete(uri, params=query, headers=header) + return self.parse_response(x) + except requests.exceptions.RequestException as e: + logger.error(f"request error: {e}") + raise SystemExit(e) + +network_service = NetworkService() \ No newline at end of file diff --git a/weather_dl_v2/cli/app/services/queue_service.py b/weather_dl_v2/cli/app/services/queue_service.py new file mode 100644 index 00000000..ab03b1da --- /dev/null +++ b/weather_dl_v2/cli/app/services/queue_service.py @@ -0,0 +1,74 @@ +import 
abc +import logging +import json +import typing as t +from app.services.network_service import network_service +from app.config import Config + + +logger = logging.getLogger(__name__) + + + +class QueueService(abc.ABC): + + @abc.abstractmethod + def _list_all_queues(self): + pass + + @abc.abstractmethod + def _list_all_queues_by_client_name(self, client_name: str): + pass + + @abc.abstractmethod + def _get_queue_by_license(self, license_id: str): + pass + + @abc.abstractmethod + def _edit_queues(self, license_id: str, priority_list: t.List[str]): + pass + +class QueueServiceNetwork(QueueService): + def __init__(self): + self.endpoint = f"{Config().BASE_URI}/queues" + + def _list_all_queues(self): + return network_service.get( + uri = self.endpoint, + header = {"accept": "application/json"} + ) + + def _list_all_queues_by_client_name(self, client_name: str): + return network_service.get( + uri = self.endpoint, + header = {"accept": "application/json"}, + query = {"client_name": client_name} + ) + + def _get_queue_by_license(self, license_id: str): + return network_service.get( + uri = f"{self.endpoint}/{license_id}", + header = {"accept": "application/json"} + ) + + def _edit_queues(self, license_id: str, priority_list: t.List[str]): + return network_service.post( + uri = f"{self.endpoint}/{license_id}", + header = { + "accept": "application/json", + 'Content-Type': 'application/json' + }, + payload = json.dumps(priority_list) + ) + +class QueueServiceMock(QueueService): + pass + + +def get_queue_service(test: bool = False): + if test: + return QueueServiceMock() + else: + return QueueServiceNetwork() + +queue_service = get_queue_service() \ No newline at end of file diff --git a/weather_dl_v2/cli/app/subcommands/__init__.py b/weather_dl_v2/cli/app/subcommands/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/weather_dl_v2/cli/app/subcommands/download.py b/weather_dl_v2/cli/app/subcommands/download.py new file mode 100644 index 
00000000..d10cfd19 --- /dev/null +++ b/weather_dl_v2/cli/app/subcommands/download.py @@ -0,0 +1,53 @@ +import typer +import json +from typing_extensions import Annotated +from app.services.download_service import download_service +from app.utils import Validator +from typing import List + +app = typer.Typer() + +class DowloadFilterValidator(Validator): + pass + +@app.command(help="List out all the configs.") +def list( + filter: Annotated[str, typer.Option(help="Filter by some value. Format: filter_key=filter_value")] = None + ): + if filter: + validator = DowloadFilterValidator(valid_keys=["client_name"]) + + try: + data = validator.validate(filters=[filter]) + client_name = data['client_name'] + except Exception as e: + print(f"filter error: {e}") + return + + print(download_service._list_all_downloads_by_client_name(client_name)) + return + + print(download_service._list_all_downloads()) + +@app.command(help="Add new configs.") +def add( + file_path: Annotated[str, typer.Argument(help="File path of config to be uploaded")], + license: Annotated[List[str], typer.Option("--license", "-l", help="License ID.")] = [], + ): + if len(license) == 0: + print("No licenses mentioned. 
Please specify licenese Id.") + return + + print(download_service._add_new_download(file_path, license)) + +@app.command(help="Get a particular config.") +def config( + config_name: Annotated[str, typer.Argument(help="Config file name.")] + ): + print(download_service._get_download_by_config(config_name)) + +@app.command(help="Remove existing configs.") +def remove( + config_name: Annotated[str, typer.Argument(help="Config file name.")] + ): + print(download_service._remove_download(config_name)) diff --git a/weather_dl_v2/cli/app/subcommands/license.py b/weather_dl_v2/cli/app/subcommands/license.py new file mode 100644 index 00000000..597b752b --- /dev/null +++ b/weather_dl_v2/cli/app/subcommands/license.py @@ -0,0 +1,88 @@ +import typer +from typing_extensions import Annotated +from app.services.license_service import license_service +from app.utils import Validator +from typing import List + + +app = typer.Typer() + + +class LicenseValidator(Validator): + pass + +@app.command(help="List all licenses.") +def list( + filter: Annotated[str, typer.Option(help="Filter by some value. Format: filter_key=filter_value")] = None + ): + if filter: + validator = LicenseValidator(valid_keys=["client_name"]) + + try: + data = validator.validate(filters=[filter]) + client_name = data['client_name'] + except Exception as e: + print(f"filter error: {e}") + return + + print(license_service._get_all_license_by_client_name(client_name)) + return + + print(license_service._get_all_license()) + +@app.command("get", help="Get a particular license by ID.") +def get_license( + license: Annotated[str, typer.Argument(help="License ID.")] + ): + print(license_service._get_license_by_license_id(license)) + +@app.command(help="Add new license.") +def add( + file_path: Annotated[str, typer.Argument(help='''Input json file. 
Example json for new license- {"client_name" : , "number_of_requests" : , "api_key" : , "api_url" : }''')], + ): + validator = LicenseValidator( + valid_keys=[ + "client_name", + "number_of_requests", + "api_key", + "api_url", + "api_email" + ] + ) + + try: + license_dict = validator.validate_json(file_path=file_path) + except Exception as e: + print(f"payload error: {e}") + return + + print(license_service._add_license(license_dict)) + +@app.command(help="Remove a license") +def remove( + license: Annotated[str, typer.Argument( help="License ID.")] + ): + print(license_service._remove_license(license)) + +@app.command(help="Update existing license.") +def update( + license: Annotated[str, typer.Argument(help="License ID.")], + file_path: Annotated[str, typer.Argument(help='''Input json file. Example json for new license- {"client_name" : , "number_of_requests" : , "api_key" : , "api_url" : }''')] + ): + validator = LicenseValidator( + valid_keys=[ + "client_name", + "number_of_requests", + "api_key", + "api_url", + "api_email" + ] + ) + try: + license_dict = validator.validate_json(file_path=file_path) + except Exception as e: + print(f"payload error: {e}") + return + + print(license_service._update_license(license, license_dict)) + diff --git a/weather_dl_v2/cli/app/subcommands/queue.py b/weather_dl_v2/cli/app/subcommands/queue.py new file mode 100644 index 00000000..7022dc7e --- /dev/null +++ b/weather_dl_v2/cli/app/subcommands/queue.py @@ -0,0 +1,53 @@ +import typer +from typing_extensions import Annotated +from app.services.queue_service import queue_service +from app.utils import Validator + +app = typer.Typer() + +class QueueValidator(Validator): + pass + +@app.command(help="List all the queues.") +def list( + filter: Annotated[str, typer.Option(help="Filter by some value. 
Format: filter_key=filter_value")] = None + ): + if filter: + + validator = QueueValidator(valid_keys=["client_name"]) + + try: + data = validator.validate(filters=[filter]) + client_name = data['client_name'] + except Exception as e: + print(f"filter error: {e}") + return + + print(queue_service._list_all_queues_by_client_name(client_name)) + return + + print(queue_service._list_all_queues()) + +@app.command(help="Get queue of particular license.") +def license_queue( + license: Annotated[str, typer.Argument(help="License ID")], + ): + print(queue_service._get_queue_by_license(license)) + +@app.command(help="Edit existing queues.") +def edit( + license: Annotated[str, typer.Argument(help="License ID.")], + file_path: Annotated[str, typer.Argument(help='''File path of priority json file. Example json: {"priority": ["c1.cfg", "c2.cfg",...]}''')] + ): + validator = QueueValidator(valid_keys=["priority"]) + + try: + data = validator.validate_json(file_path=file_path) + priority_list = data["priority"] + except Exception as e: + print(f"key error: {e}") + return + + print(queue_service._edit_queues(license, priority_list)) + + diff --git a/weather_dl_v2/cli/app/utils.py b/weather_dl_v2/cli/app/utils.py new file mode 100644 index 00000000..70afe9a6 --- /dev/null +++ b/weather_dl_v2/cli/app/utils.py @@ -0,0 +1,63 @@ +import abc +import logging +import dataclasses +import typing as t +import json + +logger = logging.getLogger(__name__) + +@dataclasses.dataclass +class Validator(abc.ABC): + + valid_keys: t.List[str] + + def validate(self, filters: t.List[str], show_valid_filters=True): + filter_dict = {} + + for filter in filters: + _filter = filter.split("=") + + if(len(_filter)!=2): + if show_valid_filters: + logger.info(f"valid filters are: {self.valid_keys}.") + raise ValueError("Incorrect Filter. 
Please Try again.") + + key, value = _filter + filter_dict[key] = value + + data_set = set(filter_dict.keys()) + valid_set = set(self.valid_keys) + + if self._validate_keys(data_set, valid_set): + return filter_dict + + def validate_json(self, file_path): + try: + with open(file_path) as f: + data: dict = json.load(f) + data_keys = data.keys() + + data_set = set(data_keys) + valid_set = set(self.valid_keys) + + if self._validate_keys(data_set, valid_set): + return data + + except FileNotFoundError: + logger.info("file not found.") + raise FileNotFoundError + + def _validate_keys(self, data_set: set, valid_set: set): + if data_set == valid_set: + return True + + missing_keys = valid_set.difference(data_set) + invalid_keys = data_set.difference(valid_set) + + if len(missing_keys) > 0: + raise ValueError(f"keys {missing_keys} are missing in file.") + + if len(invalid_keys) > 0: + raise ValueError(f"keys {invalid_keys} are invalid keys.") + + return False \ No newline at end of file diff --git a/weather_dl_v2/cli/environment.yml b/weather_dl_v2/cli/environment.yml new file mode 100644 index 00000000..74cc375c --- /dev/null +++ b/weather_dl_v2/cli/environment.yml @@ -0,0 +1,13 @@ +name: weather-dl-v2-cli +channels: + - conda-forge +dependencies: + - python=3.10 + - pip=23.0.1 + - typer=0.9.0 + - pip: + - requests + - ruff + - pytype + - pytest + - . 
diff --git a/weather_dl_v2/cli/setup.py b/weather_dl_v2/cli/setup.py new file mode 100644 index 00000000..fab1c923 --- /dev/null +++ b/weather_dl_v2/cli/setup.py @@ -0,0 +1,17 @@ +from setuptools import setup + +requirements = ['typer', 'requests'] + +setup( + name = "weather-dl-v2", + packages=["app", "app.subcommands", "app.services"], + install_requires=requirements, + version = "0.0.1", + author = "aniket", + description = ("This cli tools helps in interacting with weather dl v2 fast API server"), + entry_points={ + "console_scripts": [ + "weather-dl-v2=app.main:app" + ] + } +) From 535ebe2d9de372b42a3ac4a7a8a2e2a001904fde Mon Sep 17 00:00:00 2001 From: Rahul Mahrsee Date: Wed, 14 Jun 2023 12:03:10 +0000 Subject: [PATCH 14/51] Made use of google-secret-manager for storing license keys. --- weather_dl_v2/fastapi-server/routers/license.py | 4 +--- weather_dl_v2/license_deployment/environment.yml | 1 + weather_dl_v2/license_deployment/fetch.py | 15 +++++++++++---- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/weather_dl_v2/fastapi-server/routers/license.py b/weather_dl_v2/fastapi-server/routers/license.py index f2317384..2a796daa 100644 --- a/weather_dl_v2/fastapi-server/routers/license.py +++ b/weather_dl_v2/fastapi-server/routers/license.py @@ -11,9 +11,7 @@ class License(BaseModel): client_name: str number_of_requests: int - api_key: str - api_url: str - api_email: str + secret_id: str class LicenseInternal(License): diff --git a/weather_dl_v2/license_deployment/environment.yml b/weather_dl_v2/license_deployment/environment.yml index 966ec213..4848fafd 100644 --- a/weather_dl_v2/license_deployment/environment.yml +++ b/weather_dl_v2/license_deployment/environment.yml @@ -9,6 +9,7 @@ dependencies: - pip=22.3 - pip: - kubernetes + - google-cloud-secret-manager - aiohttp - numpy - xarray diff --git a/weather_dl_v2/license_deployment/fetch.py b/weather_dl_v2/license_deployment/fetch.py index 5fec64b2..67d97ae1 100644 --- 
a/weather_dl_v2/license_deployment/fetch.py +++ b/weather_dl_v2/license_deployment/fetch.py @@ -1,4 +1,5 @@ from concurrent.futures import ThreadPoolExecutor +from google.cloud import secretmanager import json import logging import time @@ -11,7 +12,8 @@ from manifest import FirestoreManifest db_client = FirestoreClient() - +secretmanager_client = secretmanager.SecretManagerServiceClient() + logger = logging.getLogger(__name__) def create_job(request, result): @@ -81,9 +83,14 @@ def boot_up(license: str) -> None: license_id = license client_name = result['client_name'] concurrency_limit = result['number_of_requests'] - os.environ.setdefault('CLIENT_URL', result['api_url']) - os.environ.setdefault('CLIENT_KEY', result['api_key']) - os.environ.setdefault('CLIENT_EMAIL', result['api_email']) + + response = secretmanager_client.access_secret_version(request={"name": result['secret_id']}) + payload = response.payload.data.decode("UTF-8") + secret_dict = json.loads(payload) + + os.environ.setdefault('CLIENT_URL', secret_dict.get('api_url', "")) + os.environ.setdefault('CLIENT_KEY', secret_dict.get('api_key', "")) + os.environ.setdefault('CLIENT_EMAIL', secret_dict.get('api_email', "")) if __name__ == "__main__": From 0754510a8b7639ed4dfdc9321c703a571581aeaf Mon Sep 17 00:00:00 2001 From: Rahul Mahrsee Date: Thu, 15 Jun 2023 12:46:52 +0000 Subject: [PATCH 15/51] Necessary code changes in weather-dl-v2 cli. 
--- weather_dl_v2/cli/.gitignore | 162 ------------------ weather_dl_v2/cli/README.md | 19 +- weather_dl_v2/cli/app/config.py | 2 +- weather_dl_v2/cli/app/main.py | 3 +- .../cli/app/services/download_service.py | 3 - .../cli/app/services/license_service.py | 4 +- .../cli/app/services/queue_service.py | 15 +- weather_dl_v2/cli/app/subcommands/download.py | 21 ++- weather_dl_v2/cli/app/subcommands/license.py | 30 ++-- weather_dl_v2/cli/app/subcommands/queue.py | 21 ++- weather_dl_v2/cli/setup.py | 2 +- 11 files changed, 57 insertions(+), 225 deletions(-) delete mode 100644 weather_dl_v2/cli/.gitignore diff --git a/weather_dl_v2/cli/.gitignore b/weather_dl_v2/cli/.gitignore deleted file mode 100644 index 251ecb52..00000000 --- a/weather_dl_v2/cli/.gitignore +++ /dev/null @@ -1,162 +0,0 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. 
-*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -cover/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -.pybuilder/ -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -# For a library or package, you might want to ignore these files since the code is -# intended to run in multiple environments; otherwise, check them in: -# .python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# poetry -# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. -# This is especially recommended for binary packages to ensure reproducibility, and is more -# commonly ignored for libraries. -# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control -#poetry.lock - -# pdm -# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. -#pdm.lock -# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it -# in version control. -# https://pdm.fming.dev/#use-with-ide -.pdm.toml - -# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# pytype static type analyzer -.pytype/ - -.ruff_cache/ - -# Cython debug symbols -cython_debug/ - -# PyCharm -# JetBrains specific template is maintained in a separate JetBrains.gitignore that can -# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore -# and can be added to the global gitignore or merged into this file. For a more nuclear -# option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ \ No newline at end of file diff --git a/weather_dl_v2/cli/README.md b/weather_dl_v2/cli/README.md index 5fe0e943..a25eb7eb 100644 --- a/weather_dl_v2/cli/README.md +++ b/weather_dl_v2/cli/README.md @@ -2,7 +2,8 @@ This is a command line interface for talking to the weather-dl-v2 FastAPI server. - Due to our org level policy we can't expose external-ip using LoadBalancer Service -while deploying our FastAPI server. +while deploying our FastAPI server. Hence we need to deploy the CLI on a VM to interact +through our fastapi server. Replace the FastAPI server pod's IP in Dockerfile (at line 8). ``` @@ -22,13 +23,14 @@ export REPO= eg:weather-tools gcloud builds submit . 
--tag "gcr.io/$PROJECT_ID/$REPO:weather-dl-cli" --timeout=79200 --machine-type=e2-highcpu-32 ``` + ## Create a VM using above created docker-image ``` export ZONE= eg: us-cental1-a export SERVICE_ACCOUNT= # Let's keep this as Compute Engine Default Service Account export IMAGE_PATH= # The above created image-path -gcloud compute instances create-with-container weather-dl-cli \ +gcloud compute instances create-with-container weather-dl-v2-cli \ --project=$PROJECT_ID \ --zone=$ZONE \ --machine-type=e2-medium \ @@ -38,18 +40,25 @@ gcloud compute instances create-with-container weather-dl-cli \ --service-account=$SERVICE_ACCOUNT \ --scopes=https://www.googleapis.com/auth/cloud-platform \ --tags=http-server,https-server \ - --image=projects/cos-cloud/global/images/cos-stable-105-17412-101-4 \ + --image=projects/cos-cloud/global/images/cos-stable-105-17412-101-24 \ --boot-disk-size=10GB \ --boot-disk-type=pd-balanced \ - --boot-disk-device-name=weather-dl-cli-server \ + --boot-disk-device-name=weather-dl-v2-cli \ --container-image=$IMAGE_PATH \ --container-restart-policy=on-failure \ --no-shielded-secure-boot \ --shielded-vtpm \ --shielded-integrity-monitoring \ - --labels=goog-ec-src=vm_add-gcloud,container-vm=cos-stable-105-17412-101-4 + --labels=goog-ec-src=vm_add-gcloud,container-vm=cos-stable-105-17412-101-24 +``` + +## Get the weather-dl-v2 docker image id ``` +docker images +``` + ## Use the cli after doing ssh in the above created VM ``` +docker run -it weather-dl-v2 --help ``` diff --git a/weather_dl_v2/cli/app/config.py b/weather_dl_v2/cli/app/config.py index 390d8611..174a9c22 100644 --- a/weather_dl_v2/cli/app/config.py +++ b/weather_dl_v2/cli/app/config.py @@ -5,4 +5,4 @@ def __init__(self): if "BASE_URI" in os.environ: self.BASE_URI = os.environ["BASE_URI"] else: - raise KeyError("BASE_URI not in enviroment.") \ No newline at end of file + raise KeyError("BASE_URI not in environment.") \ No newline at end of file diff --git a/weather_dl_v2/cli/app/main.py 
b/weather_dl_v2/cli/app/main.py index 0b2435e0..498b4929 100644 --- a/weather_dl_v2/cli/app/main.py +++ b/weather_dl_v2/cli/app/main.py @@ -13,8 +13,7 @@ app.add_typer(license.app, name="license", help="Manage licenses.") - -@app.command(help="Check if FastAPI server is live and rechable") +@app.command("ping", help="Check if FastAPI server is live and rechable.") def ping(): uri = f"{Config().BASE_URI}" diff --git a/weather_dl_v2/cli/app/services/download_service.py b/weather_dl_v2/cli/app/services/download_service.py index 8db8f0c2..e7468afa 100644 --- a/weather_dl_v2/cli/app/services/download_service.py +++ b/weather_dl_v2/cli/app/services/download_service.py @@ -4,11 +4,8 @@ from app.services.network_service import network_service from app.config import Config - logger = logging.getLogger(__name__) - - class DownloadService(abc.ABC): @abc.abstractmethod diff --git a/weather_dl_v2/cli/app/services/license_service.py b/weather_dl_v2/cli/app/services/license_service.py index 918bf2c3..43239664 100644 --- a/weather_dl_v2/cli/app/services/license_service.py +++ b/weather_dl_v2/cli/app/services/license_service.py @@ -33,7 +33,7 @@ def _update_license(self, license_id: str, license_dict: dict): pass -class LicneseServiceNetwork(LicenseService): +class LicenseServiceNetwork(LicenseService): def __init__(self): self.endpoint = f"{Config().BASE_URI}/license" @@ -84,6 +84,6 @@ def get_license_service(test: bool = False): if test: return LicenseServiceMock() else: - return LicneseServiceNetwork() + return LicenseServiceNetwork() license_service = get_license_service() diff --git a/weather_dl_v2/cli/app/services/queue_service.py b/weather_dl_v2/cli/app/services/queue_service.py index ab03b1da..255fd7d1 100644 --- a/weather_dl_v2/cli/app/services/queue_service.py +++ b/weather_dl_v2/cli/app/services/queue_service.py @@ -5,19 +5,16 @@ from app.services.network_service import network_service from app.config import Config - logger = logging.getLogger(__name__) - - class 
QueueService(abc.ABC): @abc.abstractmethod - def _list_all_queues(self): + def _get_all_license_queues(self): pass @abc.abstractmethod - def _list_all_queues_by_client_name(self, client_name: str): + def _get_license_queue_by_client_name(self, client_name: str): pass @abc.abstractmethod @@ -25,20 +22,20 @@ def _get_queue_by_license(self, license_id: str): pass @abc.abstractmethod - def _edit_queues(self, license_id: str, priority_list: t.List[str]): + def _edit_license_queue(self, license_id: str, priority_list: t.List[str]): pass class QueueServiceNetwork(QueueService): def __init__(self): self.endpoint = f"{Config().BASE_URI}/queues" - def _list_all_queues(self): + def _get_all_license_queues(self): return network_service.get( uri = self.endpoint, header = {"accept": "application/json"} ) - def _list_all_queues_by_client_name(self, client_name: str): + def _get_license_queue_by_client_name(self, client_name: str): return network_service.get( uri = self.endpoint, header = {"accept": "application/json"}, @@ -51,7 +48,7 @@ def _get_queue_by_license(self, license_id: str): header = {"accept": "application/json"} ) - def _edit_queues(self, license_id: str, priority_list: t.List[str]): + def _edit_license_queue(self, license_id: str, priority_list: t.List[str]): return network_service.post( uri = f"{self.endpoint}/{license_id}", header = { diff --git a/weather_dl_v2/cli/app/subcommands/download.py b/weather_dl_v2/cli/app/subcommands/download.py index d10cfd19..46e8684a 100644 --- a/weather_dl_v2/cli/app/subcommands/download.py +++ b/weather_dl_v2/cli/app/subcommands/download.py @@ -1,5 +1,4 @@ import typer -import json from typing_extensions import Annotated from app.services.download_service import download_service from app.utils import Validator @@ -10,9 +9,9 @@ class DowloadFilterValidator(Validator): pass -@app.command(help="List out all the configs.") -def list( - filter: Annotated[str, typer.Option(help="Filter by some value. 
Format: filter_key=filter_value")] = None +@app.command("list", help="List out all the configs.") +def get_downloads( + filter: Annotated[str, typer.Option(help="Filter by some value. Format: filter_key=filter_value.")] = None ): if filter: validator = DowloadFilterValidator(valid_keys=["client_name"]) @@ -29,9 +28,9 @@ def list( print(download_service._list_all_downloads()) -@app.command(help="Add new configs.") -def add( - file_path: Annotated[str, typer.Argument(help="File path of config to be uploaded")], +@app.command("add", help="Submit new config to download.") +def submit_download( + file_path: Annotated[str, typer.Argument(help="File path of config to be uploaded.")], license: Annotated[List[str], typer.Option("--license", "-l", help="License ID.")] = [], ): if len(license) == 0: @@ -40,14 +39,14 @@ def add( print(download_service._add_new_download(file_path, license)) -@app.command(help="Get a particular config.") -def config( +@app.command("get", help="Get a particular config.") +def get_download_by_config( config_name: Annotated[str, typer.Argument(help="Config file name.")] ): print(download_service._get_download_by_config(config_name)) -@app.command(help="Remove existing configs.") -def remove( +@app.command("remove", help="Remove existing config.") +def remove_download( config_name: Annotated[str, typer.Argument(help="Config file name.")] ): print(download_service._remove_download(config_name)) diff --git a/weather_dl_v2/cli/app/subcommands/license.py b/weather_dl_v2/cli/app/subcommands/license.py index 597b752b..f1c94929 100644 --- a/weather_dl_v2/cli/app/subcommands/license.py +++ b/weather_dl_v2/cli/app/subcommands/license.py @@ -2,8 +2,6 @@ from typing_extensions import Annotated from app.services.license_service import license_service from app.utils import Validator -from typing import List - app = typer.Typer() @@ -11,8 +9,8 @@ class LicenseValidator(Validator): pass -@app.command(help="List all licenses.") -def list( +@app.command("list", 
help="List all licenses.") +def get_all_license( filter: Annotated[str, typer.Option(help="Filter by some value. Format: filter_key=filter_value")] = None ): if filter: @@ -36,17 +34,15 @@ def get_license( ): print(license_service._get_license_by_license_id(license)) -@app.command(help="Add new license.") -def add( - file_path: Annotated[str, typer.Argument(help='''Input json file. Example json for new license- {"client_name" : , "number_of_requests" : , "api_key" : , "api_url" : }''')], +@app.command("add", help="Add new license.") +def add_license( + file_path: Annotated[str, typer.Argument(help='''Input json file. Example json for new license- {"client_name" : , "number_of_requests" : , "secret_id" : }''')], ): validator = LicenseValidator( valid_keys=[ "client_name", "number_of_requests", - "api_key", - "api_url", - "api_email" + "secret_id" ] ) @@ -58,24 +54,22 @@ def add( print(license_service._add_license(license_dict)) -@app.command(help="Remove a license") -def remove( +@app.command("remove", help="Remove a license.") +def remove_license( license: Annotated[str, typer.Argument( help="License ID.")] ): print(license_service._remove_license(license)) -@app.command(help="Update existing license.") -def update( +@app.command("update", help="Update existing license.") +def update_license( license: Annotated[str, typer.Argument(help="License ID.")], - file_path: Annotated[str, typer.Argument(help='''Input json file. Example json for new license- {"client_name" : , "number_of_requests" : , "api_key" : , "api_url" : }''')] + file_path: Annotated[str, typer.Argument(help='''Input json file. 
Example json for updated license- {"client_name" : , "number_of_requests" : , "secret_id" : }''')] ): validator = LicenseValidator( valid_keys=[ "client_name", "number_of_requests", - "api_key", - "api_url", - "api_email" + "secret_id" ] ) try: diff --git a/weather_dl_v2/cli/app/subcommands/queue.py b/weather_dl_v2/cli/app/subcommands/queue.py index 7022dc7e..14c3f685 100644 --- a/weather_dl_v2/cli/app/subcommands/queue.py +++ b/weather_dl_v2/cli/app/subcommands/queue.py @@ -8,8 +8,8 @@ class QueueValidator(Validator): pass -@app.command(help="List all the queues.") -def list( +@app.command("list", help="List all the license queues.") +def get_all_license_queue( filter: Annotated[str, typer.Option(help="Filter by some value. Format: filter_key=filter_value")] = None ): if filter: @@ -23,19 +23,19 @@ def list( print(f"filter error: {e}") return - print(queue_service._list_all_queues_by_client_name(client_name)) + print(queue_service._get_license_queue_by_client_name(client_name)) return - print(queue_service._list_all_queues()) + print(queue_service._get_all_license_queues()) -@app.command(help="Get queue of particular license.") -def license_queue( +@app.command("get", help="Get queue of particular license.") +def get_license_queue( license: Annotated[str, typer.Argument(help="License ID")], ): print(queue_service._get_queue_by_license(license)) -@app.command(help="Edit existing queues.") -def edit( +@app.command("edit", help="Edit existing license queue.") +def modify_license_queue( license: Annotated[str, typer.Argument(help="License ID.")], file_path: Annotated[str, typer.Argument(help='''File path of priority json file. 
Example json: {"priority": ["c1.cfg", "c2.cfg",...]}''')] ): @@ -48,6 +48,5 @@ def edit( print(f"key error: {e}") return - print(queue_service._edit_queues(license, priority_list)) - - + print(queue_service._edit_license_queue(license, priority_list)) + diff --git a/weather_dl_v2/cli/setup.py b/weather_dl_v2/cli/setup.py index fab1c923..b749f5e1 100644 --- a/weather_dl_v2/cli/setup.py +++ b/weather_dl_v2/cli/setup.py @@ -8,7 +8,7 @@ install_requires=requirements, version = "0.0.1", author = "aniket", - description = ("This cli tools helps in interacting with weather dl v2 fast API server"), + description = ("This cli tools helps in interacting with weather dl v2 fast API server."), entry_points={ "console_scripts": [ "weather-dl-v2=app.main:app" From 7af5fef47e6fab8e80c817ef5b7e51e914b9aaf7 Mon Sep 17 00:00:00 2001 From: Rahul Mahrsee Date: Tue, 27 Jun 2023 12:54:35 +0000 Subject: [PATCH 16/51] Added CLI & API Interaction doc. & Implemented absolute priority. --- weather_dl_v2/cli/CLI-Documentation.md | 250 ++++++++++++++++++ weather_dl_v2/cli/README.md | 4 +- weather_dl_v2/cli/app/main.py | 2 +- .../cli/app/services/queue_service.py | 13 + weather_dl_v2/cli/app/subcommands/queue.py | 42 ++- .../fastapi-server/API-Interactions.md | 23 ++ weather_dl_v2/fastapi-server/README.md | 2 +- .../fastapi-server/db_service/database.py | 17 ++ .../fastapi-server/routers/queues.py | 14 +- 9 files changed, 353 insertions(+), 14 deletions(-) create mode 100644 weather_dl_v2/cli/CLI-Documentation.md create mode 100644 weather_dl_v2/fastapi-server/API-Interactions.md diff --git a/weather_dl_v2/cli/CLI-Documentation.md b/weather_dl_v2/cli/CLI-Documentation.md new file mode 100644 index 00000000..0fe3093f --- /dev/null +++ b/weather_dl_v2/cli/CLI-Documentation.md @@ -0,0 +1,250 @@ +# CLI Documentation +The following doc provides cli commands and their various arguments and options. 
+ +Base Command: +``` +weather-dl-v2 +``` + +## Ping +Ping the FastAPI server and check if it’s live and reachable. + +
+ weather-dl-v2 ping + +##### Usage +``` +weather-dl-v2 ping +``` + +
+ +
+ +## Download +Manage download configs. + +
+ weather-dl-v2 download add
+ Adds a new download config to specific licenses. +
+ + +##### Arguments +> `FILE_PATH` : Path to config file. + +##### Options +> `-l/--license` : License ID to which this download has to be added. + +##### Usage +``` +weather-dl-v2 download add /path/to/example.cfg -l L1 -l L2 +``` +
+ + +
+ weather-dl-v2 download list
+ List all the active downloads. +
+ +The list can also be filtered by client name. + +##### Options +> `--filter` : Filter the list by some key and value. Format of filter filter_key=filter_value. + +##### Usage +``` +weather-dl-v2 download list +weather-dl-v2 download list --filter client_name=cds +``` +
+ + +
+ weather-dl-v2 download get
+ Get a particular download by config name. +
+ +##### Arguments +> `CONFIG_NAME` : Name of the download config. + +##### Usage +``` +weather-dl-v2 download get example.cfg +``` +
+ +
+ weather-dl-v2 download remove
+ Remove a download by config name. +
+ +##### Arguments +> `CONFIG_NAME` : Name of the download config. + +##### Usage +``` +weather-dl-v2 download remove example.cfg +``` +
+ +
+ +## License +Manage licenses. + +
+ weather-dl-v2 license add
+ Add a new license. New licenses are added using a json file. +
+ +The json file should be in this format: +``` +{ + "client_name": , + "number_of_requests": , + "secret_id": +} +``` + + +##### Arguments +> `FILE_PATH` : Path to the license json. + +##### Usage +``` +weather-dl-v2 license add /path/to/new-license.json +``` +
+ +
+ weather-dl-v2 license get
+ Get a particular license by license ID. +
+ +##### Arguments +> `LICENSE` : License ID of the license to be fetched. + +##### Usage +``` +weather-dl-v2 license get L1 +``` +
+ +
+ weather-dl-v2 license remove
+ Remove a particular license by license ID. +
+ +##### Arguments +> `LICENSE` : License ID of the license to be removed. + +##### Usage +``` +weather-dl-v2 license remove L1 +``` +
+ +
+ weather-dl-v2 license list
+ List all the licenses available. +
+ + The list can also be filtered by client name. + +##### Options +> `--filter` : Filter the list by some key and value. Format of filter filter_key=filter_value. + +##### Usage +``` +weather-dl-v2 license list +weather-dl-v2 license list --filter client_name=cds +``` +
+ +
+ weather-dl-v2 license update
+ Update an existing license using License ID and a license json. +
+ + The json should be of the same format used to add a new license. + +##### Arguments +> `LICENSE` : License ID of the license to be edited. +> `FILE_PATH` : Path to the license json. + +##### Usage +``` +weather-dl-v2 license update L1 /path/to/license.json +``` +
+ +
+ +## Queue +Manage all the license queues. + +
+ weather-dl-v2 queue list
+ List all the queues. +
+ + The list can also be filtered by client name. + +##### Options +> `--filter` : Filter the list by some key and value. Format of filter filter_key=filter_value. + +##### Usage +``` +weather-dl-v2 queue list +weather-dl-v2 queue list --filter client_name=cds +``` +
+ +
+ weather-dl-v2 queue get
+ Get a queue by license ID. +
+ + The list can also be filtered by client name. + +##### Arguments +> `LICENSE` : License ID of the queue to be fetched. + +##### Usage +``` +weather-dl-v2 queue get L1 +``` +
+ +
+ weather-dl-v2 queue edit
+ Edit the priority of configs inside queues using edit. +
+ +Priority can be edited in two ways: +1. The new priority queue is passed using a priority json file that should follow the following format: +``` +{ + "priority": ["c1.cfg", "c3.cfg", "c2.cfg"] +} +``` +2. A config file name and its absolute priority can be passed and it updates the priority for that particular config file in the mentioned license queue. + +##### Arguments +> `LICENSE` : License ID of queue to be edited. + +##### Options +> `-f/--file` : Path of the new priority json file. +> `-c/--config` : Config name for absolute priority. +> `-p/--priority`: Absolute priority for the config in a license queue. Priority increases in ascending order with 0 having highest priority. + +##### Usage +``` +weather-dl-v2 queue edit L1 --file /path/to/priority.json +weather-dl-v2 queue edit L1 --config example.cfg --priority 0 +``` +
+ +
\ No newline at end of file diff --git a/weather_dl_v2/cli/README.md b/weather_dl_v2/cli/README.md index a25eb7eb..c5b718be 100644 --- a/weather_dl_v2/cli/README.md +++ b/weather_dl_v2/cli/README.md @@ -20,7 +20,7 @@ ENV BASE_URI=http://:8080 export PROJECT_ID= export REPO= eg:weather-tools -gcloud builds submit . --tag "gcr.io/$PROJECT_ID/$REPO:weather-dl-cli" --timeout=79200 --machine-type=e2-highcpu-32 +gcloud builds submit . --tag "gcr.io/$PROJECT_ID/$REPO:weather-dl-v2-cli" --timeout=79200 --machine-type=e2-highcpu-32 ``` @@ -52,7 +52,7 @@ gcloud compute instances create-with-container weather-dl-v2-cli \ --labels=goog-ec-src=vm_add-gcloud,container-vm=cos-stable-105-17412-101-24 ``` -## Get the weather-dl-v2 docker image id +## To get the weather-dl-v2 cli docker-image id in VM ``` docker images ``` diff --git a/weather_dl_v2/cli/app/main.py b/weather_dl_v2/cli/app/main.py index 498b4929..a94d025a 100644 --- a/weather_dl_v2/cli/app/main.py +++ b/weather_dl_v2/cli/app/main.py @@ -15,7 +15,7 @@ @app.command("ping", help="Check if FastAPI server is live and rechable.") def ping(): - uri = f"{Config().BASE_URI}" + uri = f"{Config().BASE_URI}/" try: x = requests.get(uri) diff --git a/weather_dl_v2/cli/app/services/queue_service.py b/weather_dl_v2/cli/app/services/queue_service.py index 255fd7d1..c49a3986 100644 --- a/weather_dl_v2/cli/app/services/queue_service.py +++ b/weather_dl_v2/cli/app/services/queue_service.py @@ -25,6 +25,11 @@ def _get_queue_by_license(self, license_id: str): def _edit_license_queue(self, license_id: str, priority_list: t.List[str]): pass + @abc.abstractmethod + def _edit_config_absolute_priority(self, license_id: str, config_name: str, priority: int): + pass + + class QueueServiceNetwork(QueueService): def __init__(self): self.endpoint = f"{Config().BASE_URI}/queues" @@ -58,6 +63,14 @@ def _edit_license_queue(self, license_id: str, priority_list: t.List[str]): payload = json.dumps(priority_list) ) + def 
_edit_config_absolute_priority(self, license_id: str, config_name: str, priority: int): + return network_service.put( + uri = f"{self.endpoint}/priority/{license_id}", + header = {"accept": "application/json"}, + query = {"config_name": config_name, "priority": priority} + ) + + class QueueServiceMock(QueueService): pass diff --git a/weather_dl_v2/cli/app/subcommands/queue.py b/weather_dl_v2/cli/app/subcommands/queue.py index 14c3f685..5e6a75cb 100644 --- a/weather_dl_v2/cli/app/subcommands/queue.py +++ b/weather_dl_v2/cli/app/subcommands/queue.py @@ -34,19 +34,43 @@ def get_license_queue( ): print(queue_service._get_queue_by_license(license)) -@app.command("edit", help="Edit existing license queue.") +@app.command("edit", help="Edit existing license queue. Queue can edited via a priority file or my moving a single config to a given priority.") def modify_license_queue( license: Annotated[str, typer.Argument(help="License ID.")], - file_path: Annotated[str, typer.Argument(help='''File path of priority json file. Example json: {"priority": ["c1.cfg", "c2.cfg",...]}''')] + file: Annotated[str, typer.Option("--file", "-f", help='''File path of priority json file. Example json: {"priority": ["c1.cfg", "c2.cfg",...]}''')] = None, + config: Annotated[str, typer.Option("--config", "-c", help="Config name for absolute priority.")] = None, + priority: Annotated[int, typer.Option("--priority", "-p", help="Absolute priority for the config in a license queue. 
Priority increases in ascending order with 0 having highest priority.")] = None ): - validator = QueueValidator(valid_keys=["priority"]) - try: - data = validator.validate_json(file_path=file_path) - priority_list = data["priority"] - except Exception as e: - print(f"key error: {e}") + if file is None and (config is None and priority is None): + print("Priority file or config name with absolute priority must be passed.") return + + if file and (config or priority): + print("--config & --priority can't be used along with --file argument.") + return + + if file: + validator = QueueValidator(valid_keys=["priority"]) + + try: + data = validator.validate_json(file_path=file) + priority_list = data["priority"] + except Exception as e: + print(f"key error: {e}") + return + print(queue_service._edit_license_queue(license, priority_list)) + return + elif config and priority: + if priority < 0: + print("Priority can not be negative.") + return + + print(queue_service._edit_config_absolute_priority(license, config, priority)) + return + else: + print("--config & --priority arguments should be used together.") + return + - print(queue_service._edit_license_queue(license, priority_list)) diff --git a/weather_dl_v2/fastapi-server/API-Interactions.md b/weather_dl_v2/fastapi-server/API-Interactions.md new file mode 100644 index 00000000..55013377 --- /dev/null +++ b/weather_dl_v2/fastapi-server/API-Interactions.md @@ -0,0 +1,23 @@ +# API Interactions +| Command | Type | Endpoint | +|---|---|---| +| `weather-dl-v2 ping` | `get` | `/` +| Download | | | +| `weather-dl-v2 download add –l ` | `post` | `/download/` | +| `weather-dl-v2 download list` | `get` | `/download/` | +| `weather-dl-v2 download list --filter client_name=` | `get` | `/download?client_name={name}` | +| `weather-dl-v2 download get ` | `get` | `/download/{config_name}` | +| `weather-dl-v2 download remove ` | `delete` | `/download/{config_name}` | +| License | | | +| `weather-dl-v2 license add ` | `post` | `/license/` 
| +| `weather-dl-v2 license get ` | `get` | `/license/{license_id}` | +| `weather-dl-v2 license remove ` | `delete` | `/license/{license_id}` | +| `weather-dl-v2 license list` | `get` | `/license/` | +| `weather-dl-v2 license list --filter client_name=` | `get` | `/license?client_name={name}` | +| `weather-dl-v2 license update ` | `put` | `/license/{license_id}` | +| Queue | | | +| `weather-dl-v2 queue list` | `get` | `/queues/` | +| `weather-dl-v2 queue list --filter client_name=` | `get` | `/queues?client_name={name}` | +| `weather-dl-v2 queue get ` | `get` | `/queues/{license_id}` | +| `weather-dl-v2 queue edit --config --priority ` | `put` | `/queues/priority/{license_id}` | +| `weather-dl-v2 queue edit --file ` | `post` | `/queues/{license_id}` | \ No newline at end of file diff --git a/weather_dl_v2/fastapi-server/README.md b/weather_dl_v2/fastapi-server/README.md index 10004215..805b8991 100644 --- a/weather_dl_v2/fastapi-server/README.md +++ b/weather_dl_v2/fastapi-server/README.md @@ -31,7 +31,7 @@ gcloud builds submit . --tag "gcr.io/$PROJECT_ID/$REPO:weather-dl-v2-server" --t * **Add path of created server image in server.yaml**: ``` -Please write down the fetcher's docker image path at Line 42 of server.yaml. +Please write down the fastAPI server's docker image path at Line 42 of server.yaml. 
``` * **Deploy fastapi server on kubernetes:** diff --git a/weather_dl_v2/fastapi-server/db_service/database.py b/weather_dl_v2/fastapi-server/db_service/database.py index 144408a7..990f1fb3 100644 --- a/weather_dl_v2/fastapi-server/db_service/database.py +++ b/weather_dl_v2/fastapi-server/db_service/database.py @@ -59,6 +59,10 @@ def _get_queue_by_client_name(self, client_name: str) -> list: def _update_license_queue(self, license_id: str, priority_list: list) -> None: pass + @abc.abstractmethod + def _update_config_priority__in_license(self, license_id: str, config_name: str, priority: int) -> None: + pass + @abc.abstractmethod def _check_license_exists(self, license_id: str) -> bool: pass @@ -193,6 +197,19 @@ def _update_license_queue(self, license_id: str, priority_list: list) -> None: ) print(f"Updated {license_id} queue in 'queues' collection. Update_time: {result.update_time}.") + def _update_config_priority__in_license(self, license_id: str, config_name: str, priority: int) -> None: + snapshot: DocumentSnapshot = self._get_db().collection('queues').document(license_id).get() + priority_list = snapshot.to_dict()['queue'] + if config_name not in priority_list: + print(f"'{config_name}' not in queue.") + raise + new_priority_list = [c for c in priority_list if c != config_name] + new_priority_list.insert(priority, config_name) + result: WriteResult = self._get_db().collection('queues').document(license_id).update( + {'queue': new_priority_list} + ) + print(f"Updated {snapshot.id} queue in 'queues' collection. 
Update_time: {result.update_time}.") + def _check_license_exists(self, license_id: str) -> bool: result: DocumentSnapshot = self._get_db().collection('license').document(license_id).get() return result.exists diff --git a/weather_dl_v2/fastapi-server/routers/queues.py b/weather_dl_v2/fastapi-server/routers/queues.py index 9b8eebee..62271282 100644 --- a/weather_dl_v2/fastapi-server/routers/queues.py +++ b/weather_dl_v2/fastapi-server/routers/queues.py @@ -30,7 +30,7 @@ async def get_license_queue(license_id: str): return result -# Change config's priority on particular license +# Change priority queue of particular license @router.post("/{license_id}") def modify_license_queue(license_id: str, priority_list: list | None = []): if not db_client._check_license_exists(license_id): @@ -40,3 +40,15 @@ def modify_license_queue(license_id: str, priority_list: list | None = []): return {"message": f"'{license_id}' license priority updated successfully."} except Exception: return {"message": f"Failed to update '{license_id}' license priority."} + + +# Change config's priority in particular license +@router.put("/priority/{license_id}") +def modify_config_priority__in_license(license_id: str, config_name: str, priority: int): + if not db_client._check_license_exists(license_id): + raise HTTPException(status_code=404, detail="License's priority not found.") + try: + db_client._update_config_priority__in_license(license_id, config_name, priority) + return {"message": f"'{license_id}' license '{config_name}' priority updated successfully."} + except Exception: + return {"message": f"Failed to update '{license_id}' license priority."} \ No newline at end of file From 86070d21d4c2ff2024097370b37cac04ebe30787 Mon Sep 17 00:00:00 2001 From: Rahul Mahrsee Date: Wed, 28 Jun 2023 08:12:16 +0000 Subject: [PATCH 17/51] Removed nginx-server -- not required. 
--- weather_dl_v2/nginx-server/Dockerfile | 8 ---- weather_dl_v2/nginx-server/README.md | 63 --------------------------- weather_dl_v2/nginx-server/nginx.conf | 10 ----- 3 files changed, 81 deletions(-) delete mode 100644 weather_dl_v2/nginx-server/Dockerfile delete mode 100644 weather_dl_v2/nginx-server/README.md delete mode 100644 weather_dl_v2/nginx-server/nginx.conf diff --git a/weather_dl_v2/nginx-server/Dockerfile b/weather_dl_v2/nginx-server/Dockerfile deleted file mode 100644 index 68b2b666..00000000 --- a/weather_dl_v2/nginx-server/Dockerfile +++ /dev/null @@ -1,8 +0,0 @@ -# server environment -FROM nginx:alpine -COPY ./nginx.conf /etc/nginx/conf.d/configfile.template - -ENV PORT 8080 -ENV HOST 0.0.0.0 -EXPOSE 8080 -CMD sh -c "envsubst '\$PORT' < /etc/nginx/conf.d/configfile.template > /etc/nginx/conf.d/default.conf && nginx -g 'daemon off;'" \ No newline at end of file diff --git a/weather_dl_v2/nginx-server/README.md b/weather_dl_v2/nginx-server/README.md deleted file mode 100644 index 2dda4090..00000000 --- a/weather_dl_v2/nginx-server/README.md +++ /dev/null @@ -1,63 +0,0 @@ -## Deployment Instructions: - -Due to our org level policy we can't expose external-ip using LoadBalancer Service -while deploying our FastAPI server. - -In case your project don't have any such restriction a -then no need to create a nginx-server on VM to access this fastapi server -instead directly hit the external-ip exposed by LoadBalancer service on kubernetes. - -* **Replace the FastAPI server Pod's IP in nginx.conf**: -``` -Please write down the FastAPI server Pod's IP at Line 8 of nginx.conf. -``` -> Note: Command to get the Pod IP : `kubectl get pods -o wide`. -> -> Though note that in case of Pod restart IP might get change. So we need to look -> for better solution for the same. - -* **Create docker image for nginx-server**: -``` -export PROJECT_ID= -export REPO= eg:weather-tools - -gcloud builds submit . 
--tag "gcr.io/$PROJECT_ID/$REPO:weather-dl-v2-nginx-server" --timeout=79200 --machine-type=e2-highcpu-32 -``` - -* **Create a VM using above craeted docker-image**: -``` -export ZONE= eg: us-cental1-a -export SERVICE_ACCOUNT= # Let's keep this as Compute Engine Default Service Account -export IMAGE_PATH= # The above created image-path - -gcloud compute instances create-with-container weather-dl-v2-nginx-server \ - --project=$PROJECT_ID \ - --zone=$ZONE \ - --machine-type=e2-medium \ - --network-interface=network-tier=PREMIUM,stack-type=IPV4_ONLY,subnet=default \ - --maintenance-policy=MIGRATE \ - --provisioning-model=STANDARD \ - --service-account=$SERVICE_ACCOUNT \ - --scopes=https://www.googleapis.com/auth/cloud-platform \ - --tags=http-server,https-server \ - --image=projects/cos-cloud/global/images/cos-stable-105-17412-101-4 \ - --boot-disk-size=10GB \ - --boot-disk-type=pd-balanced \ - --boot-disk-device-name=weather-dl-v2-nginx-server \ - --container-image=$IMAGE_PATH \ - --container-restart-policy=on-failure \ - --no-shielded-secure-boot \ - --shielded-vtpm \ - --shielded-integrity-monitoring \ - --labels=goog-ec-src=vm_add-gcloud,container-vm=cos-stable-105-17412-101-4 -``` - -* **Hit our fastapi server after doing ssh in the above create VM**: -``` -curl localhost:8080 -``` - -* **Upload config file to fastapi server from VM**: -``` -curl -F file=@example.cfg localhost:8080/uploadfile/ -``` diff --git a/weather_dl_v2/nginx-server/nginx.conf b/weather_dl_v2/nginx-server/nginx.conf deleted file mode 100644 index ca7f5ad1..00000000 --- a/weather_dl_v2/nginx-server/nginx.conf +++ /dev/null @@ -1,10 +0,0 @@ -server { - listen 8080; - listen [::]:8080; - - server_name localhost; - - location / { - proxy_pass http://:8080; - } -} \ No newline at end of file From fbb16cfe3ee65ae3a5a29a3350e4597538e0bd45 Mon Sep 17 00:00:00 2001 From: aniketinfocusp <122869307+aniketinfocusp@users.noreply.github.com> Date: Mon, 17 Jul 2023 22:47:31 +0530 Subject: [PATCH 18/51] 
dl-v2 server refactor no tests (#358) * Refactored DB layer. * added logger * added logger to new db layer * added mock databases * added mock databases * Added Integration tests for download. * Fixed license handler and router after mc. * Added license integration tests * Added integrations tests for queue. * fixed lint issues * ignore ruff for a test * removed old db_service * refactor license deployment * updated queue modify priority * removed integration tests * fastapi server minor fixes * updated logger * Update main.py * revert pyproject.toml * Update pyproject.toml * Update pyproject.toml * fixes in license deployment * updated license handler for secret manager --- pyproject.toml | 2 +- .../fastapi-server/config_files/example.cfg | 32 --- .../config_processing/manifest.py | 12 +- .../config_processing/partition.py | 6 +- .../config_processing/pipeline.py | 11 +- .../fastapi-server/config_processing/util.py | 9 +- .../fastapi-server/database/__init__.py | 0 .../database/download_handler.py | 65 +++++ .../database/license_handler.py | 164 +++++++++++ .../fastapi-server/database/queue_handler.py | 166 ++++++++++++ .../fastapi-server/database/session.py | 43 +++ .../fastapi-server/db_service/database.py | 254 ------------------ weather_dl_v2/fastapi-server/environment.yml | 2 + .../license_dep/deployment_creator.py | 7 +- weather_dl_v2/fastapi-server/logging.conf | 36 +++ weather_dl_v2/fastapi-server/main.py | 21 +- .../fastapi-server/routers/download.py | 30 ++- .../fastapi-server/routers/license.py | 81 ++++-- .../fastapi-server/routers/queues.py | 39 +-- weather_dl_v2/license_deployment/database.py | 6 +- weather_dl_v2/license_deployment/fetch.py | 4 +- weather_dl_v2/license_deployment/manifest.py | 11 +- weather_dl_v2/license_deployment/util.py | 16 +- 23 files changed, 632 insertions(+), 385 deletions(-) delete mode 100644 weather_dl_v2/fastapi-server/config_files/example.cfg create mode 100644 weather_dl_v2/fastapi-server/database/__init__.py create 
mode 100644 weather_dl_v2/fastapi-server/database/download_handler.py create mode 100644 weather_dl_v2/fastapi-server/database/license_handler.py create mode 100644 weather_dl_v2/fastapi-server/database/queue_handler.py create mode 100644 weather_dl_v2/fastapi-server/database/session.py delete mode 100644 weather_dl_v2/fastapi-server/db_service/database.py create mode 100644 weather_dl_v2/fastapi-server/logging.conf diff --git a/pyproject.toml b/pyproject.toml index 8e782e8d..2eaadb0d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,4 +42,4 @@ target-version = "py310" [tool.ruff.mccabe] # Unlike Flake8, default to a complexity level of 10. -max-complexity = 10 \ No newline at end of file +max-complexity = 10 diff --git a/weather_dl_v2/fastapi-server/config_files/example.cfg b/weather_dl_v2/fastapi-server/config_files/example.cfg deleted file mode 100644 index aa7418ea..00000000 --- a/weather_dl_v2/fastapi-server/config_files/example.cfg +++ /dev/null @@ -1,32 +0,0 @@ -[parameters] -client=mars - -target_path=gs:// None: """Reset Manifest state in preparation for a new transaction.""" @@ -402,7 +404,7 @@ def _get_db(self) -> firestore.firestore.Client: cred = credentials.ApplicationDefault() firebase_admin.initialize_app(cred) - print('Initialized Firebase App.') + logger.info('Initialized Firebase App.') if attempts > 4: raise ManifestException('Exceeded number of retries to get firestore client.') from e @@ -432,7 +434,7 @@ def _read(self, location: str) -> DownloadStatus: def _update(self, download_status: DownloadStatus) -> None: """Update or create a download status record.""" - print('Updating Firestore Manifest.') + logger.info('Updating Firestore Manifest.') status = DownloadStatus.to_dict(download_status) doc_id = generate_md5_hash(status['location']) @@ -444,7 +446,7 @@ def _update(self, download_status: DownloadStatus) -> None: result: WriteResult = download_doc_ref.set(status) - print(f'Firestore manifest updated. 
' + logger.info(f'Firestore manifest updated. ' f'update_time={result.update_time}, ' f'filename={download_status.location}.') diff --git a/weather_dl_v2/fastapi-server/config_processing/partition.py b/weather_dl_v2/fastapi-server/config_processing/partition.py index bba76e41..282e1fa5 100644 --- a/weather_dl_v2/fastapi-server/config_processing/partition.py +++ b/weather_dl_v2/fastapi-server/config_processing/partition.py @@ -1,3 +1,4 @@ +import logging import copy as cp import dataclasses import itertools @@ -8,6 +9,7 @@ from .config import Config from .stores import Store, FSStore +logger = logging.getLogger(__name__) @dataclasses.dataclass class PartitionConfig(): @@ -61,7 +63,7 @@ def skip_partition(self, config: Config) -> bool: target = prepare_target_name(config) if self.store.exists(target): - print(f'file {target} found, skipping.') + logger.info(f'file {target} found, skipping.') self.manifest.skip(config.config_name, config.dataset, config.selection, target, config.user_id) return True @@ -95,4 +97,4 @@ def update_manifest_collection(self, partition: Config) -> Config: location = prepare_target_name(partition) self.manifest.schedule(partition.config_name, partition.dataset, partition.selection, location, partition.user_id) - print(f'Created partition {location!r}.') + logger.info(f'Created partition {location!r}.') diff --git a/weather_dl_v2/fastapi-server/config_processing/pipeline.py b/weather_dl_v2/fastapi-server/config_processing/pipeline.py index fcb3c402..cca9df59 100644 --- a/weather_dl_v2/fastapi-server/config_processing/pipeline.py +++ b/weather_dl_v2/fastapi-server/config_processing/pipeline.py @@ -3,10 +3,11 @@ from .parsers import process_config from .partition import PartitionConfig from .manifest import FirestoreManifest -from db_service.database import FirestoreClient - -db_client = FirestoreClient() +from database.download_handler import get_download_handler +from database.queue_handler import get_queue_handler +download_handler = 
get_download_handler() +queue_handler = get_queue_handler() def start_processing_config(config_file, licenses): config = {} @@ -29,5 +30,5 @@ def start_processing_config(config_file, licenses): partition_obj.update_manifest_collection(partition) # Make entry in 'download' & 'queues' collection. - db_client._start_download(config_name, config.client) - db_client._update_queues_on_start_download(config_name, licenses) + download_handler._start_download(config_name, config.client) + queue_handler._update_queues_on_start_download(config_name, licenses) diff --git a/weather_dl_v2/fastapi-server/config_processing/util.py b/weather_dl_v2/fastapi-server/config_processing/util.py index 7b8b128d..825205ec 100644 --- a/weather_dl_v2/fastapi-server/config_processing/util.py +++ b/weather_dl_v2/fastapi-server/config_processing/util.py @@ -1,3 +1,4 @@ +import logging import datetime import geojson import hashlib @@ -21,6 +22,8 @@ LONGITUDE_RANGE = (-180, 180) GLOBAL_COVERAGE_AREA = [90, -180, -90, 180] +logger = logging.getLogger(__name__) + def _retry_if_valid_input_but_server_or_socket_error_and_timeout_filter(exception) -> bool: if isinstance(exception, socket.timeout): @@ -72,7 +75,7 @@ def copy(src: str, dst: str) -> None: try: subprocess.run(['gsutil', 'cp', src, dst], check=True, capture_output=True) except subprocess.CalledProcessError as e: - print(f'Failed to copy file {src!r} to {dst!r} due to {e.stderr.decode("utf-8")}') + logger.info(f'Failed to copy file {src!r} to {dst!r} due to {e.stderr.decode("utf-8")}') raise @@ -80,7 +83,7 @@ def copy(src: str, dst: str) -> None: def to_json_serializable_type(value: t.Any) -> t.Any: """Returns the value with a type serializable to JSON""" # Note: The order of processing is significant. 
- print('Serializing to JSON') + logger.info('Serializing to JSON') if pd.isna(value) or value is None: return None @@ -181,5 +184,5 @@ def download_with_aria2(url: str, path: str) -> None: check=True, capture_output=True) except subprocess.CalledProcessError as e: - print(f'Failed download from server {url!r} to {path!r} due to {e.stderr.decode("utf-8")}') + logger.info(f'Failed download from server {url!r} to {path!r} due to {e.stderr.decode("utf-8")}') raise diff --git a/weather_dl_v2/fastapi-server/database/__init__.py b/weather_dl_v2/fastapi-server/database/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/weather_dl_v2/fastapi-server/database/download_handler.py b/weather_dl_v2/fastapi-server/database/download_handler.py new file mode 100644 index 00000000..93b476e6 --- /dev/null +++ b/weather_dl_v2/fastapi-server/database/download_handler.py @@ -0,0 +1,65 @@ +import abc +import logging +from firebase_admin import firestore +from google.cloud.firestore_v1 import DocumentSnapshot +from google.cloud.firestore_v1.types import WriteResult +from database.session import get_db + +logger = logging.getLogger(__name__) + +def get_download_handler(): + return DownloadHandlerFirestore(db=get_db()) + +def get_mock_download_handler(): + return DownloadHandlerMock() + +class DownloadHandler(abc.ABC): + @abc.abstractmethod + def _start_download(self, config_name: str, client_name: str) -> None: + pass + + @abc.abstractmethod + def _stop_download(self, config_name: str) -> None: + pass + + @abc.abstractmethod + def _check_download_exists(self, config_name: str) -> bool: + pass + +class DownloadHandlerMock(DownloadHandler): + def __init__(self): + pass + + def _start_download(self, config_name: str, client_name: str) -> None: + logger.info(f"Added {config_name} in 'download' collection. Update_time: 000000.") + + def _stop_download(self, config_name: str) -> None: + logger.info(f"Removed {config_name} in 'download' collection. 
Update_time: 000000.") + + def _check_download_exists(self, config_name: str) -> bool: + if config_name == "no_exist": + return False + elif config_name == "no_exist.cfg": + return False + else: + return True + +class DownloadHandlerFirestore(DownloadHandler): + def __init__(self, db: firestore.firestore.Client): + self.db = db + self.collection = "download" + + def _start_download(self, config_name: str, client_name: str) -> None: + result: WriteResult = self.db.collection('download').document(config_name).set( + {'config_name': config_name, 'client_name': client_name} + ) + + logger.info(f"Added {config_name} in 'download' collection. Update_time: {result.update_time}.") + + def _stop_download(self, config_name: str) -> None: + timestamp = self.db.collection('download').document(config_name).delete() + logger.info(f"Removed {config_name} in 'download' collection. Update_time: {timestamp}.") + + def _check_download_exists(self, config_name: str) -> bool: + result: DocumentSnapshot = self.db.collection('download').document(config_name).get() + return result.exists diff --git a/weather_dl_v2/fastapi-server/database/license_handler.py b/weather_dl_v2/fastapi-server/database/license_handler.py new file mode 100644 index 00000000..7a88c7a2 --- /dev/null +++ b/weather_dl_v2/fastapi-server/database/license_handler.py @@ -0,0 +1,164 @@ +import abc +import logging +from firebase_admin import firestore +from google.cloud.firestore_v1 import DocumentSnapshot +from google.cloud.firestore_v1.types import WriteResult +from database.session import get_db + +logger = logging.getLogger(__name__) + +def get_license_handler(): + return LicenseHandlerFirestore(db=get_db()) + +def get_mock_license_handler(): + return LicenseHandlerMock() + +class LicenseHandler(abc.ABC): + @abc.abstractmethod + def _add_license(self, license_dict: dict) -> str: + pass + + @abc.abstractmethod + def _delete_license(self, license_id: str) -> None: + pass + + @abc.abstractmethod + def 
_check_license_exists(self, license_id: str) -> bool: + pass + + @abc.abstractmethod + def _get_license_by_license_id(self, license_id: str) -> dict: + pass + + @abc.abstractmethod + def _get_license_by_client_name(self, client_name: str) -> list: + pass + + @abc.abstractmethod + def _get_licenses(self) -> list: + pass + + @abc.abstractmethod + def _update_license(self, license_id: str, license_dict: dict) -> None: + pass + + @abc.abstractmethod + def _create_license_queue(self, license_id: str, client_name: str) -> None: + pass + + @abc.abstractmethod + def _remove_license_queue(self, license_id: str) -> None: + pass + +class LicenseHandlerMock(LicenseHandler): + def __init__(self): + pass + + def _add_license(self, license_dict: dict) -> str: + license_id = "L1" + logger.info(f"Added {license_id} in 'license' collection. Update_time: 00000.") + return license_id + + def _delete_license(self, license_id: str) -> None: + logger.info(f"Removed {license_id} in 'license' collection. Update_time: 00000.") + + def _update_license(self, license_id: str, license_dict: dict) -> None: + logger.info(f"Updated {license_id} in 'license' collection. 
Update_time: 00000.") + + def _check_license_exists(self, license_id: str) -> bool: + if license_id == "no_exists": + return False + else: + return True + + def _get_license_by_license_id(self, license_id: str) -> dict: + if license_id == "no_exists": + return None + return { + "license_id": license_id, + "secret_id": "xxxx", + "client_name": "dummy_client", + "k8s_deployment_id": "k1", + "number_of_requets": 100 + } + + def _get_license_by_client_name(self, client_name: str) -> list: + return [{ + "license_id": "L1", + "secret_id": "xxxx", + "client_name": client_name, + "k8s_deployment_id": "k1", + "number_of_requets": 100 + }] + + def _get_licenses(self) -> list: + return [ + { + "license_id": "L1", + "secret_id": "xxxx", + "client_name": "dummy_client", + "k8s_deployment_id": "k1", + "number_of_requets": 100 + } + ] + + def _create_license_queue(self, license_id: str, client_name: str) -> None: + logger.info("Added L1 queue in 'queues' collection. Update_time: 00000.") + + def _remove_license_queue(self, license_id: str) -> None: + logger.info("Removed L1 queue in 'queues' collection. Update_time: 00000.") + +class LicenseHandlerFirestore(LicenseHandler): + def __init__(self, db: firestore.firestore.Client): + self.db = db + self.collection = "license" + + # TODO: find alternative way to create license_id + def _add_license(self, license_dict: dict) -> str: + license_id = f"L{len(self.db.collection(self.collection).get()) + 1}" + license_dict["license_id"] = license_id + result: WriteResult = self.db.collection(self.collection).document(license_id).set( + license_dict + ) + logger.info(f"Added {license_id} in 'license' collection. Update_time: {result.update_time}.") + return license_id + + def _delete_license(self, license_id: str) -> None: + timestamp = self.db.collection(self.collection).document(license_id).delete() + logger.info(f"Removed {license_id} in 'license' collection. 
Update_time: {timestamp}.") + + def _update_license(self, license_id: str, license_dict: dict) -> None: + result: WriteResult = self.db.collection(self.collection).document(license_id).update(license_dict) + logger.info(f"Updated {license_id} in 'license' collection. Update_time: {result.update_time}.") + + def _check_license_exists(self, license_id: str) -> bool: + result: DocumentSnapshot = self.db.collection(self.collection).document(license_id).get() + return result.exists + + def _get_license_by_license_id(self, license_id: str) -> dict: + result: DocumentSnapshot = self.db.collection(self.collection).document(license_id).get() + return result.to_dict() + + def _get_license_by_client_name(self, client_name: str) -> list: + snapshot_list = self.db.collection(self.collection).where('client_name', '==', client_name).get() + result = [] + for snapshot in snapshot_list: + result.append(snapshot.to_dict()) + return result + + def _get_licenses(self) -> list: + snapshot_list = self.db.collection(self.collection).get() + result = [] + for snapshot in snapshot_list: + result.append(self.db.collection(self.collection).document(snapshot.id).get().to_dict()) + return result + + def _create_license_queue(self, license_id: str, client_name: str) -> None: + result: WriteResult = self.db.collection('queues').document(license_id).set( + {"license_id": license_id, "client_name": client_name, "queue": []} + ) + logger.info(f"Added {license_id} queue in 'queues' collection. Update_time: {result.update_time}.") + + def _remove_license_queue(self, license_id: str) -> None: + timestamp = self.db.collection('queues').document(license_id).delete() + logger.info(f"Removed {license_id} queue in 'queues' collection. 
Update_time: {timestamp}.") diff --git a/weather_dl_v2/fastapi-server/database/queue_handler.py b/weather_dl_v2/fastapi-server/database/queue_handler.py new file mode 100644 index 00000000..a7edf5f9 --- /dev/null +++ b/weather_dl_v2/fastapi-server/database/queue_handler.py @@ -0,0 +1,166 @@ +import abc +import logging +from firebase_admin import firestore +from google.cloud.firestore_v1 import DocumentSnapshot +from google.cloud.firestore_v1.types import WriteResult +from database.session import get_db + +logger = logging.getLogger(__name__) + +def get_queue_handler(): + return QueueHandlerFirestore(db=get_db()) + +def get_mock_queue_handler(): + return QueueHandlerMock() + +class QueueHandler(abc.ABC): + @abc.abstractmethod + def _create_license_queue(self, license_id: str, client_name: str) -> None: + pass + + @abc.abstractmethod + def _remove_license_queue(self, license_id: str) -> None: + pass + + @abc.abstractmethod + def _get_queues(self) -> list: + pass + + @abc.abstractmethod + def _get_queue_by_license_id(self, license_id: str) -> dict: + pass + + @abc.abstractmethod + def _get_queue_by_client_name(self, client_name: str) -> list: + pass + + @abc.abstractmethod + def _update_license_queue(self, license_id: str, priority_list: list) -> None: + pass + + @abc.abstractmethod + def _update_queues_on_start_download(self, config_name: str, licenses: list) -> None: + pass + + @abc.abstractmethod + def _update_queues_on_stop_download(self, config_name: str) -> None: + pass + + @abc.abstractmethod + def _update_config_priority_in_license(self, license_id: str, config_name: str, priority: int) -> None: + pass + +class QueueHandlerMock(QueueHandler): + def __init__(self): + pass + + def _create_license_queue(self, license_id: str, client_name: str) -> None: + logger.info(f"Added {license_id} queue in 'queues' collection. 
Update_time: 000000.") + + def _remove_license_queue(self, license_id: str) -> None: + logger.info(f"Removed {license_id} queue in 'queues' collection. Update_time: 000000.") + + def _get_queues(self) -> list: + return [ + { + "client_name": "dummy_client", + "license_id": "L1", + "queue": [] + } + ] + + def _get_queue_by_license_id(self, license_id: str) -> dict: + if license_id == "no_exists": + return None + return { + "client_name": "dummy_client", + "license_id": license_id, + "queue": [] + } + + def _get_queue_by_client_name(self, client_name: str) -> list: + return [ + { + "client_name": client_name, + "license_id": "L1", + "queue": [] + } + ] + + def _update_license_queue(self, license_id: str, priority_list: list) -> None: + logger.info(f"Updated {license_id} queue in 'queues' collection. Update_time: 00000.") + + def _update_queues_on_start_download(self, config_name: str, licenses: list) -> None: + logger.info(f"Updated {license} queue in 'queues' collection. Update_time: 00000.") + + def _update_queues_on_stop_download(self, config_name: str) -> None: + logger.info("Updated snapshot.id queue in 'queues' collection. Update_time: 00000.") + + def _update_config_priority_in_license(self, license_id: str, config_name: str, priority: int) -> None: + print(f"Updated snapshot.id queue in 'queues' collection. Update_time: 00000.") + +class QueueHandlerFirestore(QueueHandler): + def __init__(self, db: firestore.firestore.Client, collection: str = "queues"): + self.db = db + self.collection = collection + + def _create_license_queue(self, license_id: str, client_name: str) -> None: + result: WriteResult = self.db.collection(self.collection).document(license_id).set( + {"license_id": license_id, "client_name": client_name, "queue": []} + ) + logger.info(f"Added {license_id} queue in 'queues' collection. 
Update_time: {result.update_time}.") + + def _remove_license_queue(self, license_id: str) -> None: + timestamp = self.db.collection(self.collection).document(license_id).delete() + logger.info(f"Removed {license_id} queue in 'queues' collection. Update_time: {timestamp}.") + + def _get_queues(self) -> list: + snapshot_list = self.db.collection(self.collection).get() + result = [] + for snapshot in snapshot_list: + result.append(self.db.collection(self.collection).document(snapshot.id).get().to_dict()) + return result + + def _get_queue_by_license_id(self, license_id: str) -> dict: + result: DocumentSnapshot = self.db.collection(self.collection).document(license_id).get() + return result.to_dict() + + def _get_queue_by_client_name(self, client_name: str) -> list: + snapshot_list = self.db.collection(self.collection).where('client_name', '==', client_name).get() + result = [] + for snapshot in snapshot_list: + result.append(snapshot.to_dict()) + return result + + def _update_license_queue(self, license_id: str, priority_list: list) -> None: + result: WriteResult = self.db.collection(self.collection).document(license_id).update( + {'queue': priority_list} + ) + logger.info(f"Updated {license_id} queue in 'queues' collection. Update_time: {result.update_time}.") + + def _update_queues_on_start_download(self, config_name: str, licenses: list) -> None: + for license in licenses: + result: WriteResult = self.db.collection(self.collection).document(license).update( + {'queue': firestore.ArrayUnion([config_name])} + ) + logger.info(f"Updated {license} queue in 'queues' collection. 
Update_time: {result.update_time}.") + + def _update_queues_on_stop_download(self, config_name: str) -> None: + snapshot_list = self.db.collection(self.collection).get() + for snapshot in snapshot_list: + result: WriteResult = self.db.collection(self.collection).document(snapshot.id).update({ + 'queue': firestore.ArrayRemove([config_name])}) + logger.info(f"Updated {snapshot.id} queue in 'queues' collection. Update_time: {result.update_time}.") + + def _update_config_priority_in_license(self, license_id: str, config_name: str, priority: int) -> None: + snapshot: DocumentSnapshot = self.db.collection('queues').document(license_id).get() + priority_list = snapshot.to_dict()['queue'] + if config_name not in priority_list: + print(f"'{config_name}' not in queue.") + raise + new_priority_list = [c for c in priority_list if c != config_name] + new_priority_list.insert(priority, config_name) + result: WriteResult = self.db.collection('queues').document(license_id).update( + {'queue': new_priority_list} + ) + print(f"Updated {snapshot.id} queue in 'queues' collection. Update_time: {result.update_time}.") diff --git a/weather_dl_v2/fastapi-server/database/session.py b/weather_dl_v2/fastapi-server/database/session.py new file mode 100644 index 00000000..564cafe7 --- /dev/null +++ b/weather_dl_v2/fastapi-server/database/session.py @@ -0,0 +1,43 @@ +import time +import abc +import logging +import firebase_admin +from firebase_admin import firestore +from firebase_admin import credentials +from config_processing.util import get_wait_interval + +logger = logging.getLogger(__name__) + +class Database(abc.ABC): + @abc.abstractmethod + def _get_db(self): + pass + +def get_db() -> firestore.firestore.Client: + """Acquire a firestore client, initializing the firebase app if necessary. + Will attempt to get the db client five times. If it's still unsuccessful, a + `ManifestException` will be raised. 
+ """ + db = None + attempts = 0 + + while db is None: + try: + db = firestore.client() + except ValueError as e: + # The above call will fail with a value error when the firebase app is not initialized. + # Initialize the app here, and try again. + # Use the application default credentials. + cred = credentials.ApplicationDefault() + + firebase_admin.initialize_app(cred) + logger.info('Initialized Firebase App.') + + if attempts > 4: + raise RuntimeError('Exceeded number of retries to get firestore client.') from e + + time.sleep(get_wait_interval(attempts)) + + attempts += 1 + + return db diff --git a/weather_dl_v2/fastapi-server/db_service/database.py b/weather_dl_v2/fastapi-server/db_service/database.py deleted file mode 100644 index 990f1fb3..00000000 --- a/weather_dl_v2/fastapi-server/db_service/database.py +++ /dev/null @@ -1,254 +0,0 @@ -import abc -import time -import firebase_admin -from firebase_admin import firestore -from firebase_admin import credentials -from google.cloud.firestore_v1 import DocumentSnapshot -from google.cloud.firestore_v1.types import WriteResult -from config_processing.util import get_wait_interval - - -class Database(abc.ABC): - @abc.abstractmethod - def _get_db(self): - pass - - -class CRUDOperations(abc.ABC): - @abc.abstractmethod - def _start_download(self, config_name: str, client_name: str) -> None: - pass - - @abc.abstractmethod - def _stop_download(self, config_name: str) -> None: - pass - - @abc.abstractmethod - def _check_download_exists(self, config_name: str) -> bool: - pass - - @abc.abstractmethod - def _add_license(self, license_dict: dict) -> str: - pass - - @abc.abstractmethod - def _delete_license(self, license_id: str) -> str: - pass - - @abc.abstractmethod - def _create_license_queue(self, license_id: str, client_name: str) -> None: - pass - - @abc.abstractmethod - def _remove_license_queue(self, license_id: str) -> None: - pass - - @abc.abstractmethod - def _get_queues(self) -> list: - pass - - 
@abc.abstractmethod - def _get_queue_by_license_id(self, license_id: str) -> dict: - pass - - @abc.abstractmethod - def _get_queue_by_client_name(self, client_name: str) -> list: - pass - - @abc.abstractmethod - def _update_license_queue(self, license_id: str, priority_list: list) -> None: - pass - - @abc.abstractmethod - def _update_config_priority__in_license(self, license_id: str, config_name: str, priority: int) -> None: - pass - - @abc.abstractmethod - def _check_license_exists(self, license_id: str) -> bool: - pass - - @abc.abstractmethod - def _get_license_by_license_id(slef, license_id: str) -> dict: - pass - - @abc.abstractmethod - def _get_license_by_client_name(self, client_name: str) -> list: - pass - - @abc.abstractmethod - def _get_licenses(self) -> list: - pass - - @abc.abstractmethod - def _update_license(self, license_id: str, license_dict: dict) -> None: - pass - - # TODO: Find better way to execute these query. - # @abc.abstractmethod - # def _get_download_by_config_name(self, config_name: str) -> dict: - # pass - - # @abc.abstractmethod - # def _get_dowloads(self) -> list: - # pass - - @abc.abstractmethod - def _update_queues_on_start_download(self, config_name: str, licenses: list) -> None: - pass - - @abc.abstractmethod - def _update_queues_on_stop_download(self, config_name: str) -> None: - pass - - -class FirestoreClient(Database, CRUDOperations): - def _get_db(self) -> firestore.firestore.Client: - """Acquire a firestore client, initializing the firebase app if necessary. - Will attempt to get the db client five times. If it's still unsuccessful, a - `ManifestException` will be raised. - """ - db = None - attempts = 0 - - while db is None: - try: - db = firestore.client() - except ValueError as e: - # The above call will fail with a value error when the firebase app is not initialized. - # Initialize the app here, and try again. - # Use the application default credentials. 
- cred = credentials.ApplicationDefault() - - firebase_admin.initialize_app(cred) - print('Initialized Firebase App.') - - if attempts > 4: - raise RuntimeError('Exceeded number of retries to get firestore client.') from e - - time.sleep(get_wait_interval(attempts)) - - attempts += 1 - - return db - - def _start_download(self, config_name: str, client_name: str) -> None: - result: WriteResult = self._get_db().collection('download').document(config_name).set( - {'config_name': config_name, 'client_name': client_name} - ) - - print(f"Added {config_name} in 'download' collection. Update_time: {result.update_time}.") - - def _stop_download(self, config_name: str) -> None: - timestamp = self._get_db().collection('download').document(config_name).delete() - print(f"Removed {config_name} in 'download' collection. Update_time: {timestamp}.") - - def _check_download_exists(self, config_name: str) -> bool: - result: DocumentSnapshot = self._get_db().collection('download').document(config_name).get() - return result.exists - - def _add_license(self, license_dict: dict) -> str: - license_id = f"L{len(self._get_db().collection('license').get()) + 1}" - license_dict["license_id"] = license_id - result: WriteResult = self._get_db().collection('license').document(license_id).set( - license_dict - ) - print(f"Added {license_id} in 'license' collection. Update_time: {result.update_time}.") - return license_id - - def _delete_license(self, license_id: str) -> None: - timestamp = self._get_db().collection('license').document(license_id).delete() - print(f"Removed {license_id} in 'license' collection. Update_time: {timestamp}.") - - def _update_license(self, license_id: str, license_dict: dict) -> None: - result: WriteResult = self._get_db().collection('license').document(license_id).update(license_dict) - print(f"Updated {license_id} in 'license' collection. 
Update_time: {result.update_time}.") - - def _create_license_queue(self, license_id: str, client_name: str) -> None: - result: WriteResult = self._get_db().collection('queues').document(license_id).set( - {"license_id": license_id, "client_name": client_name, "queue": []} - ) - print(f"Added {license_id} queue in 'queues' collection. Update_time: {result.update_time}.") - - def _remove_license_queue(self, license_id: str) -> None: - timestamp = self._get_db().collection('queues').document(license_id).delete() - print(f"Removed {license_id} queue in 'queues' collection. Update_time: {timestamp}.") - - def _get_queues(self) -> list: - snapshot_list = self._get_db().collection('queues').get() - result = [] - for snapshot in snapshot_list: - result.append(self._get_db().collection('queues').document(snapshot.id).get().to_dict()) - return result - - def _get_queue_by_license_id(self, license_id: str) -> dict: - result: DocumentSnapshot = self._get_db().collection('queues').document(license_id).get() - return result.to_dict() - - def _get_queue_by_client_name(self, client_name: str) -> list: - snapshot_list = self._get_db().collection('queues').where('client_name', '==', client_name).get() - result = [] - for snapshot in snapshot_list: - result.append(snapshot.to_dict()) - return result - - def _update_license_queue(self, license_id: str, priority_list: list) -> None: - result: WriteResult = self._get_db().collection('queues').document(license).update( - {'queue': priority_list} - ) - print(f"Updated {license_id} queue in 'queues' collection. 
Update_time: {result.update_time}.") - - def _update_config_priority__in_license(self, license_id: str, config_name: str, priority: int) -> None: - snapshot: DocumentSnapshot = self._get_db().collection('queues').document(license_id).get() - priority_list = snapshot.to_dict()['queue'] - if config_name not in priority_list: - print(f"'{config_name}' not in queue.") - raise - new_priority_list = [c for c in priority_list if c != config_name] - new_priority_list.insert(priority, config_name) - result: WriteResult = self._get_db().collection('queues').document(license_id).update( - {'queue': new_priority_list} - ) - print(f"Updated {snapshot.id} queue in 'queues' collection. Update_time: {result.update_time}.") - - def _check_license_exists(self, license_id: str) -> bool: - result: DocumentSnapshot = self._get_db().collection('license').document(license_id).get() - return result.exists - - def _get_license_by_license_id(self, license_id: str) -> dict: - result: DocumentSnapshot = self._get_db().collection('license').document(license_id).get() - return result.to_dict() - - def _get_license_by_client_name(self, client_name: str) -> list: - snapshot_list = self._get_db().collection('license').where('client_name', '==', client_name).get() - result = [] - for snapshot in snapshot_list: - result.append(snapshot.to_dict()) - return result - - def _get_licenses(self) -> list: - snapshot_list = self._get_db().collection('license').get() - result = [] - for snapshot in snapshot_list: - result.append(self._get_db().collection('license').document(snapshot.id).get().to_dict()) - return result - - # def _get_download_by_config_name(self, config_name: str) -> dict: - # result: DocumentSnapshot = self._get_db().collection('download_status').document(config_name).get() - # return result.to_dict() - - # def _get_dowloads(self) -> list: - # pass - - def _update_queues_on_start_download(self, config_name: str, licenses: list) -> None: - for license in licenses: - result: WriteResult = 
self._get_db().collection('queues').document(license).update( - {'queue': firestore.ArrayUnion([config_name])} - ) - print(f"Updated {license} queue in 'queues' collection. Update_time: {result.update_time}.") - - def _update_queues_on_stop_download(self, config_name: str) -> None: - snapshot_list = self._get_db().collection('queues').get() - for snapshot in snapshot_list: - result: WriteResult = self._get_db().collection('queues').document(snapshot.id).update({ - 'queue': firestore.ArrayRemove([config_name])}) - print(f"Updated {snapshot.id} queue in 'queues' collection. Update_time: {result.update_time}.") diff --git a/weather_dl_v2/fastapi-server/environment.yml b/weather_dl_v2/fastapi-server/environment.yml index 3b18f525..b7aee824 100644 --- a/weather_dl_v2/fastapi-server/environment.yml +++ b/weather_dl_v2/fastapi-server/environment.yml @@ -6,6 +6,7 @@ dependencies: - xarray - geojson - pip=22.3 + - google-cloud-sdk=410.0.0 - pip: - kubernetes - fastapi[all] @@ -14,3 +15,4 @@ dependencies: - apache-beam[gcp] - aiohttp - firebase-admin + - gcloud diff --git a/weather_dl_v2/fastapi-server/license_dep/deployment_creator.py b/weather_dl_v2/fastapi-server/license_dep/deployment_creator.py index 1928d8f9..6a285fdf 100644 --- a/weather_dl_v2/fastapi-server/license_dep/deployment_creator.py +++ b/weather_dl_v2/fastapi-server/license_dep/deployment_creator.py @@ -1,6 +1,5 @@ from os import path import yaml -import uuid from kubernetes import client, config @@ -11,8 +10,8 @@ def create_license_deployment(license_id: str) -> str: with open(path.join(path.dirname(__file__), "license_deployment.yaml")) as f: deployment_manifest = yaml.safe_load(f) deployment_name = f"weather-dl-v2-license-dep-{license_id}".lower() - - # Update the deployment name with a unique identifier + + # Update the deployment name with a unique identifier deployment_manifest["metadata"]["name"] = deployment_name deployment_manifest["spec"]["template"]["spec"]["containers"][0]["args"] = ["--license", 
license_id] @@ -38,4 +37,4 @@ def terminate_license_deployment(license_id: str) -> None: # Delete the deployment api_instance.delete_namespaced_deployment(name=deployment_name, namespace="default") - print(f"Deployment '{deployment_name}' deleted successfully.") \ No newline at end of file + print(f"Deployment '{deployment_name}' deleted successfully.") diff --git a/weather_dl_v2/fastapi-server/logging.conf b/weather_dl_v2/fastapi-server/logging.conf new file mode 100644 index 00000000..ed0a5e29 --- /dev/null +++ b/weather_dl_v2/fastapi-server/logging.conf @@ -0,0 +1,36 @@ +[loggers] +keys=root,server + +[handlers] +keys=consoleHandler,detailedConsoleHandler + +[formatters] +keys=normalFormatter,detailedFormatter + +[logger_root] +level=INFO +handlers=consoleHandler + +[logger_server] +level=DEBUG +handlers=detailedConsoleHandler +qualname=server +propagate=0 + +[handler_consoleHandler] +class=StreamHandler +level=DEBUG +formatter=normalFormatter +args=(sys.stdout,) + +[handler_detailedConsoleHandler] +class=StreamHandler +level=DEBUG +formatter=detailedFormatter +args=(sys.stdout,) + +[formatter_normalFormatter] +format=%(asctime)s loglevel=%(levelname)-6s logger=%(name)s %(funcName)s() msg:%(message)s + +[formatter_detailedFormatter] +format=%(asctime)s loglevel=%(levelname)-6s logger=%(name)s %(funcName)s() msg:%(message)s call_trace=%(pathname)s L%(lineno)-4d \ No newline at end of file diff --git a/weather_dl_v2/fastapi-server/main.py b/weather_dl_v2/fastapi-server/main.py index d7a0df85..01dedfc8 100644 --- a/weather_dl_v2/fastapi-server/main.py +++ b/weather_dl_v2/fastapi-server/main.py @@ -1,15 +1,23 @@ +import logging +import os +import logging.config from contextlib import asynccontextmanager from fastapi import FastAPI -from fastapi.responses import HTMLResponse from routers import license, download, queues +ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) + +# set up logger. 
+logging.config.fileConfig('logging.conf', disable_existing_loggers=False) +logger = logging.getLogger(__name__) @asynccontextmanager async def lifespan(app: FastAPI): + logger.info("Started FastAPI server") # Boot up # TODO: Replace hard-coded collection name by read a server config. - print("TODO: Create database if not already exists.") - print("TODO: Retrieve license information & create license deployment if needed.") + logger.info("Create database if not already exists.") + logger.info("Retrieve license information & create license deployment if needed.") yield # Clean up @@ -22,9 +30,4 @@ async def lifespan(app: FastAPI): @app.get("/") async def main(): - content = """ - -Greetings from weather-dl v2 !! - - """ - return HTMLResponse(content=content) + return {"msg": "Greetings from weather-dl v2 !!"} diff --git a/weather_dl_v2/fastapi-server/routers/download.py b/weather_dl_v2/fastapi-server/routers/download.py index 3e173c14..00252e9e 100644 --- a/weather_dl_v2/fastapi-server/routers/download.py +++ b/weather_dl_v2/fastapi-server/routers/download.py @@ -1,10 +1,9 @@ -from fastapi import APIRouter, HTTPException, BackgroundTasks, UploadFile -from db_service.database import FirestoreClient +from fastapi import APIRouter, HTTPException, BackgroundTasks, UploadFile, Depends from config_processing.pipeline import start_processing_config +from database.download_handler import DownloadHandler, get_download_handler +from database.queue_handler import QueueHandler, get_queue_handler import shutil -db_client = FirestoreClient() - router = APIRouter( prefix="/download", tags=["download"], @@ -22,11 +21,12 @@ def upload(file: UploadFile): # Can submit a config to the server. 
@router.post("/") def submit_download(file: UploadFile | None = None, licenses: list = [], - background_tasks: BackgroundTasks = BackgroundTasks()): + background_tasks: BackgroundTasks = BackgroundTasks(), + download_handler: DownloadHandler = Depends(get_download_handler)): if not file: - return {"message": "No upload file sent."} + raise HTTPException(status_code=404, detail="No upload file sent.") else: - if db_client._check_download_exists(file.filename): + if download_handler._check_download_exists(file.filename): raise HTTPException(status_code=400, detail=f"Please stop the ongoing download of the config file '{file.filename}' " "before attempting to start a new download.") @@ -36,7 +36,7 @@ def submit_download(file: UploadFile | None = None, licenses: list = [], background_tasks.add_task(start_processing_config, dest, licenses) return {"message": f"file '{file.filename}' saved at '{dest}' successfully."} except Exception: - return {"message": f"Failed to save file '{file.filename}'."} + raise HTTPException(status_code=500, detail=f"Failed to save file '{file.filename}'.") # Can check the current status of the submitted config. @@ -61,8 +61,8 @@ async def get_downloads(client_name: str | None = None): # Get status of particular download @router.get("/{config_name}") -async def get_download(config_name: str): - if not db_client._check_download_exists(config_name): +async def get_download(config_name: str, download_handler: DownloadHandler = Depends(get_download_handler)): + if not download_handler._check_download_exists(config_name): raise HTTPException(status_code=404, detail="Download config not found in weather-dl v2.") # Get this kind of response by querying fake_manifest_db. @@ -73,10 +73,12 @@ async def get_download(config_name: str): # Stop & remove the execution of the config. 
@router.delete("/{config_name}") -async def delete_download(config_name: str): - if not db_client._check_download_exists(config_name): +async def delete_download(config_name: str, + download_handler: DownloadHandler = Depends(get_download_handler), + queue_handler: QueueHandler = Depends(get_queue_handler)): + if not download_handler._check_download_exists(config_name): raise HTTPException(status_code=404, detail="No such download config to stop & remove.") - db_client._stop_download(config_name) - db_client._update_queues_on_stop_download(config_name) + download_handler._stop_download(config_name) + queue_handler._update_queues_on_stop_download(config_name) return {"config_name": config_name, "message": "Download config stopped & removed successfully."} diff --git a/weather_dl_v2/fastapi-server/routers/license.py b/weather_dl_v2/fastapi-server/routers/license.py index 2a796daa..486c3f49 100644 --- a/weather_dl_v2/fastapi-server/routers/license.py +++ b/weather_dl_v2/fastapi-server/routers/license.py @@ -1,9 +1,7 @@ -from fastapi import APIRouter, HTTPException, BackgroundTasks +from fastapi import APIRouter, HTTPException, BackgroundTasks, Depends from pydantic import BaseModel -from db_service.database import FirestoreClient from license_dep.deployment_creator import create_license_deployment, terminate_license_deployment - -db_client = FirestoreClient() +from database.license_handler import LicenseHandler, get_license_handler # TODO: Make use of google secret manager. 
@@ -25,21 +23,40 @@ class LicenseInternal(License): responses={404: {"description": "Not found"}}, ) +def get_create_deployment(): + def create_deployment(license_id: str): + k8s_deployment_id = create_license_deployment(license_id) + update_license_internal(license_id, k8s_deployment_id) + return create_deployment + +def get_create_deployment_mock(): + def create_deployment_mock(license_id: str): + print("create deployment mocked") + return create_deployment_mock + +def get_terminate_license_deployment(): + return terminate_license_deployment + +def get_terminate_license_deployment_mock(): + def get_terminate_license_deployment_mock(license_id): + print(f"terminating license deployment for {license_id}") + return get_terminate_license_deployment_mock + # List all the license + handle filters of {client_name} @router.get("/") -async def get_licenses(client_name: str | None = None): +async def get_licenses(client_name: str | None = None, license_handler: LicenseHandler = Depends(get_license_handler)): if client_name: - result = db_client._get_license_by_client_name(client_name) + result = license_handler._get_license_by_client_name(client_name) else: - result = db_client._get_licenses() + result = license_handler._get_licenses() return result # Get particular license @router.get("/{license_id}") -async def get_license_by_license_id(license_id: str): - result = db_client._get_license_by_license_id(license_id) +async def get_license_by_license_id(license_id: str, license_handler: LicenseHandler = Depends(get_license_handler)): + result = license_handler._get_license_by_license_id(license_id) if not result: raise HTTPException(status_code=404, detail="License not found.") return result @@ -47,12 +64,17 @@ async def get_license_by_license_id(license_id: str): # Update existing license @router.put("/{license_id}") -async def update_license(license_id: str, license: License): - if not db_client._check_license_exists(license_id): +async def update_license(license_id: str, 
+ license: License, + license_handler: LicenseHandler = Depends(get_license_handler), + create_deployment = Depends(get_create_deployment), + terminate_license_deployment = Depends(get_terminate_license_deployment) + ): + if not license_handler._check_license_exists(license_id): raise HTTPException(status_code=404, detail="No such license to update.") license_dict = license.dict() - db_client._update_license(license_id, license_dict) + license_handler._update_license(license_id, license_dict) terminate_license_deployment(license_id) create_deployment(license_id) @@ -60,37 +82,40 @@ async def update_license(license_id: str, license: License): # Add/Update k8s deployment ID for existing license (intenally). -def update_license_internal(license_id: str, k8s_deployment_id: str): - if not db_client._check_license_exists(license_id): +def update_license_internal(license_id: str, + k8s_deployment_id: str, + license_handler: LicenseHandler = Depends(get_license_handler)): + if not license_handler._check_license_exists(license_id): raise HTTPException(status_code=404, detail="No such license to update.") license_dict = {"k8s_deployment_id": k8s_deployment_id} - db_client._update_license(license_id, license_dict) + license_handler._update_license(license_id, license_dict) return {"license_id": license_id, "message": "License updated successfully."} - -def create_deployment(license_id: str): - k8s_deployment_id = create_license_deployment(license_id) - update_license_internal(license_id, k8s_deployment_id) - - # Add new license @router.post("/") -async def add_license(license: License, background_tasks: BackgroundTasks = BackgroundTasks()): +async def add_license(license: License, + background_tasks: BackgroundTasks = BackgroundTasks(), + license_handler: LicenseHandler = Depends(get_license_handler), + create_deployment = Depends(get_create_deployment)): license_dict = license.dict() license_dict['k8s_deployment_id'] = "" - license_id = db_client._add_license(license_dict) - 
db_client._create_license_queue(license_id, license_dict['client_name']) + license_id = license_handler._add_license(license_dict) + license_handler._create_license_queue(license_id, license_dict['client_name']) background_tasks.add_task(create_deployment, license_id) return {"license_id": license_id, "message": "License added successfully."} # Remove license @router.delete("/{license_id}") -async def delete_license(license_id: str, background_tasks: BackgroundTasks = BackgroundTasks()): - if not db_client._check_license_exists(license_id): +async def delete_license(license_id: str, + background_tasks: BackgroundTasks = BackgroundTasks(), + license_handler: LicenseHandler = Depends(get_license_handler), + terminate_license_deployment = Depends(get_terminate_license_deployment) + ): + if not license_handler._check_license_exists(license_id): raise HTTPException(status_code=404, detail="No such license to delete.") - db_client._delete_license(license_id) - db_client._remove_license_queue(license_id) + license_handler._delete_license(license_id) + license_handler._remove_license_queue(license_id) background_tasks.add_task(terminate_license_deployment, license_id) return {"license_id": license_id, "message": "License removed successfully."} diff --git a/weather_dl_v2/fastapi-server/routers/queues.py b/weather_dl_v2/fastapi-server/routers/queues.py index 62271282..4056ddf6 100644 --- a/weather_dl_v2/fastapi-server/routers/queues.py +++ b/weather_dl_v2/fastapi-server/routers/queues.py @@ -1,7 +1,6 @@ -from fastapi import APIRouter, HTTPException -from db_service.database import FirestoreClient - -db_client = FirestoreClient() +from fastapi import APIRouter, HTTPException, Depends +from database.queue_handler import QueueHandler, get_queue_handler +from database.license_handler import LicenseHandler, get_license_handler router = APIRouter( prefix="/queues", @@ -13,18 +12,19 @@ # Users can change the execution order of config per license basis. 
# List the licenses priority + {client_name} filter @router.get("/") -async def get_all_license_queue(client_name: str | None = None): +async def get_all_license_queue(client_name: str | None = None, + queue_handler: QueueHandler = Depends(get_queue_handler)): if client_name: - result = db_client._get_queue_by_client_name(client_name) + result = queue_handler._get_queue_by_client_name(client_name) else: - result = db_client._get_queues() + result = queue_handler._get_queues() return result # Get particular license priority @router.get("/{license_id}") -async def get_license_queue(license_id: str): - result = db_client._get_queue_by_license_id(license_id) +async def get_license_queue(license_id: str, queue_handler: QueueHandler = Depends(get_queue_handler)): + result = queue_handler._get_queue_by_license_id(license_id) if not result: raise HTTPException(status_code=404, detail="License's priority not found.") return result @@ -32,11 +32,14 @@ async def get_license_queue(license_id: str): # Change priority queue of particular license @router.post("/{license_id}") -def modify_license_queue(license_id: str, priority_list: list | None = []): - if not db_client._check_license_exists(license_id): +def modify_license_queue(license_id: str, priority_list: list | None = [], + queue_handler: QueueHandler = Depends(get_queue_handler), + license_handler: LicenseHandler = Depends(get_license_handler) + ): + if not license_handler._check_license_exists(license_id): raise HTTPException(status_code=404, detail="License's priority not found.") try: - db_client._update_license_queue(license_id, priority_list) + queue_handler._update_license_queue(license_id, priority_list) return {"message": f"'{license_id}' license priority updated successfully."} except Exception: return {"message": f"Failed to update '{license_id}' license priority."} @@ -44,11 +47,13 @@ def modify_license_queue(license_id: str, priority_list: list | None = []): # Change config's priority in particular license 
@router.put("/priority/{license_id}") -def modify_config_priority__in_license(license_id: str, config_name: str, priority: int): - if not db_client._check_license_exists(license_id): +def modify_config_priority_in_license(license_id: str, config_name: str, priority: int, + queue_handler: QueueHandler = Depends(get_queue_handler), + license_handler: LicenseHandler = Depends(get_license_handler)): + if not license_handler._check_license_exists(license_id): raise HTTPException(status_code=404, detail="License's priority not found.") try: - db_client._update_config_priority__in_license(license_id, config_name, priority) + queue_handler._update_config_priority_in_license(license_id, config_name, priority) return {"message": f"'{license_id}' license '{config_name}' priority updated successfully."} - except Exception: - return {"message": f"Failed to update '{license_id}' license priority."} \ No newline at end of file + except Exception as e: + return {"message": f"Failed to update '{license_id}' license priority."} diff --git a/weather_dl_v2/license_deployment/database.py b/weather_dl_v2/license_deployment/database.py index a165199d..b2d1c525 100644 --- a/weather_dl_v2/license_deployment/database.py +++ b/weather_dl_v2/license_deployment/database.py @@ -1,5 +1,6 @@ import abc import time +import logging import firebase_admin from firebase_admin import firestore from firebase_admin import credentials @@ -8,6 +9,7 @@ from google.cloud.firestore_v1.base_query import FieldFilter, And from util import get_wait_interval +logger = logging.getLogger(__name__) class Database(abc.ABC): @abc.abstractmethod @@ -51,7 +53,7 @@ def _get_db(self) -> firestore.firestore.Client: cred = credentials.ApplicationDefault() firebase_admin.initialize_app(cred) - print('Initialized Firebase App.') + logger.info('Initialized Firebase App.') if attempts > 4: raise RuntimeError('Exceeded number of retries to get firestore client.') from e @@ -81,7 +83,7 @@ def _get_partition_from_manifest(self, 
config_name: str) -> str | None: def _remove_config_from_license_queue(self, license_id: str, config_name: str) -> None: result: WriteResult = self._get_db().collection('queues').document(license_id).update({ 'queue': firestore.ArrayRemove([config_name])}) - print(f"Updated {license_id} queue in 'queues' collection. Update_time: {result.update_time}.") + logger.info(f"Updated {license_id} queue in 'queues' collection. Update_time: {result.update_time}.") @firestore.transactional diff --git a/weather_dl_v2/license_deployment/fetch.py b/weather_dl_v2/license_deployment/fetch.py index 67d97ae1..b51e9657 100644 --- a/weather_dl_v2/license_deployment/fetch.py +++ b/weather_dl_v2/license_deployment/fetch.py @@ -10,6 +10,7 @@ from job_creator import create_download_job from clients import CLIENTS from manifest import FirestoreManifest +from util import exceptionit db_client = FirestoreClient() secretmanager_client = secretmanager.SecretManagerServiceClient() @@ -30,7 +31,7 @@ def create_job(request, result): logger.info(f"Creating download job for res: {data_str}") create_download_job(data_str) - +@exceptionit def make_fetch_request(request): client = CLIENTS[client_name](request['dataset']) manifest = FirestoreManifest() @@ -46,7 +47,6 @@ def make_fetch_request(request): create_job(request, result) - def fetch_request_from_db(): request = None config_name = db_client._get_config_from_queue_by_license_id(license_id) diff --git a/weather_dl_v2/license_deployment/manifest.py b/weather_dl_v2/license_deployment/manifest.py index 8d2e2c2b..93e965a4 100644 --- a/weather_dl_v2/license_deployment/manifest.py +++ b/weather_dl_v2/license_deployment/manifest.py @@ -1,6 +1,7 @@ """Client interface for connecting to a manifest.""" import abc +import logging import dataclasses import datetime import enum @@ -27,6 +28,8 @@ from database import Database +logger = logging.getLogger(__name__) + """An implementation-dependent Manifest URI.""" Location = t.NewType('Location', str) @@ -276,7 
+279,7 @@ def skip(self, config_name: str, dataset: str, selection: t.Dict, location: str, upload_end_time=current_utc_time, ) self._update(status) - print(f'Manifest updated for skipped shard: {location!r} -- {DownloadStatus.to_dict(status)!r}.') + logger.info(f'Manifest updated for skipped shard: {location!r} -- {DownloadStatus.to_dict(status)!r}.') def _set_for_transaction(self, config_name: str, dataset: str, selection: t.Dict, location: str, user: str) -> None: """Reset Manifest state in preparation for a new transaction.""" @@ -404,7 +407,7 @@ def _get_db(self) -> firestore.firestore.Client: cred = credentials.ApplicationDefault() firebase_admin.initialize_app(cred) - print('Initialized Firebase App.') + logger.info('Initialized Firebase App.') if attempts > 4: raise ManifestException('Exceeded number of retries to get firestore client.') from e @@ -434,7 +437,7 @@ def _read(self, location: str) -> DownloadStatus: def _update(self, download_status: DownloadStatus) -> None: """Update or create a download status record.""" - print('Updating Firestore Manifest.') + logger.info('Updating Firestore Manifest.') status = DownloadStatus.to_dict(download_status) doc_id = generate_md5_hash(status['location']) @@ -446,7 +449,7 @@ def _update(self, download_status: DownloadStatus) -> None: result: WriteResult = download_doc_ref.set(status) - print(f'Firestore manifest updated. ' + logger.info(f'Firestore manifest updated. 
' f'update_time={result.update_time}, ' f'filename={download_status.location}.') diff --git a/weather_dl_v2/license_deployment/util.py b/weather_dl_v2/license_deployment/util.py index 7b8b128d..2afb29d3 100644 --- a/weather_dl_v2/license_deployment/util.py +++ b/weather_dl_v2/license_deployment/util.py @@ -1,4 +1,5 @@ import datetime +import logging import geojson import hashlib import itertools @@ -16,11 +17,20 @@ from urllib.parse import urlparse from google.api_core.exceptions import BadRequest +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) LATITUDE_RANGE = (-90, 90) LONGITUDE_RANGE = (-180, 180) GLOBAL_COVERAGE_AREA = [90, -180, -90, 180] +def exceptionit(func): + def inner_function(*args, **kwargs): + try: + func(*args, **kwargs) + except Exception as e: + logger.error(f"exception in {func.__name__} {e.__class__.__name__} {e}") + return inner_function def _retry_if_valid_input_but_server_or_socket_error_and_timeout_filter(exception) -> bool: if isinstance(exception, socket.timeout): @@ -72,7 +82,7 @@ def copy(src: str, dst: str) -> None: try: subprocess.run(['gsutil', 'cp', src, dst], check=True, capture_output=True) except subprocess.CalledProcessError as e: - print(f'Failed to copy file {src!r} to {dst!r} due to {e.stderr.decode("utf-8")}') + logger.info(f'Failed to copy file {src!r} to {dst!r} due to {e.stderr.decode("utf-8")}') raise @@ -80,7 +90,7 @@ def copy(src: str, dst: str) -> None: def to_json_serializable_type(value: t.Any) -> t.Any: """Returns the value with a type serializable to JSON""" # Note: The order of processing is significant. 
- print('Serializing to JSON') + logger.info('Serializing to JSON') if pd.isna(value) or value is None: return None @@ -181,5 +191,5 @@ def download_with_aria2(url: str, path: str) -> None: check=True, capture_output=True) except subprocess.CalledProcessError as e: - print(f'Failed download from server {url!r} to {path!r} due to {e.stderr.decode("utf-8")}') + logger.info(f'Failed download from server {url!r} to {path!r} due to {e.stderr.decode("utf-8")}') raise From 257908dca81a595cb12f2ed1cc74aeaada469f24 Mon Sep 17 00:00:00 2001 From: aniketinfocusp <122869307+aniketinfocusp@users.noreply.github.com> Date: Wed, 19 Jul 2023 15:58:48 +0530 Subject: [PATCH 19/51] `dl-v2` server integration tests (#359) * added tests for fast-api server * added test for absolute license priority * minor fix --- .../fastapi-server/routers/download.py | 26 ++- .../fastapi-server/tests/__init__.py | 0 .../tests/integration/__init__.py | 0 .../tests/integration/test_download.py | 156 +++++++++++++++ .../tests/integration/test_license.py | 180 ++++++++++++++++++ .../tests/integration/test_queues.py | 145 ++++++++++++++ .../tests/test_data/example.cfg | 32 ++++ .../tests/test_data/no_exist.cfg | 32 ++++ 8 files changed, 565 insertions(+), 6 deletions(-) create mode 100644 weather_dl_v2/fastapi-server/tests/__init__.py create mode 100644 weather_dl_v2/fastapi-server/tests/integration/__init__.py create mode 100644 weather_dl_v2/fastapi-server/tests/integration/test_download.py create mode 100644 weather_dl_v2/fastapi-server/tests/integration/test_license.py create mode 100644 weather_dl_v2/fastapi-server/tests/integration/test_queues.py create mode 100644 weather_dl_v2/fastapi-server/tests/test_data/example.cfg create mode 100644 weather_dl_v2/fastapi-server/tests/test_data/no_exist.cfg diff --git a/weather_dl_v2/fastapi-server/routers/download.py b/weather_dl_v2/fastapi-server/routers/download.py index 00252e9e..bc6d3a4f 100644 --- a/weather_dl_v2/fastapi-server/routers/download.py +++ 
b/weather_dl_v2/fastapi-server/routers/download.py @@ -3,6 +3,7 @@ from database.download_handler import DownloadHandler, get_download_handler from database.queue_handler import QueueHandler, get_queue_handler import shutil +import os router = APIRouter( prefix="/download", @@ -11,18 +12,31 @@ ) -def upload(file: UploadFile): - dest = f"./config_files/{file.filename}" - with open(dest, "wb+") as dest_: - shutil.copyfileobj(file.file, dest_) - return dest + + +def get_upload(): + def upload(file: UploadFile): + dest = f"./config_files/{file.filename}" + with open(dest, "wb+") as dest_: + shutil.copyfileobj(file.file, dest_) + return dest + + return upload + +def get_upload_mock(): + def upload(file: UploadFile): + return f"{os.getcwd()}/tests/test_data/{file.filename}" + + return upload # Can submit a config to the server. @router.post("/") def submit_download(file: UploadFile | None = None, licenses: list = [], background_tasks: BackgroundTasks = BackgroundTasks(), - download_handler: DownloadHandler = Depends(get_download_handler)): + download_handler: DownloadHandler = Depends(get_download_handler), + upload = Depends(get_upload) + ): if not file: raise HTTPException(status_code=404, detail="No upload file sent.") else: diff --git a/weather_dl_v2/fastapi-server/tests/__init__.py b/weather_dl_v2/fastapi-server/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/weather_dl_v2/fastapi-server/tests/integration/__init__.py b/weather_dl_v2/fastapi-server/tests/integration/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/weather_dl_v2/fastapi-server/tests/integration/test_download.py b/weather_dl_v2/fastapi-server/tests/integration/test_download.py new file mode 100644 index 00000000..6ec9fe01 --- /dev/null +++ b/weather_dl_v2/fastapi-server/tests/integration/test_download.py @@ -0,0 +1,156 @@ +import logging +import os +from fastapi.testclient import TestClient +from main import app, ROOT_DIR +from 
database.download_handler import get_download_handler, get_mock_download_handler +from database.license_handler import get_license_handler, get_mock_license_handler +from database.queue_handler import get_queue_handler, get_mock_queue_handler +from routers.download import get_upload, get_upload_mock + +client = TestClient(app) + +logger = logging.getLogger(__name__) + +app.dependency_overrides[get_download_handler] = get_mock_download_handler +app.dependency_overrides[get_license_handler] = get_mock_license_handler +app.dependency_overrides[get_queue_handler] = get_mock_queue_handler +app.dependency_overrides[get_upload] = get_upload_mock + + +def _get_download(headers, query, code, expected): + response = client.get("/download", headers=headers, params=query) + + assert response.status_code == code + assert response.json() == expected + +def test_get_downloads_basic(): + headers = {} + query = {} + code = 200 + expected = [ + { + "config_name": "config_1", + "client_name": "MARS", + "total_shards": 10000, + "scheduled_shards": 4990, + "downloaded_shards": 5000, + "failed_shards": 0 + }, + { + "config_name": "config_2", + "client_name": "MARS", + "total_shards": 10000, + "scheduled_shards": 4990, + "downloaded_shards": 5000, + "failed_shards": 0 + }, + { + "config_name": "config_3", + "client_name": "CDS", + "total_shards": 10000, + "scheduled_shards": 4990, + "downloaded_shards": 5000, + "failed_shards": 0 + } + ] + + _get_download(headers, query, code, expected) + + +def _submit_download(headers, file_path, licenses, code, expected): + file = None + try: + file = {"file" : open(file_path, 'rb')} + except FileNotFoundError: + print("file not found.") + + payload = {"licenses" : licenses} + + response = client.post("/download", headers=headers, files=file, data=payload) + + print(f"resp {response.json()}") + + assert response.status_code == code + assert response.json() == expected + +def test_submit_download_basic(): + header = { + "accept": "application/json", + 
} + file_path = os.path.join(ROOT_DIR, "tests/test_data/no_exist.cfg") + licenses = ["L1"] + code = 200 + expected = {'message': f"file 'no_exist.cfg' saved at '{os.getcwd()}/tests/test_data/no_exist.cfg' "'successfully.'} + + _submit_download(header, file_path, licenses, code, expected) + +def test_submit_download_file_not_uploaded(): + header = { + "accept": "application/json", + } + file_path = os.path.join(ROOT_DIR, "tests/test_data/wrong_file.cfg") + licenses = ["L1"] + code = 404 + expected = {"detail": "No upload file sent."} + + _submit_download(header, file_path, licenses, code, expected) + +def test_submit_download_file_alreadys_exist(): + header = { + "accept": "application/json", + } + file_path = os.path.join(ROOT_DIR, "tests/test_data/example.cfg") + licenses = ["L1"] + code = 400 + expected = { + "detail": "Please stop the ongoing download of the config file 'example.cfg' before attempting to start a new download." # noqa: E501 + } + + _submit_download(header, file_path, licenses, code, expected) + + +def _get_download_by_config(headers, config_name, code, expected): + response = client.get(f"/download/{config_name}", headers=headers) + + assert response.status_code == code + assert response.json() == expected + +def test_get_download_by_config_basic(): + headers = {} + config_name = "dummy_config" + code = 200 + expected = {"config_name": config_name, "client_name": "MARS", "total_shards": 10000, "scheduled_shards": 4990, + "downloaded_shards": 5000, "failed_shards": 0} + + _get_download_by_config(headers, config_name, code, expected) + +def test_get_download_by_config_wrong_config(): + headers = {} + config_name = "no_exist" + code = 404 + expected = {'detail': 'Download config not found in weather-dl v2.'} + + _get_download_by_config(headers, config_name, code, expected) + + +def _delete_download_by_config(headers, config_name, code, expected): + response = client.delete(f"/download/{config_name}",headers=headers) + + assert response.status_code 
== code + assert response.json() == expected + +def test_delete_download_by_config_basic(): + headers = {} + config_name = "dummy_config" + code = 200 + expected = {'config_name': 'dummy_config', 'message': 'Download config stopped & removed successfully.'} + + _delete_download_by_config(headers, config_name, code, expected) + +def test_delete_download_by_config_wrong_config(): + headers = {} + config_name = "no_exist" + code = 404 + expected = {'detail': 'No such download config to stop & remove.'} + + _delete_download_by_config(headers, config_name, code, expected) diff --git a/weather_dl_v2/fastapi-server/tests/integration/test_license.py b/weather_dl_v2/fastapi-server/tests/integration/test_license.py new file mode 100644 index 00000000..49dfb352 --- /dev/null +++ b/weather_dl_v2/fastapi-server/tests/integration/test_license.py @@ -0,0 +1,180 @@ +import logging +import json +from fastapi.testclient import TestClient +from main import app +from database.download_handler import get_download_handler, get_mock_download_handler +from database.license_handler import get_license_handler, get_mock_license_handler +from routers.license import ( + get_create_deployment, + get_create_deployment_mock, + get_terminate_license_deployment, + get_terminate_license_deployment_mock + ) +from database.queue_handler import get_queue_handler, get_mock_queue_handler + +client = TestClient(app) + +logger = logging.getLogger(__name__) + +app.dependency_overrides[get_download_handler] = get_mock_download_handler +app.dependency_overrides[get_license_handler] = get_mock_license_handler +app.dependency_overrides[get_queue_handler] = get_mock_queue_handler +app.dependency_overrides[get_create_deployment] = get_create_deployment_mock +app.dependency_overrides[get_terminate_license_deployment] = get_terminate_license_deployment_mock + +def _get_license(headers, query, code, expected): + response = client.get("/license", headers=headers, params=query) + + assert response.status_code == code 
+ assert response.json() == expected + +def test_get_license_basic(): + headers = {} + query = {} + code = 200 + expected = [ + { + "license_id": "L1", + 'secret_id': 'xxxx', + "client_name": "dummy_client", + "k8s_deployment_id": "k1", + "number_of_requets": 100 + } + ] + + _get_license(headers, query, code, expected) + +def test_get_license_client_name(): + headers = {} + client_name = "dummy_client" + query = {"client_name" : client_name} + code = 200 + expected = [ + { + "license_id": "L1", + "secret_id": "xxxx", + "client_name": client_name, + "k8s_deployment_id": "k1", + "number_of_requets": 100 + } + ] + + _get_license(headers, query, code, expected) + + +def _add_license(headers, payload, code, expected): + response = client.post("/license", headers=headers, data=json.dumps(payload), params={"license_id": "L1"}) + + assert response.status_code == code + assert response.json() == expected + +def test_add_license_basic(): + headers = { + "accept": "application/json", + "Content-Type" : "application/json" + } + license = { + "client_name": "dummy_client", + "number_of_requests" : 0, + "secret_id": "xxxx", + "api_email" : "email", + 'k8s_deployment_id': 'k1' + } + payload = license + code = 200 + expected = {'license_id': 'L1', 'message': 'License added successfully.'} + + _add_license(headers, payload, code, expected) + + +def _get_license_by_license_id(headers, license_id, code, expected): + response = client.get(f"/license/{license_id}", headers=headers) + + print(f"response {response.json()}") + assert response.status_code == code + assert response.json() == expected + +def test_get_license_by_license_id(): + headers = { + "accept": "application/json", + "Content-Type" : "application/json" + } + license_id = "L1" + code = 200 + expected = { + "license_id": license_id, + "secret_id": "xxxx", + "client_name": "dummy_client", + "k8s_deployment_id": "k1", + "number_of_requets": 100 + } + + _get_license_by_license_id(headers, license_id, code, expected) + +def 
test_get_license_wrong_license(): + headers = {} + license_id = "no_exists" + code = 404 + expected = { + "detail": "License not found.", + } + + _get_license_by_license_id(headers, license_id, code, expected) + + +def _update_license(headers, license_id, license, code, expected): + response = client.put(f"/license/{license_id}", headers=headers, data=json.dumps(license)) + + assert response.status_code == code + assert response.json() == expected + +def test_update_license_basic(): + headers = {} + license_id = "L1" + license = { + "client_name": "dummy_client", + "number_of_requests" : 0, + "secret_id": "xxxx", + "api_email" : "email" + } + code = 200 + expected = {"license_id": license_id, "name": "License updated successfully."} + + _update_license(headers, license_id, license, code, expected) + +def test_update_license_wrong_license_id(): + headers = {} + license_id = "no_exists" + license = { + "client_name": "dummy_client", + "number_of_requests" : 0, + "secret_id": "xxxx", + "api_email" : "email" + } + code = 404 + expected = {"detail": "No such license to update."} + + _update_license(headers, license_id, license, code, expected) + + +def _delete_license(headers, license_id, code, expected): + response = client.delete(f"/license/{license_id}", headers=headers) + + assert response.status_code == code + assert response.json() == expected + +def test_delete_license_basic(): + headers = {} + license_id = "L1" + code = 200 + expected = {"license_id": license_id, "message": "License removed successfully."} + + _delete_license(headers, license_id, code, expected) + +def test_delete_license_wrong_license(): + headers = {} + license_id = "no_exists" + code = 404 + expected = {"detail": "No such license to delete."} + + _delete_license(headers, license_id, code, expected) diff --git a/weather_dl_v2/fastapi-server/tests/integration/test_queues.py b/weather_dl_v2/fastapi-server/tests/integration/test_queues.py new file mode 100644 index 00000000..ee82897b --- 
/dev/null +++ b/weather_dl_v2/fastapi-server/tests/integration/test_queues.py @@ -0,0 +1,145 @@ +import logging +from main import app +from fastapi.testclient import TestClient +from database.download_handler import get_download_handler, get_mock_download_handler +from database.license_handler import get_license_handler, get_mock_license_handler +from database.queue_handler import get_queue_handler, get_mock_queue_handler + +client = TestClient(app) + +logger = logging.getLogger(__name__) + +app.dependency_overrides[get_download_handler] = get_mock_download_handler +app.dependency_overrides[get_license_handler] = get_mock_license_handler +app.dependency_overrides[get_queue_handler] = get_mock_queue_handler + +def _get_all_queue(headers, query, code, expected): + response = client.get("/queues", headers=headers, params=query) + + assert response.status_code == code + assert response.json() == expected + +def test_get_all_queues(): + headers = {} + query = {} + code = 200 + expected = [ + { + "client_name": "dummy_client", + "license_id": "L1", + "queue": [] + } + ] + + _get_all_queue(headers, query, code, expected) + +def test_get_client_queues(): + headers = {} + client_name = "dummy_client" + query = {"client_name": client_name} + code = 200 + expected = [ + { + "client_name": client_name, + "license_id": "L1", + "queue": [] + } + ] + + _get_all_queue(headers, query, code, expected) + + +def _get_queue_by_license(headers, license_id, code, expected): + response = client.get(f"/queues/{license_id}", headers=headers) + + assert response.status_code == code + assert response.json() == expected + +def test_get_queue_by_license_basic(): + headers = {} + license_id = "L1" + code = 200 + expected = { + "client_name": "dummy_client", + "license_id": license_id, + "queue": [] + } + + _get_queue_by_license(headers, license_id, code, expected) + +def test_get_queue_by_license_wrong_license(): + headers = {} + license_id = "no_exists" + code = 404 + expected = {"detail": 
"License's priority not found."} + + _get_queue_by_license(headers, license_id, code, expected) + + +def _modify_license_queue(headers, license_id, priority_list, code, expected): + response = client.post(f"/queues/{license_id}", headers=headers, data=priority_list) + + assert response.status_code == code + assert response.json() == expected + +def test_modify_license_queue_basic(): + headers = {} + license_id = "L1" + priority_list = [] + code = 200 + expected = {"message": f"'{license_id}' license priority updated successfully."} + + _modify_license_queue(headers, license_id, priority_list, code, expected) + +def test_modify_license_queue_wrong_license_id(): + headers = {} + license_id = "no_exists" + priority_list = [] + code = 404 + expected = {"detail": "License's priority not found."} + + _modify_license_queue(headers, license_id, priority_list, code, expected) + +def _modify_config_priority_in_license(headers, license_id, query, code, expected): + response = client.put(f"/queues/priority/{license_id}", params=query) + + print(f"response {response.json()}") + + assert response.status_code == code + assert response.json() == expected + +def test_modify_config_priority_in_license_basic(): + headers = {} + license_id = "L1" + query = { + "config_name": "example.cfg", + "priority": 0 + } + code = 200 + expected = {'message': f"'{license_id}' license 'example.cfg' priority updated successfully."} + + _modify_config_priority_in_license(headers, license_id, query, code, expected) + +def test_modify_config_priority_in_license_wrong_license(): + headers = {} + license_id = "no_exists" + query = { + "config_name": "example.cfg", + "priority": 0 + } + code = 404 + expected = {'detail': "License's priority not found."} + + _modify_config_priority_in_license(headers, license_id, query, code, expected) + +def test_modify_config_priority_in_license_wrong_config(): + headers = {} + license_id = "no_exists" + query = { + "config_name": "wrong.cfg", + "priority": 0 + } + code 
= 404 + expected = {'detail': "License's priority not found."} + + _modify_config_priority_in_license(headers, license_id, query, code, expected) diff --git a/weather_dl_v2/fastapi-server/tests/test_data/example.cfg b/weather_dl_v2/fastapi-server/tests/test_data/example.cfg new file mode 100644 index 00000000..6747012c --- /dev/null +++ b/weather_dl_v2/fastapi-server/tests/test_data/example.cfg @@ -0,0 +1,32 @@ +[parameters] +client=mars + +target_path=gs:///test-weather-dl-v2/{date}T00z.gb +partition_keys= + date + # step + +# API Keys & Subsections go here... + +[selection] +class=od +type=pf +stream=enfo +expver=0001 +levtype=pl +levelist=100 +# params: +# (z) Geopotential 129, (t) Temperature 130, +# (u) U component of wind 131, (v) V component of wind 132, +# (q) Specific humidity 133, (w) vertical velocity 135, +# (vo) Vorticity (relative) 138, (d) Divergence 155, +# (r) Relative humidity 157 +param=129.128 +# +# next: 2019-01-01/to/existing +# +date=2019-07-18/to/2019-07-20 +time=0000 +step=0/to/2 +number=1/to/2 +grid=F640 diff --git a/weather_dl_v2/fastapi-server/tests/test_data/no_exist.cfg b/weather_dl_v2/fastapi-server/tests/test_data/no_exist.cfg new file mode 100644 index 00000000..6747012c --- /dev/null +++ b/weather_dl_v2/fastapi-server/tests/test_data/no_exist.cfg @@ -0,0 +1,32 @@ +[parameters] +client=mars + +target_path=gs:///test-weather-dl-v2/{date}T00z.gb +partition_keys= + date + # step + +# API Keys & Subsections go here... 
+ +[selection] +class=od +type=pf +stream=enfo +expver=0001 +levtype=pl +levelist=100 +# params: +# (z) Geopotential 129, (t) Temperature 130, +# (u) U component of wind 131, (v) V component of wind 132, +# (q) Specific humidity 133, (w) vertical velocity 135, +# (vo) Vorticity (relative) 138, (d) Divergence 155, +# (r) Relative humidity 157 +param=129.128 +# +# next: 2019-01-01/to/existing +# +date=2019-07-18/to/2019-07-20 +time=0000 +step=0/to/2 +number=1/to/2 +grid=F640 From 95c9d500f15f1a7afd6288eef8eeef1cf4fc8ecc Mon Sep 17 00:00:00 2001 From: aniketinfocusp <122869307+aniketinfocusp@users.noreply.github.com> Date: Fri, 21 Jul 2023 15:50:52 +0530 Subject: [PATCH 20/51] updated cli create-instance command & lint fixes (#364) * updated vm creation command * cli lint fixes * revert pyproj * seperated startup script * updated readme * fixed typo --- weather_dl_v2/cli/README.md | 11 +++-------- weather_dl_v2/cli/app/config.py | 2 +- weather_dl_v2/cli/app/main.py | 2 +- .../cli/app/services/download_service.py | 14 +++++++------- .../cli/app/services/license_service.py | 15 +++++++-------- .../cli/app/services/network_service.py | 12 ++++++------ weather_dl_v2/cli/app/services/queue_service.py | 12 ++++++------ weather_dl_v2/cli/app/subcommands/download.py | 8 ++++---- weather_dl_v2/cli/app/subcommands/license.py | 10 +++++----- weather_dl_v2/cli/app/subcommands/queue.py | 16 ++++++++-------- weather_dl_v2/cli/app/utils.py | 10 +++++----- weather_dl_v2/cli/vm-startup.sh | 4 ++++ 12 files changed, 57 insertions(+), 59 deletions(-) create mode 100644 weather_dl_v2/cli/vm-startup.sh diff --git a/weather_dl_v2/cli/README.md b/weather_dl_v2/cli/README.md index c5b718be..12eecb9b 100644 --- a/weather_dl_v2/cli/README.md +++ b/weather_dl_v2/cli/README.md @@ -46,19 +46,14 @@ gcloud compute instances create-with-container weather-dl-v2-cli \ --boot-disk-device-name=weather-dl-v2-cli \ --container-image=$IMAGE_PATH \ --container-restart-policy=on-failure \ + --container-tty \ 
--no-shielded-secure-boot \ --shielded-vtpm \ - --shielded-integrity-monitoring \ - --labels=goog-ec-src=vm_add-gcloud,container-vm=cos-stable-105-17412-101-24 -``` - -## To get the weather-dl-v2 cli docker-image id in VM -``` -docker images + --labels=goog-ec-src=vm_add-gcloud,container-vm=cos-stable-105-17412-101-24 \ + --metadata-from-file=startup-script=vm-startup.sh ``` ## Use the cli after doing ssh in the above created VM ``` -docker run -it weather-dl-v2 --help ``` diff --git a/weather_dl_v2/cli/app/config.py b/weather_dl_v2/cli/app/config.py index 174a9c22..2080e564 100644 --- a/weather_dl_v2/cli/app/config.py +++ b/weather_dl_v2/cli/app/config.py @@ -5,4 +5,4 @@ def __init__(self): if "BASE_URI" in os.environ: self.BASE_URI = os.environ["BASE_URI"] else: - raise KeyError("BASE_URI not in environment.") \ No newline at end of file + raise KeyError("BASE_URI not in environment.") diff --git a/weather_dl_v2/cli/app/main.py b/weather_dl_v2/cli/app/main.py index a94d025a..5031d948 100644 --- a/weather_dl_v2/cli/app/main.py +++ b/weather_dl_v2/cli/app/main.py @@ -26,4 +26,4 @@ def ping(): logger.info(x.text) if __name__ == "__main__": - app() \ No newline at end of file + app() diff --git a/weather_dl_v2/cli/app/services/download_service.py b/weather_dl_v2/cli/app/services/download_service.py index e7468afa..f65e91fb 100644 --- a/weather_dl_v2/cli/app/services/download_service.py +++ b/weather_dl_v2/cli/app/services/download_service.py @@ -7,7 +7,7 @@ logger = logging.getLogger(__name__) class DownloadService(abc.ABC): - + @abc.abstractmethod def _list_all_downloads(self): pass @@ -37,33 +37,33 @@ def _list_all_downloads(self): uri = self.endpoint, header = {"accept": "application/json"} ) - + def _list_all_downloads_by_client_name(self, client_name: str): return network_service.get( uri = self.endpoint, header = {"accept": "application/json"}, query = {"client_name": client_name} ) - + def _get_download_by_config(self, config_name: str): return 
network_service.get( uri = f"{self.endpoint}/download{config_name}", header = {"accept": "application/json"} ) - + def _add_new_download(self, file_path: str, licenses: t.List[str]): try: file = {"file" : open(file_path, 'rb')} except FileNotFoundError: return "File not found." - + return network_service.post( uri=self.endpoint, header = {"accept": "application/json"}, file = file, payload = {"licenses": licenses} ) - + def _remove_download(self, config_name: str): return network_service.delete( uri=f"{self.endpoint}/{config_name}", @@ -79,4 +79,4 @@ def get_download_service(test: bool = False): else: return DownloadServiceNetwork() -download_service = get_download_service() \ No newline at end of file +download_service = get_download_service() diff --git a/weather_dl_v2/cli/app/services/license_service.py b/weather_dl_v2/cli/app/services/license_service.py index 43239664..32e422e1 100644 --- a/weather_dl_v2/cli/app/services/license_service.py +++ b/weather_dl_v2/cli/app/services/license_service.py @@ -1,7 +1,6 @@ import abc import logging import json -import typing as t from app.services.network_service import network_service from app.config import Config @@ -42,33 +41,33 @@ def _get_all_license(self): uri = self.endpoint, header = {"accept": "application/json"} ) - + def _get_all_license_by_client_name(self, client_name: str): return network_service.get( uri = self.endpoint, header = {"accept": "application/json"}, query = {"client_name": client_name} ) - + def _get_license_by_license_id(self, license_id: str): return network_service.get( uri = f"{self.endpoint}/{license_id}", header = {"accept": "application/json"}, ) - + def _add_license(self, license_dict: dict): return network_service.post( uri = self.endpoint, header = {"accept": "application/json"}, payload = json.dumps(license_dict) ) - + def _remove_license(self, license_id: str): return network_service.delete( uri = f"{self.endpoint}/{license_id}", header = {"accept": "application/json"}, ) - + def 
_update_license(self, license_id: str, license_dict: dict): return network_service.put( uri = f"{self.endpoint}/{license_id}", @@ -76,7 +75,7 @@ def _update_license(self, license_id: str, license_dict: dict): payload = json.dumps(license_dict) ) - + class LicenseServiceMock(LicenseService): pass @@ -85,5 +84,5 @@ def get_license_service(test: bool = False): return LicenseServiceMock() else: return LicenseServiceNetwork() - + license_service = get_license_service() diff --git a/weather_dl_v2/cli/app/services/network_service.py b/weather_dl_v2/cli/app/services/network_service.py index dbce8961..234c565f 100644 --- a/weather_dl_v2/cli/app/services/network_service.py +++ b/weather_dl_v2/cli/app/services/network_service.py @@ -26,7 +26,7 @@ def parse_response(self, response: requests.Response): return return json.dumps(parsed, indent=3) - + @timeit def get(self, uri, header, query=None, payload=None): try: @@ -35,7 +35,7 @@ def get(self, uri, header, query=None, payload=None): except requests.exceptions.RequestException as e: logger.error(f"request error: {e}") raise SystemExit(e) - + @timeit def post(self, uri, header, query=None, payload=None, file=None): try: @@ -44,7 +44,7 @@ def post(self, uri, header, query=None, payload=None, file=None): except requests.exceptions.RequestException as e: logger.error(f"request error: {e}") raise SystemExit(e) - + @timeit def put(self, uri, header, query=None, payload=None, file=None): try: @@ -54,7 +54,7 @@ def put(self, uri, header, query=None, payload=None, file=None): except requests.exceptions.RequestException as e: logger.error(f"request error: {e}") raise SystemExit(e) - + @timeit def delete(self, uri, header, query=None): try: @@ -63,5 +63,5 @@ def delete(self, uri, header, query=None): except requests.exceptions.RequestException as e: logger.error(f"request error: {e}") raise SystemExit(e) - -network_service = NetworkService() \ No newline at end of file + +network_service = NetworkService() diff --git 
a/weather_dl_v2/cli/app/services/queue_service.py b/weather_dl_v2/cli/app/services/queue_service.py index c49a3986..f4dfae0f 100644 --- a/weather_dl_v2/cli/app/services/queue_service.py +++ b/weather_dl_v2/cli/app/services/queue_service.py @@ -8,7 +8,7 @@ logger = logging.getLogger(__name__) class QueueService(abc.ABC): - + @abc.abstractmethod def _get_all_license_queues(self): pass @@ -39,20 +39,20 @@ def _get_all_license_queues(self): uri = self.endpoint, header = {"accept": "application/json"} ) - + def _get_license_queue_by_client_name(self, client_name: str): return network_service.get( uri = self.endpoint, header = {"accept": "application/json"}, query = {"client_name": client_name} ) - + def _get_queue_by_license(self, license_id: str): return network_service.get( uri = f"{self.endpoint}/{license_id}", header = {"accept": "application/json"} ) - + def _edit_license_queue(self, license_id: str, priority_list: t.List[str]): return network_service.post( uri = f"{self.endpoint}/{license_id}", @@ -80,5 +80,5 @@ def get_queue_service(test: bool = False): return QueueServiceMock() else: return QueueServiceNetwork() - -queue_service = get_queue_service() \ No newline at end of file + +queue_service = get_queue_service() diff --git a/weather_dl_v2/cli/app/subcommands/download.py b/weather_dl_v2/cli/app/subcommands/download.py index 46e8684a..916faa97 100644 --- a/weather_dl_v2/cli/app/subcommands/download.py +++ b/weather_dl_v2/cli/app/subcommands/download.py @@ -21,10 +21,10 @@ def get_downloads( client_name = data['client_name'] except Exception as e: print(f"filter error: {e}") - return - + return + print(download_service._list_all_downloads_by_client_name(client_name)) - return + return print(download_service._list_all_downloads()) @@ -36,7 +36,7 @@ def submit_download( if len(license) == 0: print("No licenses mentioned. 
Please specify licenese Id.") return - + print(download_service._add_new_download(file_path, license)) @app.command("get", help="Get a particular config.") diff --git a/weather_dl_v2/cli/app/subcommands/license.py b/weather_dl_v2/cli/app/subcommands/license.py index f1c94929..d2931fd6 100644 --- a/weather_dl_v2/cli/app/subcommands/license.py +++ b/weather_dl_v2/cli/app/subcommands/license.py @@ -22,10 +22,10 @@ def get_all_license( except Exception as e: print(f"filter error: {e}") return - + print(license_service._get_all_license_by_client_name(client_name)) return - + print(license_service._get_all_license()) @app.command("get", help="Get a particular license by ID.") @@ -36,7 +36,7 @@ def get_license( @app.command("add", help="Add new license.") def add_license( - file_path: Annotated[str, typer.Argument(help='''Input json file. Example json for new license- {"client_name" : , "number_of_requests" : , "secret_id" : }''')], + file_path: Annotated[str, typer.Argument(help='''Input json file. Example json for new license- {"client_name" : , "number_of_requests" : , "secret_id" : }''')], #noqa ): validator = LicenseValidator( valid_keys=[ @@ -51,7 +51,7 @@ def add_license( except Exception as e: print(f"payload error: {e}") return - + print(license_service._add_license(license_dict)) @app.command("remove", help="Remove a license.") @@ -63,7 +63,7 @@ def remove_license( @app.command("update", help="Update existing license.") def update_license( license: Annotated[str, typer.Argument(help="License ID.")], - file_path: Annotated[str, typer.Argument(help='''Input json file. Example json for updated license- {"client_name" : , "number_of_requests" : , "secret_id" : }''')] + file_path: Annotated[str, typer.Argument(help='''Input json file. 
Example json for updated license- {"client_name" : , "number_of_requests" : , "secret_id" : }''')] #noqa ): validator = LicenseValidator( valid_keys=[ diff --git a/weather_dl_v2/cli/app/subcommands/queue.py b/weather_dl_v2/cli/app/subcommands/queue.py index 5e6a75cb..d22a6c87 100644 --- a/weather_dl_v2/cli/app/subcommands/queue.py +++ b/weather_dl_v2/cli/app/subcommands/queue.py @@ -13,7 +13,7 @@ def get_all_license_queue( filter: Annotated[str, typer.Option(help="Filter by some value. Format: filter_key=filter_value")] = None ): if filter: - + validator = QueueValidator(valid_keys=["client_name"]) try: @@ -21,11 +21,11 @@ def get_all_license_queue( client_name = data['client_name'] except Exception as e: print(f"filter error: {e}") - return - + return + print(queue_service._get_license_queue_by_client_name(client_name)) return - + print(queue_service._get_all_license_queues()) @app.command("get", help="Get queue of particular license.") @@ -34,12 +34,12 @@ def get_license_queue( ): print(queue_service._get_queue_by_license(license)) -@app.command("edit", help="Edit existing license queue. Queue can edited via a priority file or my moving a single config to a given priority.") +@app.command("edit", help="Edit existing license queue. Queue can edited via a priority file or my moving a single config to a given priority.") #noqa def modify_license_queue( license: Annotated[str, typer.Argument(help="License ID.")], - file: Annotated[str, typer.Option("--file", "-f", help='''File path of priority json file. Example json: {"priority": ["c1.cfg", "c2.cfg",...]}''')] = None, + file: Annotated[str, typer.Option("--file", "-f", help='''File path of priority json file. Example json: {"priority": ["c1.cfg", "c2.cfg",...]}''')] = None, #noqa config: Annotated[str, typer.Option("--config", "-c", help="Config name for absolute priority.")] = None, - priority: Annotated[int, typer.Option("--priority", "-p", help="Absolute priority for the config in a license queue. 
Priority increases in ascending order with 0 having highest priority.")] = None + priority: Annotated[int, typer.Option("--priority", "-p", help="Absolute priority for the config in a license queue. Priority increases in ascending order with 0 having highest priority.")] = None #noqa ): if file is None and (config is None and priority is None): @@ -72,5 +72,5 @@ def modify_license_queue( print("--config & --priority arguments should be used together.") return - + diff --git a/weather_dl_v2/cli/app/utils.py b/weather_dl_v2/cli/app/utils.py index 70afe9a6..84119b57 100644 --- a/weather_dl_v2/cli/app/utils.py +++ b/weather_dl_v2/cli/app/utils.py @@ -46,18 +46,18 @@ def validate_json(self, file_path): except FileNotFoundError: logger.info("file not found.") raise FileNotFoundError - + def _validate_keys(self, data_set: set, valid_set: set): if data_set == valid_set: return True - + missing_keys = valid_set.difference(data_set) invalid_keys = data_set.difference(valid_set) if len(missing_keys) > 0: raise ValueError(f"keys {missing_keys} are missing in file.") - + if len(invalid_keys) > 0: raise ValueError(f"keys {invalid_keys} are invalid keys.") - - return False \ No newline at end of file + + return False diff --git a/weather_dl_v2/cli/vm-startup.sh b/weather_dl_v2/cli/vm-startup.sh new file mode 100644 index 00000000..e36f6edc --- /dev/null +++ b/weather_dl_v2/cli/vm-startup.sh @@ -0,0 +1,4 @@ +#! /bin/bash + +command="docker exec -it \\\$(docker ps -qf name=weather-dl-v2-cli) /bin/bash" +sudo sh -c "echo \"$command\" >> /etc/profile" \ No newline at end of file From 4cc633c7eb4d4a96e67706038ea56095ca67078e Mon Sep 17 00:00:00 2001 From: aniketinfocusp <122869307+aniketinfocusp@users.noreply.github.com> Date: Mon, 24 Jul 2023 15:21:15 +0530 Subject: [PATCH 21/51] Pending deployment bootup (#366) * Added pending deployments on server bootup. 
* minor refactoring in db layer * using create license deployment from license route * updated args in downloader * minor fix --- .../downloader_kubernetes/downloader.py | 3 +- .../database/license_handler.py | 28 ++++++------------- .../fastapi-server/database/queue_handler.py | 24 ++++++++++++++++ weather_dl_v2/fastapi-server/main.py | 22 +++++++++++++-- .../fastapi-server/routers/download.py | 2 -- .../fastapi-server/routers/license.py | 7 +++-- 6 files changed, 59 insertions(+), 27 deletions(-) diff --git a/weather_dl_v2/downloader_kubernetes/downloader.py b/weather_dl_v2/downloader_kubernetes/downloader.py index d81fcc43..d27db2d7 100644 --- a/weather_dl_v2/downloader_kubernetes/downloader.py +++ b/weather_dl_v2/downloader_kubernetes/downloader.py @@ -52,5 +52,4 @@ def main(config_name, dataset, selection, user_id, url, target_path) -> None: os.unlink(temp_name) if __name__ == '__main__': - temp_args = sys.argv - main(temp_args[1], temp_args[2], temp_args[3], temp_args[4], temp_args[5], temp_args[6]) + main(*sys.argv[1:]) diff --git a/weather_dl_v2/fastapi-server/database/license_handler.py b/weather_dl_v2/fastapi-server/database/license_handler.py index 7a88c7a2..53207829 100644 --- a/weather_dl_v2/fastapi-server/database/license_handler.py +++ b/weather_dl_v2/fastapi-server/database/license_handler.py @@ -43,11 +43,7 @@ def _update_license(self, license_id: str, license_dict: dict) -> None: pass @abc.abstractmethod - def _create_license_queue(self, license_id: str, client_name: str) -> None: - pass - - @abc.abstractmethod - def _remove_license_queue(self, license_id: str) -> None: + def _get_license_without_deployment(self) -> list: pass class LicenseHandlerMock(LicenseHandler): @@ -102,11 +98,8 @@ def _get_licenses(self) -> list: } ] - def _create_license_queue(self, license_id: str, client_name: str) -> None: - logger.info("Added L1 queue in 'queues' collection. 
Update_time: 00000.") - - def _remove_license_queue(self, license_id: str) -> None: - logger.info("Removed L1 queue in 'queues' collection. Update_time: 00000.") + def _get_license_without_deployment(self) -> list: + return [] class LicenseHandlerFirestore(LicenseHandler): def __init__(self, db: firestore.firestore.Client): @@ -153,12 +146,9 @@ def _get_licenses(self) -> list: result.append(self.db.collection(self.collection).document(snapshot.id).get().to_dict()) return result - def _create_license_queue(self, license_id: str, client_name: str) -> None: - result: WriteResult = self.db.collection('queues').document(license_id).set( - {"license_id": license_id, "client_name": client_name, "queue": []} - ) - logger.info(f"Added {license_id} queue in 'queues' collection. Update_time: {result.update_time}.") - - def _remove_license_queue(self, license_id: str) -> None: - timestamp = self.db.collection('queues').document(license_id).delete() - logger.info(f"Removed {license_id} queue in 'queues' collection. 
Update_time: {timestamp}.") + def _get_license_without_deployment(self) -> list: + snapshot_list = self.db.collection(self.collection).where('k8s_deployment_id', '==', '').get() + result = [] + for snapshot in snapshot_list: + result.append(snapshot.to_dict()['license_id']) + return result diff --git a/weather_dl_v2/fastapi-server/database/queue_handler.py b/weather_dl_v2/fastapi-server/database/queue_handler.py index a7edf5f9..0c2f11f0 100644 --- a/weather_dl_v2/fastapi-server/database/queue_handler.py +++ b/weather_dl_v2/fastapi-server/database/queue_handler.py @@ -50,6 +50,14 @@ def _update_queues_on_stop_download(self, config_name: str) -> None: def _update_config_priority_in_license(self, license_id: str, config_name: str, priority: int) -> None: pass + @abc.abstractmethod + def _create_license_queue(self, license_id: str, client_name: str) -> None: + pass + + @abc.abstractmethod + def _remove_license_queue(self, license_id: str) -> None: + pass + class QueueHandlerMock(QueueHandler): def __init__(self): pass @@ -99,6 +107,12 @@ def _update_queues_on_stop_download(self, config_name: str) -> None: def _update_config_priority_in_license(self, license_id: str, config_name: str, priority: int) -> None: print(f"Updated snapshot.id queue in 'queues' collection. Update_time: 00000.") + def _create_license_queue(self, license_id: str, client_name: str) -> None: + logger.info("Added L1 queue in 'queues' collection. Update_time: 00000.") + + def _remove_license_queue(self, license_id: str) -> None: + logger.info("Removed L1 queue in 'queues' collection. Update_time: 00000.") + class QueueHandlerFirestore(QueueHandler): def __init__(self, db: firestore.firestore.Client, collection: str = "queues"): self.db = db @@ -164,3 +178,13 @@ def _update_config_priority_in_license(self, license_id: str, config_name: str, {'queue': new_priority_list} ) print(f"Updated {snapshot.id} queue in 'queues' collection. 
Update_time: {result.update_time}.") + + def _create_license_queue(self, license_id: str, client_name: str) -> None: + result: WriteResult = self.db.collection('queues').document(license_id).set( + {"license_id": license_id, "client_name": client_name, "queue": []} + ) + logger.info(f"Added {license_id} queue in 'queues' collection. Update_time: {result.update_time}.") + + def _remove_license_queue(self, license_id: str) -> None: + timestamp = self.db.collection('queues').document(license_id).delete() + logger.info(f"Removed {license_id} queue in 'queues' collection. Update_time: {timestamp}.") diff --git a/weather_dl_v2/fastapi-server/main.py b/weather_dl_v2/fastapi-server/main.py index 01dedfc8..f40ee4d4 100644 --- a/weather_dl_v2/fastapi-server/main.py +++ b/weather_dl_v2/fastapi-server/main.py @@ -4,6 +4,9 @@ from contextlib import asynccontextmanager from fastapi import FastAPI from routers import license, download, queues +from database.license_handler import get_license_handler +from license_dep.deployment_creator import create_license_deployment +from routers.license import get_create_deployment ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) @@ -11,13 +14,28 @@ logging.config.fileConfig('logging.conf', disable_existing_loggers=False) logger = logging.getLogger(__name__) +def create_pending_license_deployments(): + """Creates license deployments for Licenses whose deployments does not exist.""" + license_handler = get_license_handler() + create_deployment = get_create_deployment() + license_list = license_handler._get_license_without_deployment() + + for license in license_list: + try: + logger.info(f"Creating license deployment for {license}") + create_deployment(license) + except Exception as e: + logger.error(f"License deployment failed for {license}. Exception: {e}") + @asynccontextmanager async def lifespan(app: FastAPI): logger.info("Started FastAPI server") # Boot up # TODO: Replace hard-coded collection name by read a server config. 
- logger.info("Create database if not already exists.") - logger.info("Retrieve license information & create license deployment if needed.") + + # Retrieve license information & create license deployment if needed. + create_pending_license_deployments() + yield # Clean up diff --git a/weather_dl_v2/fastapi-server/routers/download.py b/weather_dl_v2/fastapi-server/routers/download.py index bc6d3a4f..b538f454 100644 --- a/weather_dl_v2/fastapi-server/routers/download.py +++ b/weather_dl_v2/fastapi-server/routers/download.py @@ -12,8 +12,6 @@ ) - - def get_upload(): def upload(file: UploadFile): dest = f"./config_files/{file.filename}" diff --git a/weather_dl_v2/fastapi-server/routers/license.py b/weather_dl_v2/fastapi-server/routers/license.py index 486c3f49..be065d10 100644 --- a/weather_dl_v2/fastapi-server/routers/license.py +++ b/weather_dl_v2/fastapi-server/routers/license.py @@ -2,6 +2,7 @@ from pydantic import BaseModel from license_dep.deployment_creator import create_license_deployment, terminate_license_deployment from database.license_handler import LicenseHandler, get_license_handler +from database.queue_handler import QueueHandler, get_queue_handler # TODO: Make use of google secret manager. 
@@ -97,11 +98,12 @@ def update_license_internal(license_id: str, async def add_license(license: License, background_tasks: BackgroundTasks = BackgroundTasks(), license_handler: LicenseHandler = Depends(get_license_handler), + queue_handler: QueueHandler = Depends(get_queue_handler), create_deployment = Depends(get_create_deployment)): license_dict = license.dict() license_dict['k8s_deployment_id'] = "" license_id = license_handler._add_license(license_dict) - license_handler._create_license_queue(license_id, license_dict['client_name']) + queue_handler._create_license_queue(license_id, license_dict['client_name']) background_tasks.add_task(create_deployment, license_id) return {"license_id": license_id, "message": "License added successfully."} @@ -111,11 +113,12 @@ async def add_license(license: License, async def delete_license(license_id: str, background_tasks: BackgroundTasks = BackgroundTasks(), license_handler: LicenseHandler = Depends(get_license_handler), + queue_handler: QueueHandler = Depends(get_queue_handler), terminate_license_deployment = Depends(get_terminate_license_deployment) ): if not license_handler._check_license_exists(license_id): raise HTTPException(status_code=404, detail="No such license to delete.") license_handler._delete_license(license_id) - license_handler._remove_license_queue(license_id) + queue_handler._remove_license_queue(license_id) background_tasks.add_task(terminate_license_deployment, license_id) return {"license_id": license_id, "message": "License removed successfully."} From 6e797a23915995d9addc0d74a7c5a96cd0115f8a Mon Sep 17 00:00:00 2001 From: Rahul Mahrsee Date: Mon, 24 Jul 2023 10:33:46 +0000 Subject: [PATCH 22/51] Updated README.md. 
--- weather_dl_v2/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/weather_dl_v2/README.md b/weather_dl_v2/README.md index 2b0649b9..d99fa504 100644 --- a/weather_dl_v2/README.md +++ b/weather_dl_v2/README.md @@ -4,4 +4,4 @@ 1) Refer to downloader_kubernetes/README.md 2) Refer to license_deployment/README.md 3) Refer to fastapi-server/README.md - +4) Refer to cli/README.md From 2aec32a532830917f22cbe5cf54951414a26aef7 Mon Sep 17 00:00:00 2001 From: aniketinfocusp <122869307+aniketinfocusp@users.noreply.github.com> Date: Tue, 25 Jul 2023 20:59:21 +0530 Subject: [PATCH 23/51] formatting by pyink for dl_v2 (#367) * added pyink to env ymls * formatting changes * updated readme * lint fixes * remove pyink from env --- weather_dl_v2/README.md | 2 +- weather_dl_v2/cli/app/config.py | 2 + weather_dl_v2/cli/app/main.py | 6 +- .../cli/app/services/download_service.py | 30 +-- .../cli/app/services/license_service.py | 34 +-- .../cli/app/services/network_service.py | 16 +- .../cli/app/services/queue_service.py | 38 ++-- weather_dl_v2/cli/app/subcommands/download.py | 31 ++- weather_dl_v2/cli/app/subcommands/license.py | 56 ++--- weather_dl_v2/cli/app/subcommands/queue.py | 54 +++-- weather_dl_v2/cli/app/utils.py | 4 +- weather_dl_v2/cli/setup.py | 18 +- weather_dl_v2/downloader_kubernetes/README.md | 8 +- .../downloader_kubernetes/downloader.py | 16 +- .../downloader_kubernetes/manifest.py | 201 ++++++++++------- weather_dl_v2/downloader_kubernetes/util.py | 62 ++++-- weather_dl_v2/fastapi-server/README.md | 16 +- .../config_processing/config.py | 30 +-- .../config_processing/manifest.py | 202 ++++++++++------- .../config_processing/parsers.py | 189 +++++++++------- .../config_processing/partition.py | 28 ++- .../config_processing/pipeline.py | 3 +- .../config_processing/stores.py | 20 +- .../fastapi-server/config_processing/util.py | 62 ++++-- .../database/download_handler.py | 38 +++- .../database/license_handler.py | 87 +++++--- 
.../fastapi-server/database/queue_handler.py | 172 ++++++++++----- .../fastapi-server/database/session.py | 9 +- .../license_dep/deployment_creator.py | 9 +- weather_dl_v2/fastapi-server/main.py | 18 +- .../fastapi-server/routers/download.py | 106 ++++++--- .../fastapi-server/routers/license.py | 67 +++--- .../fastapi-server/routers/queues.py | 40 ++-- .../tests/integration/test_download.py | 50 +++-- .../tests/integration/test_license.py | 97 +++++---- .../tests/integration/test_queues.py | 56 ++--- weather_dl_v2/license_deployment/README.md | 6 +- weather_dl_v2/license_deployment/clients.py | 50 +++-- weather_dl_v2/license_deployment/config.py | 30 +-- weather_dl_v2/license_deployment/database.py | 62 ++++-- weather_dl_v2/license_deployment/fetch.py | 65 +++--- .../license_deployment/job_creator.py | 17 +- weather_dl_v2/license_deployment/manifest.py | 203 +++++++++++------- weather_dl_v2/license_deployment/util.py | 65 ++++-- 44 files changed, 1494 insertions(+), 881 deletions(-) diff --git a/weather_dl_v2/README.md b/weather_dl_v2/README.md index d99fa504..35e7d504 100644 --- a/weather_dl_v2/README.md +++ b/weather_dl_v2/README.md @@ -1,6 +1,6 @@ ## weather-dl-v2 -* **Sequence of steps:** +### Sequence of steps: 1) Refer to downloader_kubernetes/README.md 2) Refer to license_deployment/README.md 3) Refer to fastapi-server/README.md diff --git a/weather_dl_v2/cli/app/config.py b/weather_dl_v2/cli/app/config.py index 2080e564..0ea12e4a 100644 --- a/weather_dl_v2/cli/app/config.py +++ b/weather_dl_v2/cli/app/config.py @@ -1,6 +1,8 @@ import os + class Config: + def __init__(self): if "BASE_URI" in os.environ: self.BASE_URI = os.environ["BASE_URI"] diff --git a/weather_dl_v2/cli/app/main.py b/weather_dl_v2/cli/app/main.py index 5031d948..85066d72 100644 --- a/weather_dl_v2/cli/app/main.py +++ b/weather_dl_v2/cli/app/main.py @@ -6,7 +6,9 @@ logger = logging.getLogger(__name__) -app = typer.Typer(help="weather-dl-v2 is a cli tool for communicating with FastAPI 
server.") +app = typer.Typer( + help="weather-dl-v2 is a cli tool for communicating with FastAPI server." +) app.add_typer(download.app, name="download", help="Manage downloads.") app.add_typer(queue.app, name="queue", help="Manage queues.") @@ -22,8 +24,8 @@ def ping(): except requests.exceptions.RequestException as e: raise SystemExit(e) - logger.info(x.text) + if __name__ == "__main__": app() diff --git a/weather_dl_v2/cli/app/services/download_service.py b/weather_dl_v2/cli/app/services/download_service.py index f65e91fb..a021ee18 100644 --- a/weather_dl_v2/cli/app/services/download_service.py +++ b/weather_dl_v2/cli/app/services/download_service.py @@ -6,6 +6,7 @@ logger = logging.getLogger(__name__) + class DownloadService(abc.ABC): @abc.abstractmethod @@ -28,55 +29,58 @@ def _add_new_download(self, file_path: str, licenses: t.List[str]): def _remove_download(self, config_name: str): pass + class DownloadServiceNetwork(DownloadService): + def __init__(self): self.endpoint = f"{Config().BASE_URI}/download" def _list_all_downloads(self): return network_service.get( - uri = self.endpoint, - header = {"accept": "application/json"} + uri=self.endpoint, header={"accept": "application/json"} ) def _list_all_downloads_by_client_name(self, client_name: str): return network_service.get( - uri = self.endpoint, - header = {"accept": "application/json"}, - query = {"client_name": client_name} + uri=self.endpoint, + header={"accept": "application/json"}, + query={"client_name": client_name}, ) def _get_download_by_config(self, config_name: str): return network_service.get( - uri = f"{self.endpoint}/download{config_name}", - header = {"accept": "application/json"} + uri=f"{self.endpoint}/download{config_name}", + header={"accept": "application/json"}, ) def _add_new_download(self, file_path: str, licenses: t.List[str]): try: - file = {"file" : open(file_path, 'rb')} + file = {"file": open(file_path, "rb")} except FileNotFoundError: return "File not found." 
return network_service.post( uri=self.endpoint, - header = {"accept": "application/json"}, - file = file, - payload = {"licenses": licenses} + header={"accept": "application/json"}, + file=file, + payload={"licenses": licenses}, ) def _remove_download(self, config_name: str): return network_service.delete( - uri=f"{self.endpoint}/{config_name}", - header = {"accept": "application/json"} + uri=f"{self.endpoint}/{config_name}", header={"accept": "application/json"} ) + class DownloadServiceMock(DownloadService): pass + def get_download_service(test: bool = False): if test: return DownloadServiceMock() else: return DownloadServiceNetwork() + download_service = get_download_service() diff --git a/weather_dl_v2/cli/app/services/license_service.py b/weather_dl_v2/cli/app/services/license_service.py index 32e422e1..9ef0ab39 100644 --- a/weather_dl_v2/cli/app/services/license_service.py +++ b/weather_dl_v2/cli/app/services/license_service.py @@ -6,7 +6,9 @@ logger = logging.getLogger(__name__) + class LicenseService(abc.ABC): + @abc.abstractmethod def _get_all_license(self): pass @@ -33,56 +35,58 @@ def _update_license(self, license_id: str, license_dict: dict): class LicenseServiceNetwork(LicenseService): + def __init__(self): self.endpoint = f"{Config().BASE_URI}/license" def _get_all_license(self): return network_service.get( - uri = self.endpoint, - header = {"accept": "application/json"} + uri=self.endpoint, header={"accept": "application/json"} ) def _get_all_license_by_client_name(self, client_name: str): return network_service.get( - uri = self.endpoint, - header = {"accept": "application/json"}, - query = {"client_name": client_name} + uri=self.endpoint, + header={"accept": "application/json"}, + query={"client_name": client_name}, ) def _get_license_by_license_id(self, license_id: str): return network_service.get( - uri = f"{self.endpoint}/{license_id}", - header = {"accept": "application/json"}, + uri=f"{self.endpoint}/{license_id}", + header={"accept": 
"application/json"}, ) def _add_license(self, license_dict: dict): return network_service.post( - uri = self.endpoint, - header = {"accept": "application/json"}, - payload = json.dumps(license_dict) + uri=self.endpoint, + header={"accept": "application/json"}, + payload=json.dumps(license_dict), ) def _remove_license(self, license_id: str): return network_service.delete( - uri = f"{self.endpoint}/{license_id}", - header = {"accept": "application/json"}, + uri=f"{self.endpoint}/{license_id}", + header={"accept": "application/json"}, ) def _update_license(self, license_id: str, license_dict: dict): return network_service.put( - uri = f"{self.endpoint}/{license_id}", - header = {"accept": "application/json"}, - payload = json.dumps(license_dict) + uri=f"{self.endpoint}/{license_id}", + header={"accept": "application/json"}, + payload=json.dumps(license_dict), ) class LicenseServiceMock(LicenseService): pass + def get_license_service(test: bool = False): if test: return LicenseServiceMock() else: return LicenseServiceNetwork() + license_service = get_license_service() diff --git a/weather_dl_v2/cli/app/services/network_service.py b/weather_dl_v2/cli/app/services/network_service.py index 234c565f..f614ca67 100644 --- a/weather_dl_v2/cli/app/services/network_service.py +++ b/weather_dl_v2/cli/app/services/network_service.py @@ -5,17 +5,20 @@ logger = logging.getLogger(__name__) + def timeit(func): def wrap_func(*args, **kwargs): t1 = time() result = func(*args, **kwargs) t2 = time() - print(f'[executed in {(t2-t1):.4f}s.]') + print(f"[executed in {(t2-t1):.4f}s.]") return result + return wrap_func class NetworkService: + def parse_response(self, response: requests.Response): try: parsed = json.loads(response.text) @@ -39,7 +42,9 @@ def get(self, uri, header, query=None, payload=None): @timeit def post(self, uri, header, query=None, payload=None, file=None): try: - x = requests.post(uri, params=query, headers=header, data=payload, files=file) + x = requests.post( + uri, 
params=query, headers=header, data=payload, files=file + ) return self.parse_response(x) except requests.exceptions.RequestException as e: logger.error(f"request error: {e}") @@ -48,7 +53,9 @@ def post(self, uri, header, query=None, payload=None, file=None): @timeit def put(self, uri, header, query=None, payload=None, file=None): try: - x = requests.put(uri, params=query, headers=header, data=payload, files=file) + x = requests.put( + uri, params=query, headers=header, data=payload, files=file + ) return self.parse_response(x) except requests.exceptions.RequestException as e: @@ -58,10 +65,11 @@ def put(self, uri, header, query=None, payload=None, file=None): @timeit def delete(self, uri, header, query=None): try: - x = requests.delete(uri, params=query, headers=header) + x = requests.delete(uri, params=query, headers=header) return self.parse_response(x) except requests.exceptions.RequestException as e: logger.error(f"request error: {e}") raise SystemExit(e) + network_service = NetworkService() diff --git a/weather_dl_v2/cli/app/services/queue_service.py b/weather_dl_v2/cli/app/services/queue_service.py index f4dfae0f..1191642c 100644 --- a/weather_dl_v2/cli/app/services/queue_service.py +++ b/weather_dl_v2/cli/app/services/queue_service.py @@ -7,6 +7,7 @@ logger = logging.getLogger(__name__) + class QueueService(abc.ABC): @abc.abstractmethod @@ -26,48 +27,48 @@ def _edit_license_queue(self, license_id: str, priority_list: t.List[str]): pass @abc.abstractmethod - def _edit_config_absolute_priority(self, license_id: str, config_name: str, priority: int): + def _edit_config_absolute_priority( + self, license_id: str, config_name: str, priority: int + ): pass class QueueServiceNetwork(QueueService): + def __init__(self): self.endpoint = f"{Config().BASE_URI}/queues" def _get_all_license_queues(self): return network_service.get( - uri = self.endpoint, - header = {"accept": "application/json"} + uri=self.endpoint, header={"accept": "application/json"} ) def 
_get_license_queue_by_client_name(self, client_name: str): return network_service.get( - uri = self.endpoint, - header = {"accept": "application/json"}, - query = {"client_name": client_name} + uri=self.endpoint, + header={"accept": "application/json"}, + query={"client_name": client_name}, ) def _get_queue_by_license(self, license_id: str): return network_service.get( - uri = f"{self.endpoint}/{license_id}", - header = {"accept": "application/json"} + uri=f"{self.endpoint}/{license_id}", header={"accept": "application/json"} ) def _edit_license_queue(self, license_id: str, priority_list: t.List[str]): return network_service.post( - uri = f"{self.endpoint}/{license_id}", - header = { - "accept": "application/json", - 'Content-Type': 'application/json' - }, - payload = json.dumps(priority_list) + uri=f"{self.endpoint}/{license_id}", + header={"accept": "application/json", "Content-Type": "application/json"}, + payload=json.dumps(priority_list), ) - def _edit_config_absolute_priority(self, license_id: str, config_name: str, priority: int): + def _edit_config_absolute_priority( + self, license_id: str, config_name: str, priority: int + ): return network_service.put( - uri = f"{self.endpoint}/priority/{license_id}", - header = {"accept": "application/json"}, - query = {"config_name": config_name, "priority": priority} + uri=f"{self.endpoint}/priority/{license_id}", + header={"accept": "application/json"}, + query={"config_name": config_name, "priority": priority}, ) @@ -81,4 +82,5 @@ def get_queue_service(test: bool = False): else: return QueueServiceNetwork() + queue_service = get_queue_service() diff --git a/weather_dl_v2/cli/app/subcommands/download.py b/weather_dl_v2/cli/app/subcommands/download.py index 916faa97..10b5fbb4 100644 --- a/weather_dl_v2/cli/app/subcommands/download.py +++ b/weather_dl_v2/cli/app/subcommands/download.py @@ -6,19 +6,23 @@ app = typer.Typer() + class DowloadFilterValidator(Validator): pass + @app.command("list", help="List out all the 
configs.") def get_downloads( - filter: Annotated[str, typer.Option(help="Filter by some value. Format: filter_key=filter_value.")] = None - ): + filter: Annotated[ + str, typer.Option(help="Filter by some value. Format: filter_key=filter_value.") + ] = None +): if filter: validator = DowloadFilterValidator(valid_keys=["client_name"]) try: data = validator.validate(filters=[filter]) - client_name = data['client_name'] + client_name = data["client_name"] except Exception as e: print(f"filter error: {e}") return @@ -28,25 +32,32 @@ def get_downloads( print(download_service._list_all_downloads()) + @app.command("add", help="Submit new config to download.") def submit_download( - file_path: Annotated[str, typer.Argument(help="File path of config to be uploaded.")], - license: Annotated[List[str], typer.Option("--license", "-l", help="License ID.")] = [], - ): + file_path: Annotated[ + str, typer.Argument(help="File path of config to be uploaded.") + ], + license: Annotated[ + List[str], typer.Option("--license", "-l", help="License ID.") + ] = [], +): if len(license) == 0: print("No licenses mentioned. 
Please specify licenese Id.") return print(download_service._add_new_download(file_path, license)) + @app.command("get", help="Get a particular config.") def get_download_by_config( - config_name: Annotated[str, typer.Argument(help="Config file name.")] - ): + config_name: Annotated[str, typer.Argument(help="Config file name.")] +): print(download_service._get_download_by_config(config_name)) + @app.command("remove", help="Remove existing config.") def remove_download( - config_name: Annotated[str, typer.Argument(help="Config file name.")] - ): + config_name: Annotated[str, typer.Argument(help="Config file name.")] +): print(download_service._remove_download(config_name)) diff --git a/weather_dl_v2/cli/app/subcommands/license.py b/weather_dl_v2/cli/app/subcommands/license.py index d2931fd6..428534ac 100644 --- a/weather_dl_v2/cli/app/subcommands/license.py +++ b/weather_dl_v2/cli/app/subcommands/license.py @@ -9,16 +9,19 @@ class LicenseValidator(Validator): pass + @app.command("list", help="List all licenses.") def get_all_license( - filter: Annotated[str, typer.Option(help="Filter by some value. Format: filter_key=filter_value")] = None - ): + filter: Annotated[ + str, typer.Option(help="Filter by some value. 
Format: filter_key=filter_value") + ] = None +): if filter: validator = LicenseValidator(valid_keys=["client_name"]) try: data = validator.validate(filters=[filter]) - client_name = data['client_name'] + client_name = data["client_name"] except Exception as e: print(f"filter error: {e}") return @@ -28,22 +31,24 @@ def get_all_license( print(license_service._get_all_license()) + @app.command("get", help="Get a particular license by ID.") -def get_license( - license: Annotated[str, typer.Argument(help="License ID.")] - ): +def get_license(license: Annotated[str, typer.Argument(help="License ID.")]): print(license_service._get_license_by_license_id(license)) + @app.command("add", help="Add new license.") def add_license( - file_path: Annotated[str, typer.Argument(help='''Input json file. Example json for new license- {"client_name" : , "number_of_requests" : , "secret_id" : }''')], #noqa - ): + file_path: Annotated[ + str, + typer.Argument( + help="""Input json file. Example json for new license-""" + """{"client_name" : , "number_of_requests" : , "secret_id" : }""" + ), + ], # noqa +): validator = LicenseValidator( - valid_keys=[ - "client_name", - "number_of_requests", - "secret_id" - ] + valid_keys=["client_name", "number_of_requests", "secret_id"] ) try: @@ -54,23 +59,25 @@ def add_license( print(license_service._add_license(license_dict)) + @app.command("remove", help="Remove a license.") -def remove_license( - license: Annotated[str, typer.Argument( help="License ID.")] - ): +def remove_license(license: Annotated[str, typer.Argument(help="License ID.")]): print(license_service._remove_license(license)) + @app.command("update", help="Update existing license.") def update_license( - license: Annotated[str, typer.Argument(help="License ID.")], - file_path: Annotated[str, typer.Argument(help='''Input json file. 
Example json for updated license- {"client_name" : , "number_of_requests" : , "secret_id" : }''')] #noqa - ): + license: Annotated[str, typer.Argument(help="License ID.")], + file_path: Annotated[ + str, + typer.Argument( + help="""Input json file. Example json for updated license- """ + """{"client_name" : , "number_of_requests" : , "secret_id" : }""" + ), + ], # noqa +): validator = LicenseValidator( - valid_keys=[ - "client_name", - "number_of_requests", - "secret_id" - ] + valid_keys=["client_name", "number_of_requests", "secret_id"] ) try: license_dict = validator.validate_json(file_path=file_path) @@ -79,4 +86,3 @@ def update_license( return print(license_service._update_license(license, license_dict)) - diff --git a/weather_dl_v2/cli/app/subcommands/queue.py b/weather_dl_v2/cli/app/subcommands/queue.py index d22a6c87..b20a4b58 100644 --- a/weather_dl_v2/cli/app/subcommands/queue.py +++ b/weather_dl_v2/cli/app/subcommands/queue.py @@ -5,20 +5,23 @@ app = typer.Typer() + class QueueValidator(Validator): pass + @app.command("list", help="List all the license queues.") def get_all_license_queue( - filter: Annotated[str, typer.Option(help="Filter by some value. Format: filter_key=filter_value")] = None - ): + filter: Annotated[ + str, typer.Option(help="Filter by some value. 
Format: filter_key=filter_value") + ] = None +): if filter: - validator = QueueValidator(valid_keys=["client_name"]) try: data = validator.validate(filters=[filter]) - client_name = data['client_name'] + client_name = data["client_name"] except Exception as e: print(f"filter error: {e}") return @@ -28,20 +31,42 @@ def get_all_license_queue( print(queue_service._get_all_license_queues()) + @app.command("get", help="Get queue of particular license.") def get_license_queue( - license: Annotated[str, typer.Argument(help="License ID")], - ): + license: Annotated[str, typer.Argument(help="License ID")], +): print(queue_service._get_queue_by_license(license)) -@app.command("edit", help="Edit existing license queue. Queue can edited via a priority file or my moving a single config to a given priority.") #noqa -def modify_license_queue( - license: Annotated[str, typer.Argument(help="License ID.")], - file: Annotated[str, typer.Option("--file", "-f", help='''File path of priority json file. Example json: {"priority": ["c1.cfg", "c2.cfg",...]}''')] = None, #noqa - config: Annotated[str, typer.Option("--config", "-c", help="Config name for absolute priority.")] = None, - priority: Annotated[int, typer.Option("--priority", "-p", help="Absolute priority for the config in a license queue. Priority increases in ascending order with 0 having highest priority.")] = None #noqa - ): +@app.command( + "edit", + help="Edit existing license queue. Queue can edited via a priority" + "file or my moving a single config to a given priority.", +) # noqa +def modify_license_queue( + license: Annotated[str, typer.Argument(help="License ID.")], + file: Annotated[ + str, + typer.Option( + "--file", + "-f", + help="""File path of priority json file. 
Example json: {"priority": ["c1.cfg", "c2.cfg",...]}""", + ), + ] = None, # noqa + config: Annotated[ + str, typer.Option("--config", "-c", help="Config name for absolute priority.") + ] = None, + priority: Annotated[ + int, + typer.Option( + "--priority", + "-p", + help="Absolute priority for the config in a license queue." + "Priority increases in ascending order with 0 having highest priority.", + ), + ] = None, # noqa +): if file is None and (config is None and priority is None): print("Priority file or config name with absolute priority must be passed.") return @@ -71,6 +96,3 @@ def modify_license_queue( else: print("--config & --priority arguments should be used together.") return - - - diff --git a/weather_dl_v2/cli/app/utils.py b/weather_dl_v2/cli/app/utils.py index 84119b57..583b690d 100644 --- a/weather_dl_v2/cli/app/utils.py +++ b/weather_dl_v2/cli/app/utils.py @@ -6,9 +6,9 @@ logger = logging.getLogger(__name__) + @dataclasses.dataclass class Validator(abc.ABC): - valid_keys: t.List[str] def validate(self, filters: t.List[str], show_valid_filters=True): @@ -17,7 +17,7 @@ def validate(self, filters: t.List[str], show_valid_filters=True): for filter in filters: _filter = filter.split("=") - if(len(_filter)!=2): + if len(_filter) != 2: if show_valid_filters: logger.info(f"valid filters are: {self.valid_keys}.") raise ValueError("Incorrect Filter. 
Please Try again.") diff --git a/weather_dl_v2/cli/setup.py b/weather_dl_v2/cli/setup.py index b749f5e1..66cba004 100644 --- a/weather_dl_v2/cli/setup.py +++ b/weather_dl_v2/cli/setup.py @@ -1,17 +1,15 @@ from setuptools import setup -requirements = ['typer', 'requests'] +requirements = ["typer", "requests"] setup( - name = "weather-dl-v2", + name="weather-dl-v2", packages=["app", "app.subcommands", "app.services"], install_requires=requirements, - version = "0.0.1", - author = "aniket", - description = ("This cli tools helps in interacting with weather dl v2 fast API server."), - entry_points={ - "console_scripts": [ - "weather-dl-v2=app.main:app" - ] - } + version="0.0.1", + author="aniket", + description=( + "This cli tools helps in interacting with weather dl v2 fast API server." + ), + entry_points={"console_scripts": ["weather-dl-v2=app.main:app"]}, ) diff --git a/weather_dl_v2/downloader_kubernetes/README.md b/weather_dl_v2/downloader_kubernetes/README.md index 0783740d..49643b82 100644 --- a/weather_dl_v2/downloader_kubernetes/README.md +++ b/weather_dl_v2/downloader_kubernetes/README.md @@ -1,21 +1,21 @@ # Deployment / Usage Instruction -* **User authorization required to set up the environment**: +### User authorization required to set up the environment: * roles/container.admin -* **Authorization needed for the tool to operate**: +### Authorization needed for the tool to operate: We are not configuring any service account here hence make sure that compute engine default service account have roles: * roles/storage.admin * roles/bigquery.dataEditor * roles/bigquery.jobUser -* **Write the manifest location path** +### Write the manifest location path ``` Please write down the manifest path at Line 43 of downloader.py. 
Eg: "fs://test_manifest?projectId=XXX" ``` -* **Create docker image for downloader**: +### Create docker image for downloader: ``` export REPO= eg:weather-tools diff --git a/weather_dl_v2/downloader_kubernetes/downloader.py b/weather_dl_v2/downloader_kubernetes/downloader.py index d27db2d7..e0acbb53 100644 --- a/weather_dl_v2/downloader_kubernetes/downloader.py +++ b/weather_dl_v2/downloader_kubernetes/downloader.py @@ -8,6 +8,7 @@ from util import copy, download_with_aria2 import datetime + def download(url: str, path: str) -> None: """Download data from client, with retries.""" if path: @@ -32,24 +33,25 @@ def main(config_name, dataset, selection, user_id, url, target_path) -> None: precise_download_start_time = ( datetime.datetime.utcnow() .replace(tzinfo=datetime.timezone.utc) - .isoformat(timespec='seconds') + .isoformat(timespec="seconds") ) manifest.prev_stage_precise_start_time = precise_download_start_time - print(f'Downloading data for {target_path!r}.') + print(f"Downloading data for {target_path!r}.") download(url, temp_name) - print(f'Download completed for {target_path!r}.') + print(f"Download completed for {target_path!r}.") manifest.set_stage(Stage.UPLOAD) precise_upload_start_time = ( datetime.datetime.utcnow() .replace(tzinfo=datetime.timezone.utc) - .isoformat(timespec='seconds') + .isoformat(timespec="seconds") ) manifest.prev_stage_precise_start_time = precise_upload_start_time - print(f'Uploading to store for {target_path!r}.') + print(f"Uploading to store for {target_path!r}.") copy(temp_name, target_path) - print(f'Upload to store complete for {target_path!r}.') + print(f"Upload to store complete for {target_path!r}.") os.unlink(temp_name) -if __name__ == '__main__': + +if __name__ == "__main__": main(*sys.argv[1:]) diff --git a/weather_dl_v2/downloader_kubernetes/manifest.py b/weather_dl_v2/downloader_kubernetes/manifest.py index e082bf8e..185e3473 100644 --- a/weather_dl_v2/downloader_kubernetes/manifest.py +++ 
b/weather_dl_v2/downloader_kubernetes/manifest.py @@ -16,7 +16,7 @@ get_file_size, get_wait_interval, generate_md5_hash, - GLOBAL_COVERAGE_AREA + GLOBAL_COVERAGE_AREA, ) import firebase_admin @@ -26,11 +26,12 @@ from google.cloud.firestore_v1.types import WriteResult """An implementation-dependent Manifest URI.""" -Location = t.NewType('Location', str) +Location = t.NewType("Location", str) class ManifestException(Exception): """Errors that occur in Manifest Clients.""" + pass @@ -46,10 +47,11 @@ class Stage(enum.Enum): retrieve : In case of clients where there is no proper separation of fetch & download stages (eg. CDS client), request will be in the retrieve stage i.e. fetch + download. """ - RETRIEVE = 'retrieve' - FETCH = 'fetch' - DOWNLOAD = 'download' - UPLOAD = 'upload' + + RETRIEVE = "retrieve" + FETCH = "fetch" + DOWNLOAD = "download" + UPLOAD = "upload" class Status(enum.Enum): @@ -62,14 +64,15 @@ class Status(enum.Enum): success : This represents the request state execution completed successfully without any error. failure : This represents the request state execution failed. 
""" - SCHEDULED = 'scheduled' - IN_PROGRESS = 'in-progress' - SUCCESS = 'success' - FAILURE = 'failure' + + SCHEDULED = "scheduled" + IN_PROGRESS = "in-progress" + SUCCESS = "success" + FAILURE = "failure" @dataclasses.dataclass -class DownloadStatus(): +class DownloadStatus: """Data recorded in `Manifest`s reflecting the status of a download.""" """The name of the config file associated with the request.""" @@ -130,13 +133,13 @@ class DownloadStatus(): upload_end_time: t.Optional[str] = "" @classmethod - def from_dict(cls, download_status: t.Dict) -> 'DownloadStatus': + def from_dict(cls, download_status: t.Dict) -> "DownloadStatus": """Instantiate DownloadStatus dataclass from dict.""" download_status_instance = cls() for key, value in download_status.items(): - if key == 'status': + if key == "status": setattr(download_status_instance, key, Status(value)) - elif key == 'stage' and value is not None: + elif key == "stage" and value is not None: setattr(download_status_instance, key, Stage(value)) else: setattr(download_status_instance, key, value) @@ -154,7 +157,7 @@ def to_dict(cls, instance) -> t.Dict: download_status_dict[key] = value.value elif isinstance(value, pd.Timestamp): download_status_dict[key] = value.isoformat() - elif key == 'selection' and value is not None: + elif key == "selection" and value is not None: download_status_dict[key] = json.dumps(value) else: download_status_dict[key] = value @@ -205,76 +208,107 @@ def __post_init__(self): """Initialize the manifest.""" pass - def schedule(self, config_name: str, dataset: str, selection: t.Dict, location: str, user: str) -> None: + def schedule( + self, + config_name: str, + dataset: str, + selection: t.Dict, + location: str, + user: str, + ) -> None: """Indicate that a job has been scheduled for download. 'scheduled' jobs occur before 'in-progress', 'success' or 'finished'. 
""" - scheduled_time = datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc).isoformat(timespec='seconds') + scheduled_time = ( + datetime.datetime.utcnow() + .replace(tzinfo=datetime.timezone.utc) + .isoformat(timespec="seconds") + ) self.status = DownloadStatus( - config_name=config_name, - dataset=dataset if dataset else None, - selection=selection, - location=location, - area=fetch_geo_polygon(selection.get('area', GLOBAL_COVERAGE_AREA)), - username=user, - stage=None, - status=Status.SCHEDULED, - error=None, - size=None, - scheduled_time=scheduled_time, - retrieve_start_time=None, - retrieve_end_time=None, - fetch_start_time=None, - fetch_end_time=None, - download_start_time=None, - download_end_time=None, - upload_start_time=None, - upload_end_time=None, - ) + config_name=config_name, + dataset=dataset if dataset else None, + selection=selection, + location=location, + area=fetch_geo_polygon(selection.get("area", GLOBAL_COVERAGE_AREA)), + username=user, + stage=None, + status=Status.SCHEDULED, + error=None, + size=None, + scheduled_time=scheduled_time, + retrieve_start_time=None, + retrieve_end_time=None, + fetch_start_time=None, + fetch_end_time=None, + download_start_time=None, + download_end_time=None, + upload_start_time=None, + upload_end_time=None, + ) self._update(self.status) - def skip(self, config_name: str, dataset: str, selection: t.Dict, location: str, user: str) -> None: + def skip( + self, + config_name: str, + dataset: str, + selection: t.Dict, + location: str, + user: str, + ) -> None: """Updates the manifest to mark the shards that were skipped in the current job as 'upload' stage and 'success' status, indicating that they have already been downloaded. """ old_status = self._read(location) # The manifest needs to be updated for a skipped shard if its entry is not present, or # if the stage is not 'upload', or if the stage is 'upload' but the status is not 'success'. 
- if old_status.location != location or old_status.stage != Stage.UPLOAD or old_status.status != Status.SUCCESS: + if ( + old_status.location != location + or old_status.stage != Stage.UPLOAD + or old_status.status != Status.SUCCESS + ): current_utc_time = ( datetime.datetime.utcnow() .replace(tzinfo=datetime.timezone.utc) - .isoformat(timespec='seconds') + .isoformat(timespec="seconds") ) size = get_file_size(location) status = DownloadStatus( - config_name=config_name, - dataset=dataset if dataset else None, - selection=selection, - location=location, - area=fetch_geo_polygon(selection.get('area', GLOBAL_COVERAGE_AREA)), - username=user, - stage=Stage.UPLOAD, - status=Status.SUCCESS, - error=None, - size=size, - scheduled_time=None, - retrieve_start_time=None, - retrieve_end_time=None, - fetch_start_time=None, - fetch_end_time=None, - download_start_time=None, - download_end_time=None, - upload_start_time=current_utc_time, - upload_end_time=current_utc_time, - ) + config_name=config_name, + dataset=dataset if dataset else None, + selection=selection, + location=location, + area=fetch_geo_polygon(selection.get("area", GLOBAL_COVERAGE_AREA)), + username=user, + stage=Stage.UPLOAD, + status=Status.SUCCESS, + error=None, + size=size, + scheduled_time=None, + retrieve_start_time=None, + retrieve_end_time=None, + fetch_start_time=None, + fetch_end_time=None, + download_start_time=None, + download_end_time=None, + upload_start_time=current_utc_time, + upload_end_time=current_utc_time, + ) self._update(status) - print(f'Manifest updated for skipped shard: {location!r} -- {DownloadStatus.to_dict(status)!r}.') + print( + f"Manifest updated for skipped shard: {location!r} -- {DownloadStatus.to_dict(status)!r}." 
+ ) - def _set_for_transaction(self, config_name: str, dataset: str, selection: t.Dict, location: str, user: str) -> None: + def _set_for_transaction( + self, + config_name: str, + dataset: str, + selection: t.Dict, + location: str, + user: str, + ) -> None: """Reset Manifest state in preparation for a new transaction.""" self.status = dataclasses.replace(self._read(location)) self.status.config_name = config_name @@ -294,7 +328,7 @@ def __exit__(self, exc_type, exc_inst, exc_tb) -> None: else: status = Status.FAILURE # For explanation, see https://docs.python.org/3/library/traceback.html#traceback.format_exception - error = '\n'.join(traceback.format_exception(exc_type, exc_inst, exc_tb)) + error = "\n".join(traceback.format_exception(exc_type, exc_inst, exc_tb)) new_status = dataclasses.replace(self.status) new_status.error = error @@ -302,7 +336,7 @@ def __exit__(self, exc_type, exc_inst, exc_tb) -> None: current_utc_time = ( datetime.datetime.utcnow() .replace(tzinfo=datetime.timezone.utc) - .isoformat(timespec='seconds') + .isoformat(timespec="seconds") ) # This is necessary for setting the precise start time of the previous stage @@ -326,7 +360,14 @@ def __exit__(self, exc_type, exc_inst, exc_tb) -> None: self._update(self.status) - def transact(self, config_name: str, dataset: str, selection: t.Dict, location: str, user: str) -> 'Manifest': + def transact( + self, + config_name: str, + dataset: str, + selection: t.Dict, + location: str, + user: str, + ) -> "Manifest": """Create a download transaction.""" self._set_for_transaction(config_name, dataset, selection, location, user) return self @@ -339,7 +380,7 @@ def set_stage(self, stage: Stage) -> None: current_utc_time = ( datetime.datetime.utcnow() .replace(tzinfo=datetime.timezone.utc) - .isoformat(timespec='seconds') + .isoformat(timespec="seconds") ) if stage == Stage.DOWNLOAD: @@ -389,10 +430,12 @@ def _get_db(self) -> firestore.firestore.Client: cred = credentials.ApplicationDefault() 
firebase_admin.initialize_app(cred) - print('Initialized Firebase App.') + print("Initialized Firebase App.") if attempts > 4: - raise ManifestException('Exceeded number of retries to get firestore client.') from e + raise ManifestException( + "Exceeded number of retries to get firestore client." + ) from e time.sleep(get_wait_interval(attempts)) @@ -406,9 +449,7 @@ def _read(self, location: str) -> DownloadStatus: doc_id = generate_md5_hash(location) # Update document with download status - download_doc_ref = ( - self.root_document_for_store(doc_id) - ) + download_doc_ref = self.root_document_for_store(doc_id) result = download_doc_ref.get() row = {} @@ -419,24 +460,24 @@ def _read(self, location: str) -> DownloadStatus: def _update(self, download_status: DownloadStatus) -> None: """Update or create a download status record.""" - print('Updating Firestore Manifest.') + print("Updating Firestore Manifest.") status = DownloadStatus.to_dict(download_status) - doc_id = generate_md5_hash(status['location']) + doc_id = generate_md5_hash(status["location"]) # Update document with download status - download_doc_ref = ( - self.root_document_for_store(doc_id) - ) + download_doc_ref = self.root_document_for_store(doc_id) result: WriteResult = download_doc_ref.set(status) - print(f'Firestore manifest updated. ' - f'update_time={result.update_time}, ' - f'filename={download_status.location}.') + print( + f"Firestore manifest updated. " + f"update_time={result.update_time}, " + f"filename={download_status.location}." + ) def root_document_for_store(self, store_scheme: str) -> DocumentReference: """Get the root manifest document given the user's config and current document's storage location.""" # TODO: Get user-defined collection for manifest. 
- root_collection = 'test_manifest' + root_collection = "test_manifest" return self._get_db().collection(root_collection).document(store_scheme) diff --git a/weather_dl_v2/downloader_kubernetes/util.py b/weather_dl_v2/downloader_kubernetes/util.py index 7b8b128d..a7e3fcc4 100644 --- a/weather_dl_v2/downloader_kubernetes/util.py +++ b/weather_dl_v2/downloader_kubernetes/util.py @@ -22,7 +22,9 @@ GLOBAL_COVERAGE_AREA = [90, -180, -90, 180] -def _retry_if_valid_input_but_server_or_socket_error_and_timeout_filter(exception) -> bool: +def _retry_if_valid_input_but_server_or_socket_error_and_timeout_filter( + exception, +) -> bool: if isinstance(exception, socket.timeout): return True if isinstance(exception, TimeoutError): @@ -34,6 +36,7 @@ def _retry_if_valid_input_but_server_or_socket_error_and_timeout_filter(exceptio class _FakeClock: + def sleep(self, value): pass @@ -43,7 +46,7 @@ def retry_with_exponential_backoff(fun): clock = retry.Clock() # Use a fake clock only during test time... - if 'unittest' in sys.modules.keys(): + if "unittest" in sys.modules.keys(): clock = _FakeClock() return retry.with_exponential_backoff( @@ -70,9 +73,11 @@ def ichunked(iterable: t.Iterable, n: int) -> t.Iterator[t.Iterable]: def copy(src: str, dst: str) -> None: """Copy data via `gsutil cp`.""" try: - subprocess.run(['gsutil', 'cp', src, dst], check=True, capture_output=True) + subprocess.run(["gsutil", "cp", src, dst], check=True, capture_output=True) except subprocess.CalledProcessError as e: - print(f'Failed to copy file {src!r} to {dst!r} due to {e.stderr.decode("utf-8")}') + print( + f'Failed to copy file {src!r} to {dst!r} due to {e.stderr.decode("utf-8")}' + ) raise @@ -80,7 +85,7 @@ def copy(src: str, dst: str) -> None: def to_json_serializable_type(value: t.Any) -> t.Any: """Returns the value with a type serializable to JSON""" # Note: The order of processing is significant. 
- print('Serializing to JSON') + print("Serializing to JSON") if pd.isna(value) or value is None: return None @@ -89,7 +94,11 @@ def to_json_serializable_type(value: t.Any) -> t.Any: elif type(value) == np.ndarray: # Will return a scaler if array is of size 1, else will return a list. return value.tolist() - elif type(value) == datetime.datetime or type(value) == str or type(value) == np.datetime64: + elif ( + type(value) == datetime.datetime + or type(value) == str + or type(value) == np.datetime64 + ): # Assume strings are ISO format timestamps... try: value = datetime.datetime.fromisoformat(value) @@ -112,7 +121,7 @@ def to_json_serializable_type(value: t.Any) -> t.Any: return value.replace(tzinfo=datetime.timezone.utc).isoformat() elif type(value) == np.timedelta64: # Return time delta in seconds. - return float(value / np.timedelta64(1, 's')) + return float(value / np.timedelta64(1, "s")) # This check must happen after processing np.timedelta64 and np.datetime64. elif np.issubdtype(type(value), np.integer): return int(value) @@ -125,13 +134,13 @@ def fetch_geo_polygon(area: t.Union[list, str]) -> str: # Ref: https://confluence.ecmwf.int/pages/viewpage.action?pageId=151520973 if isinstance(area, str): # European area - if area == 'E': + if area == "E": area = [73.5, -27, 33, 45] # Global area - elif area == 'G': + elif area == "G": area = GLOBAL_COVERAGE_AREA else: - raise RuntimeError(f'Not a valid value for area in config: {area}.') + raise RuntimeError(f"Not a valid value for area in config: {area}.") n, w, s, e = [float(x) for x in area] if s < LATITUDE_RANGE[0]: @@ -153,22 +162,24 @@ def fetch_geo_polygon(area: t.Union[list, str]) -> str: def get_file_size(path: str) -> float: parsed_gcs_path = urlparse(path) - if parsed_gcs_path.scheme != 'gs' or parsed_gcs_path.netloc == '': - return os.stat(path).st_size / (1024 ** 3) if os.path.exists(path) else 0 + if parsed_gcs_path.scheme != "gs" or parsed_gcs_path.netloc == "": + return os.stat(path).st_size / 
(1024**3) if os.path.exists(path) else 0 else: - return gcsio.GcsIO().size(path) / (1024 ** 3) if gcsio.GcsIO().exists(path) else 0 + return ( + gcsio.GcsIO().size(path) / (1024**3) if gcsio.GcsIO().exists(path) else 0 + ) def get_wait_interval(num_retries: int = 0) -> float: """Returns next wait interval in seconds, using an exponential backoff algorithm.""" if 0 == num_retries: return 0 - return 2 ** num_retries + return 2**num_retries def generate_md5_hash(input: str) -> str: """Generates md5 hash for the input string.""" - return hashlib.md5(input.encode('utf-8')).hexdigest() + return hashlib.md5(input.encode("utf-8")).hexdigest() def download_with_aria2(url: str, path: str) -> None: @@ -177,9 +188,24 @@ def download_with_aria2(url: str, path: str) -> None: dir_path, file_name = os.path.split(path) try: subprocess.run( - ['aria2c', '-x', '16', '-s', '16', url, '-d', dir_path, '-o', file_name, '--allow-overwrite'], + [ + "aria2c", + "-x", + "16", + "-s", + "16", + url, + "-d", + dir_path, + "-o", + file_name, + "--allow-overwrite", + ], check=True, - capture_output=True) + capture_output=True, + ) except subprocess.CalledProcessError as e: - print(f'Failed download from server {url!r} to {path!r} due to {e.stderr.decode("utf-8")}') + print( + f'Failed download from server {url!r} to {path!r} due to {e.stderr.decode("utf-8")}' + ) raise diff --git a/weather_dl_v2/fastapi-server/README.md b/weather_dl_v2/fastapi-server/README.md index 805b8991..8d6acb33 100644 --- a/weather_dl_v2/fastapi-server/README.md +++ b/weather_dl_v2/fastapi-server/README.md @@ -1,13 +1,13 @@ # Deployment Instructions & General Notes -* **How to create environment:** +### How to create environment: ``` conda env create --name weather-dl-v2-server --file=environment.yml conda activate weather-dl-v2-server ``` -* **To run fastapi server:** +### To run fastapi server: ``` uvicorn main:app --reload ``` @@ -15,13 +15,13 @@ uvicorn main:app --reload * Open your browser at http://127.0.0.1:8000. 
-* **Add path of created license deployment image in license_dep/license_deployment.yaml**: +### Add path of created license deployment image in license_dep/license_deployment.yaml: ``` Please write down the license deployment's docker image path at Line 22 of license_deployment.yaml. ``` -* **Create docker image for server**: +### Create docker image for server: ``` export PROJECT_ID= export REPO= eg:weather-tools @@ -29,22 +29,22 @@ export REPO= eg:weather-tools gcloud builds submit . --tag "gcr.io/$PROJECT_ID/$REPO:weather-dl-v2-server" --timeout=79200 --machine-type=e2-highcpu-32 ``` -* **Add path of created server image in server.yaml**: +### Add path of created server image in server.yaml: ``` Please write down the fastAPI server's docker image path at Line 42 of server.yaml. ``` -* **Deploy fastapi server on kubernetes:** +### Deploy fastapi server on kubernetes: ``` kubectl apply -f server.yaml --force ``` ## General Commands -* **For viewing the current pods**: +### For viewing the current pods: ``` kubectl get pods ``` -* **For deleting existing deployment**: +### For deleting existing deployment: ``` kubectl delete -f server.yaml --force \ No newline at end of file diff --git a/weather_dl_v2/fastapi-server/config_processing/config.py b/weather_dl_v2/fastapi-server/config_processing/config.py index 1143ce61..2677a60c 100644 --- a/weather_dl_v2/fastapi-server/config_processing/config.py +++ b/weather_dl_v2/fastapi-server/config_processing/config.py @@ -3,7 +3,7 @@ import dataclasses import typing as t -Values = t.Union[t.List['Values'], t.Dict[str, 'Values'], bool, int, float, str] # pytype: disable=not-supported-yet +Values = t.Union[t.List["Values"], t.Dict[str, "Values"], bool, int, float, str] # pytype: disable=not-supported-yet @dataclasses.dataclass @@ -49,7 +49,7 @@ class Config: selection: t.Dict[str, Values] = dataclasses.field(default_factory=dict) @classmethod - def from_dict(cls, config: t.Dict) -> 'Config': + def from_dict(cls, config: t.Dict) 
-> "Config": config_instance = cls() for section_key, section_value in config.items(): if section_key == "parameters": @@ -70,32 +70,36 @@ def optimize_selection_partition(selection: t.Dict) -> t.Dict: """ selection_ = copy.deepcopy(selection) - if 'day' in selection_.keys() and selection_['day'] == 'all': - year, month = selection_['year'], selection_['month'] + if "day" in selection_.keys() and selection_["day"] == "all": + year, month = selection_["year"], selection_["month"] - multiples_error = "Cannot use keyword 'all' on selections with multiple '{type}'s." + multiples_error = ( + "Cannot use keyword 'all' on selections with multiple '{type}'s." + ) if isinstance(year, list): - assert len(year) == 1, multiples_error.format(type='year') + assert len(year) == 1, multiples_error.format(type="year") year = year[0] if isinstance(month, list): - assert len(month) == 1, multiples_error.format(type='month') + assert len(month) == 1, multiples_error.format(type="month") month = month[0] if isinstance(year, str): - assert '/' not in year, multiples_error.format(type='year') + assert "/" not in year, multiples_error.format(type="year") if isinstance(month, str): - assert '/' not in month, multiples_error.format(type='month') + assert "/" not in month, multiples_error.format(type="month") year, month = int(year), int(month) _, n_days_in_month = calendar.monthrange(year, month) - selection_['date'] = f'{year:04d}-{month:02d}-01/to/{year:04d}-{month:02d}-{n_days_in_month:02d}' - del selection_['day'] - del selection_['month'] - del selection_['year'] + selection_[ + "date" + ] = f"{year:04d}-{month:02d}-01/to/{year:04d}-{month:02d}-{n_days_in_month:02d}" + del selection_["day"] + del selection_["month"] + del selection_["year"] return selection_ diff --git a/weather_dl_v2/fastapi-server/config_processing/manifest.py b/weather_dl_v2/fastapi-server/config_processing/manifest.py index 122f6802..444dd933 100644 --- a/weather_dl_v2/fastapi-server/config_processing/manifest.py 
+++ b/weather_dl_v2/fastapi-server/config_processing/manifest.py @@ -17,7 +17,7 @@ get_file_size, get_wait_interval, generate_md5_hash, - GLOBAL_COVERAGE_AREA + GLOBAL_COVERAGE_AREA, ) import firebase_admin @@ -29,12 +29,14 @@ from database.session import Database """An implementation-dependent Manifest URI.""" -Location = t.NewType('Location', str) +Location = t.NewType("Location", str) logger = logging.getLogger(__name__) + class ManifestException(Exception): """Errors that occur in Manifest Clients.""" + pass @@ -50,10 +52,11 @@ class Stage(enum.Enum): retrieve : In case of clients where there is no proper separation of fetch & download stages (eg. CDS client), request will be in the retrieve stage i.e. fetch + download. """ - RETRIEVE = 'retrieve' - FETCH = 'fetch' - DOWNLOAD = 'download' - UPLOAD = 'upload' + + RETRIEVE = "retrieve" + FETCH = "fetch" + DOWNLOAD = "download" + UPLOAD = "upload" class Status(enum.Enum): @@ -66,14 +69,15 @@ class Status(enum.Enum): success : This represents the request state execution completed successfully without any error. failure : This represents the request state execution failed. 
""" - SCHEDULED = 'scheduled' - IN_PROGRESS = 'in-progress' - SUCCESS = 'success' - FAILURE = 'failure' + + SCHEDULED = "scheduled" + IN_PROGRESS = "in-progress" + SUCCESS = "success" + FAILURE = "failure" @dataclasses.dataclass -class DownloadStatus(): +class DownloadStatus: """Data recorded in `Manifest`s reflecting the status of a download.""" """The name of the config file associated with the request.""" @@ -134,13 +138,13 @@ class DownloadStatus(): upload_end_time: t.Optional[str] = "" @classmethod - def from_dict(cls, download_status: t.Dict) -> 'DownloadStatus': + def from_dict(cls, download_status: t.Dict) -> "DownloadStatus": """Instantiate DownloadStatus dataclass from dict.""" download_status_instance = cls() for key, value in download_status.items(): - if key == 'status': + if key == "status": setattr(download_status_instance, key, Status(value)) - elif key == 'stage' and value is not None: + elif key == "stage" and value is not None: setattr(download_status_instance, key, Stage(value)) else: setattr(download_status_instance, key, value) @@ -158,7 +162,7 @@ def to_dict(cls, instance) -> t.Dict: download_status_dict[key] = value.value elif isinstance(value, pd.Timestamp): download_status_dict[key] = value.isoformat() - elif key == 'selection' and value is not None: + elif key == "selection" and value is not None: download_status_dict[key] = json.dumps(value) else: download_status_dict[key] = value @@ -209,76 +213,107 @@ def __post_init__(self): """Initialize the manifest.""" pass - def schedule(self, config_name: str, dataset: str, selection: t.Dict, location: str, user: str) -> None: + def schedule( + self, + config_name: str, + dataset: str, + selection: t.Dict, + location: str, + user: str, + ) -> None: """Indicate that a job has been scheduled for download. 'scheduled' jobs occur before 'in-progress', 'success' or 'finished'. 
""" - scheduled_time = datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc).isoformat(timespec='seconds') + scheduled_time = ( + datetime.datetime.utcnow() + .replace(tzinfo=datetime.timezone.utc) + .isoformat(timespec="seconds") + ) self.status = DownloadStatus( - config_name=config_name, - dataset=dataset if dataset else None, - selection=selection, - location=location, - area=fetch_geo_polygon(selection.get('area', GLOBAL_COVERAGE_AREA)), - username=user, - stage=None, - status=Status.SCHEDULED, - error=None, - size=None, - scheduled_time=scheduled_time, - retrieve_start_time=None, - retrieve_end_time=None, - fetch_start_time=None, - fetch_end_time=None, - download_start_time=None, - download_end_time=None, - upload_start_time=None, - upload_end_time=None, - ) + config_name=config_name, + dataset=dataset if dataset else None, + selection=selection, + location=location, + area=fetch_geo_polygon(selection.get("area", GLOBAL_COVERAGE_AREA)), + username=user, + stage=None, + status=Status.SCHEDULED, + error=None, + size=None, + scheduled_time=scheduled_time, + retrieve_start_time=None, + retrieve_end_time=None, + fetch_start_time=None, + fetch_end_time=None, + download_start_time=None, + download_end_time=None, + upload_start_time=None, + upload_end_time=None, + ) self._update(self.status) - def skip(self, config_name: str, dataset: str, selection: t.Dict, location: str, user: str) -> None: + def skip( + self, + config_name: str, + dataset: str, + selection: t.Dict, + location: str, + user: str, + ) -> None: """Updates the manifest to mark the shards that were skipped in the current job as 'upload' stage and 'success' status, indicating that they have already been downloaded. """ old_status = self._read(location) # The manifest needs to be updated for a skipped shard if its entry is not present, or # if the stage is not 'upload', or if the stage is 'upload' but the status is not 'success'. 
- if old_status.location != location or old_status.stage != Stage.UPLOAD or old_status.status != Status.SUCCESS: + if ( + old_status.location != location + or old_status.stage != Stage.UPLOAD + or old_status.status != Status.SUCCESS + ): current_utc_time = ( datetime.datetime.utcnow() .replace(tzinfo=datetime.timezone.utc) - .isoformat(timespec='seconds') + .isoformat(timespec="seconds") ) size = get_file_size(location) status = DownloadStatus( - config_name=config_name, - dataset=dataset if dataset else None, - selection=selection, - location=location, - area=fetch_geo_polygon(selection.get('area', GLOBAL_COVERAGE_AREA)), - username=user, - stage=Stage.UPLOAD, - status=Status.SUCCESS, - error=None, - size=size, - scheduled_time=None, - retrieve_start_time=None, - retrieve_end_time=None, - fetch_start_time=None, - fetch_end_time=None, - download_start_time=None, - download_end_time=None, - upload_start_time=current_utc_time, - upload_end_time=current_utc_time, - ) + config_name=config_name, + dataset=dataset if dataset else None, + selection=selection, + location=location, + area=fetch_geo_polygon(selection.get("area", GLOBAL_COVERAGE_AREA)), + username=user, + stage=Stage.UPLOAD, + status=Status.SUCCESS, + error=None, + size=size, + scheduled_time=None, + retrieve_start_time=None, + retrieve_end_time=None, + fetch_start_time=None, + fetch_end_time=None, + download_start_time=None, + download_end_time=None, + upload_start_time=current_utc_time, + upload_end_time=current_utc_time, + ) self._update(status) - logger.info(f'Manifest updated for skipped shard: {location!r} -- {DownloadStatus.to_dict(status)!r}.') + logger.info( + f"Manifest updated for skipped shard: {location!r} -- {DownloadStatus.to_dict(status)!r}." 
+ ) - def _set_for_transaction(self, config_name: str, dataset: str, selection: t.Dict, location: str, user: str) -> None: + def _set_for_transaction( + self, + config_name: str, + dataset: str, + selection: t.Dict, + location: str, + user: str, + ) -> None: """Reset Manifest state in preparation for a new transaction.""" self.status = dataclasses.replace(self._read(location)) self.status.config_name = config_name @@ -298,7 +333,7 @@ def __exit__(self, exc_type, exc_inst, exc_tb) -> None: else: status = Status.FAILURE # For explanation, see https://docs.python.org/3/library/traceback.html#traceback.format_exception - error = '\n'.join(traceback.format_exception(exc_type, exc_inst, exc_tb)) + error = "\n".join(traceback.format_exception(exc_type, exc_inst, exc_tb)) new_status = dataclasses.replace(self.status) new_status.error = error @@ -306,7 +341,7 @@ def __exit__(self, exc_type, exc_inst, exc_tb) -> None: current_utc_time = ( datetime.datetime.utcnow() .replace(tzinfo=datetime.timezone.utc) - .isoformat(timespec='seconds') + .isoformat(timespec="seconds") ) # This is necessary for setting the precise start time of the previous stage @@ -330,7 +365,14 @@ def __exit__(self, exc_type, exc_inst, exc_tb) -> None: self._update(self.status) - def transact(self, config_name: str, dataset: str, selection: t.Dict, location: str, user: str) -> 'Manifest': + def transact( + self, + config_name: str, + dataset: str, + selection: t.Dict, + location: str, + user: str, + ) -> "Manifest": """Create a download transaction.""" self._set_for_transaction(config_name, dataset, selection, location, user) return self @@ -344,7 +386,7 @@ def set_stage(self, stage: Stage) -> None: current_utc_time = ( datetime.datetime.utcnow() .replace(tzinfo=datetime.timezone.utc) - .isoformat(timespec='seconds') + .isoformat(timespec="seconds") ) if stage == Stage.FETCH: @@ -404,10 +446,12 @@ def _get_db(self) -> firestore.firestore.Client: cred = credentials.ApplicationDefault() 
firebase_admin.initialize_app(cred) - logger.info('Initialized Firebase App.') + logger.info("Initialized Firebase App.") if attempts > 4: - raise ManifestException('Exceeded number of retries to get firestore client.') from e + raise ManifestException( + "Exceeded number of retries to get firestore client." + ) from e time.sleep(get_wait_interval(attempts)) @@ -421,9 +465,7 @@ def _read(self, location: str) -> DownloadStatus: doc_id = generate_md5_hash(location) # Update document with download status - download_doc_ref = ( - self.root_document_for_store(doc_id) - ) + download_doc_ref = self.root_document_for_store(doc_id) result = download_doc_ref.get() row = {} @@ -434,24 +476,24 @@ def _read(self, location: str) -> DownloadStatus: def _update(self, download_status: DownloadStatus) -> None: """Update or create a download status record.""" - logger.info('Updating Firestore Manifest.') + logger.info("Updating Firestore Manifest.") status = DownloadStatus.to_dict(download_status) - doc_id = generate_md5_hash(status['location']) + doc_id = generate_md5_hash(status["location"]) # Update document with download status - download_doc_ref = ( - self.root_document_for_store(doc_id) - ) + download_doc_ref = self.root_document_for_store(doc_id) result: WriteResult = download_doc_ref.set(status) - logger.info(f'Firestore manifest updated. ' - f'update_time={result.update_time}, ' - f'filename={download_status.location}.') + logger.info( + f"Firestore manifest updated. " + f"update_time={result.update_time}, " + f"filename={download_status.location}." + ) def root_document_for_store(self, store_scheme: str) -> DocumentReference: """Get the root manifest document given the user's config and current document's storage location.""" # TODO: Get user-defined collection for manifest. 
- root_collection = 'test_manifest' + root_collection = "test_manifest" return self._get_db().collection(root_collection).document(store_scheme) diff --git a/weather_dl_v2/fastapi-server/config_processing/parsers.py b/weather_dl_v2/fastapi-server/config_processing/parsers.py index 08c0b956..f3447f28 100644 --- a/weather_dl_v2/fastapi-server/config_processing/parsers.py +++ b/weather_dl_v2/fastapi-server/config_processing/parsers.py @@ -12,7 +12,7 @@ from collections import OrderedDict from .config import Config -CLIENTS = ['cds', 'mars', 'ecpublic'] +CLIENTS = ["cds", "mars", "ecpublic"] def date(candidate: str) -> datetime.date: @@ -34,7 +34,7 @@ def date(candidate: str) -> datetime.date: converted = None # Parse relative day value. - if candidate.startswith('-'): + if candidate.startswith("-"): return datetime.date.today() + datetime.timedelta(days=int(candidate)) accepted_formats = ["%Y-%m-%d", "%Y%m%d", "%Y-%j"] @@ -81,9 +81,7 @@ def time(candidate: str) -> datetime.time: pass if converted is None: - raise ValueError( - f"Not a valid time: '{candidate}'. Please use valid format." - ) + raise ValueError(f"Not a valid time: '{candidate}'. Please use valid format.") return converted @@ -93,7 +91,7 @@ def day_month_year(candidate: t.Any) -> int: try: if isinstance(candidate, str) or isinstance(candidate, int): return int(candidate) - raise ValueError('must be a str or int.') + raise ValueError("must be a str or int.") except ValueError as e: raise ValueError( f"Not a valid day, month, or year value: {candidate}. Please use valid value." 
@@ -121,11 +119,11 @@ def validate(key: str, value: int) -> None: def typecast(key: str, value: t.Any) -> t.Any: """Type the value to its appropriate datatype.""" SWITCHER = { - 'date': date, - 'time': time, - 'day': day_month_year, - 'month': day_month_year, - 'year': day_month_year, + "date": date, + "time": time, + "day": day_month_year, + "month": day_month_year, + "year": day_month_year, } converted = SWITCHER.get(key, parse_literal)(value) validate(key, converted) @@ -177,7 +175,9 @@ def mars_range_value(token: str) -> t.Union[datetime.date, int, float]: try: return float(token) except ValueError: - raise ValueError("Token string must be an 'int', 'float', or 'datetime.date()'.") + raise ValueError( + "Token string must be an 'int', 'float', or 'datetime.date()'." + ) def mars_increment_value(token: str) -> t.Union[int, float]: @@ -220,24 +220,24 @@ def parse_mars_syntax(block: str) -> t.List[str]: """ # Split into tokens, omitting empty strings. - tokens = [b.strip() for b in block.split('/') if b != ''] + tokens = [b.strip() for b in block.split("/") if b != ""] # Return list if no range operators are present. - if 'to' not in tokens and 'by' not in tokens: + if "to" not in tokens and "by" not in tokens: return tokens # Parse range values, honoring 'to' and 'by' operators. try: - to_idx = tokens.index('to') + to_idx = tokens.index("to") assert to_idx != 0, "There must be a start token." start_token, end_token = tokens[to_idx - 1], tokens[to_idx + 1] start, end = mars_range_value(start_token), mars_range_value(end_token) # Parse increment token, or choose default increment. 
- increment_token = '1' + increment_token = "1" increment = 1 - if 'by' in tokens: - increment_token = tokens[tokens.index('by') + 1] + if "by" in tokens: + increment_token = tokens[tokens.index("by") + 1] increment = mars_increment_value(increment_token) except (AssertionError, IndexError, ValueError): raise SyntaxError(f"Improper range syntax in '{block}'.") @@ -249,15 +249,22 @@ def parse_mars_syntax(block: str) -> t.List[str]: f"Increments on a date range must be integer number of days, '{increment_token}' is invalid." ) return [d.strftime("%Y-%m-%d") for d in date_range(start, end, increment)] - elif (isinstance(start, float) or isinstance(end, float)) and not isinstance(increment, datetime.date): + elif (isinstance(start, float) or isinstance(end, float)) and not isinstance( + increment, datetime.date + ): # Increment can be either an int or a float. _round_places = 4 - return [str(round(x, _round_places)).zfill(len(start_token)) - for x in np.arange(start, end + increment, increment)] + return [ + str(round(x, _round_places)).zfill(len(start_token)) + for x in np.arange(start, end + increment, increment) + ] elif isinstance(start, int) and isinstance(end, int) and isinstance(increment, int): # Honor leading zeros. 
offset = 1 if start <= end else -1 - return [str(x).zfill(len(start_token)) for x in range(start, end + offset, increment)] + return [ + str(x).zfill(len(start_token)) + for x in range(start, end + offset, increment) + ] else: raise ValueError( f"Range tokens (start='{start_token}', end='{end_token}', increment='{increment_token}')" @@ -265,22 +272,27 @@ def parse_mars_syntax(block: str) -> t.List[str]: ) -def date_range(start: datetime.date, end: datetime.date, increment: int = 1) -> t.Iterable[datetime.date]: +def date_range( + start: datetime.date, end: datetime.date, increment: int = 1 +) -> t.Iterable[datetime.date]: """Gets a range of dates, inclusive.""" offset = 1 if start <= end else -1 - return (start + datetime.timedelta(days=x) for x in range(0, (end - start).days + offset, increment)) + return ( + start + datetime.timedelta(days=x) + for x in range(0, (end - start).days + offset, increment) + ) -def _parse_lists(config: dict, section: str = '') -> t.Dict: +def _parse_lists(config: dict, section: str = "") -> t.Dict: """Parses multiline blocks in *.cfg and *.json files as lists.""" for key, val in config.items(): # Checks str type for backward compatibility since it also support "padding": 0 in json config if not isinstance(val, str): continue - if '/' in val and 'parameters' not in section: + if "/" in val and "parameters" not in section: config[key] = parse_mars_syntax(val) - elif '\n' in val: + elif "\n" in val: config[key] = _splitlines(val) return config @@ -288,7 +300,7 @@ def _parse_lists(config: dict, section: str = '') -> t.Dict: def _number_of_replacements(s: t.Text): format_names = [v[1] for v in string.Formatter().parse(s) if v[1] is not None] - num_empty_names = len([empty for empty in format_names if empty == '']) + num_empty_names = len([empty for empty in format_names if empty == ""]) if num_empty_names != 0: num_empty_names -= 1 return len(set(format_names)) + num_empty_names @@ -298,7 +310,7 @@ def parse_subsections(config: t.Dict) -> 
t.Dict: """Interprets [section.subsection] as nested dictionaries in `.cfg` files.""" copy = cp.deepcopy(config) for key, val in copy.items(): - path = key.split('.') + path = key.split(".") runner = copy parent = {} p = None @@ -309,13 +321,15 @@ def parse_subsections(config: t.Dict) -> t.Dict: runner = runner[p] parent[p] = val - for_cleanup = [key for key, _ in copy.items() if '.' in key] + for_cleanup = [key for key, _ in copy.items() if "." in key] for target in for_cleanup: del copy[target] return copy -def require(condition: bool, message: str, error_type: t.Type[Exception] = ValueError) -> None: +def require( + condition: bool, message: str, error_type: t.Type[Exception] = ValueError +) -> None: """A assert-like helper that wraps text and throws an error.""" if not condition: raise error_type(textwrap.dedent(message)) @@ -326,86 +340,112 @@ def process_config(file: t.IO, config_name: str) -> Config: config = parse_config(file) require(bool(config), "Unable to parse configuration file.") - require('parameters' in config, - """ + require( + "parameters" in config, + """ 'parameters' section required in configuration file. The 'parameters' section specifies the 'client', 'dataset', 'target_path', and 'partition_key' for the API client. - Please consult the documentation for more information.""") + Please consult the documentation for more information.""", + ) - params = config.get('parameters', {}) - require('target_template' not in params, - """ + params = config.get("parameters", {}) + require( + "target_template" not in params, + """ 'target_template' is deprecated, use 'target_path' instead. - Please consult the documentation for more information.""") - require('target_path' in params, - """ + Please consult the documentation for more information.""", + ) + require( + "target_path" in params, + """ 'parameters' section requires a 'target_path' key. The 'target_path' is used to format the name of the output files. 
It accepts Python 3.5+ string format symbols (e.g. '{}'). The number of symbols should match the length of the 'partition_keys', as the 'partition_keys' args - are used to create the templates.""") - require('client' in params, - """ + are used to create the templates.""", + ) + require( + "client" in params, + """ 'parameters' section requires a 'client' key. Supported clients are {} - """.format(str(CLIENTS))) - require(params.get('client') in CLIENTS, - """ + """.format( + str(CLIENTS) + ), + ) + require( + params.get("client") in CLIENTS, + """ Invalid 'client' parameter. Supported clients are {} - """.format(str(CLIENTS))) - require('append_date_dirs' not in params, - """ + """.format( + str(CLIENTS) + ), + ) + require( + "append_date_dirs" not in params, + """ The current version of 'google-weather-tools' no longer supports 'append_date_dirs'! Please refer to documentation for creating date-based directory hierarchy : https://weather-tools.readthedocs.io/en/latest/Configuration.html#""" - """creating-a-date-based-directory-hierarchy.""", - NotImplementedError) - require('target_filename' not in params, - """ + """creating-a-date-based-directory-hierarchy.""", + NotImplementedError, + ) + require( + "target_filename" not in params, + """ The current version of 'google-weather-tools' no longer supports 'target_filename'! Please refer to documentation : https://weather-tools.readthedocs.io/en/latest/Configuration.html#parameters-section.""", - NotImplementedError) + NotImplementedError, + ) - partition_keys = params.get('partition_keys', list()) + partition_keys = params.get("partition_keys", list()) if isinstance(partition_keys, str): partition_keys = [partition_keys.strip()] - selection = config.get('selection', dict()) - require(all((key in selection for key in partition_keys)), - """ + selection = config.get("selection", dict()) + require( + all((key in selection for key in partition_keys)), + """ All 'partition_keys' must appear in the 'selection' section. 
'partition_keys' specify how to split data for workers. Please consult - documentation for more information.""") + documentation for more information.""", + ) - num_template_replacements = _number_of_replacements(params['target_path']) + num_template_replacements = _number_of_replacements(params["target_path"]) num_partition_keys = len(partition_keys) - require(num_template_replacements == num_partition_keys, - """ + require( + num_template_replacements == num_partition_keys, + """ 'target_path' has {0} replacements. Expected {1}, since there are {1} partition keys. - """.format(num_template_replacements, num_partition_keys)) - - if 'day' in partition_keys: - require(selection['day'] != 'all', - """If 'all' is used for a selection value, it cannot appear as a partition key.""") + """.format( + num_template_replacements, num_partition_keys + ), + ) + + if "day" in partition_keys: + require( + selection["day"] != "all", + """If 'all' is used for a selection value, it cannot appear as a partition key.""", + ) # Ensure consistent lookup. - config['parameters']['partition_keys'] = partition_keys + config["parameters"]["partition_keys"] = partition_keys # Add config file name. - config['parameters']['config_name'] = config_name + config["parameters"]["config_name"] = config_name # Ensure the cartesian-cross can be taken on singleton values for the partition. 
for key in partition_keys: @@ -417,7 +457,9 @@ def process_config(file: t.IO, config_name: str) -> Config: def prepare_target_name(config: Config) -> str: """Returns name of target location.""" - partition_dict = OrderedDict((key, typecast(key, config.selection[key][0])) for key in config.partition_keys) + partition_dict = OrderedDict( + (key, typecast(key, config.selection[key][0])) for key in config.partition_keys + ) target = config.target_path.format(*partition_dict.values(), **partition_dict) return target @@ -444,5 +486,8 @@ def get_subsections(config: Config) -> t.List[t.Tuple[str, t.Dict]]: api_url=UUUUU3 ``` """ - return [(name, params) for name, params in config.kwargs.items() - if isinstance(params, dict)] or [('default', {})] + return [ + (name, params) + for name, params in config.kwargs.items() + if isinstance(params, dict) + ] or [("default", {})] diff --git a/weather_dl_v2/fastapi-server/config_processing/partition.py b/weather_dl_v2/fastapi-server/config_processing/partition.py index 282e1fa5..6b19ff31 100644 --- a/weather_dl_v2/fastapi-server/config_processing/partition.py +++ b/weather_dl_v2/fastapi-server/config_processing/partition.py @@ -11,8 +11,9 @@ logger = logging.getLogger(__name__) + @dataclasses.dataclass -class PartitionConfig(): +class PartitionConfig: """Partition a config into multiple data requests. 
Partitioning involves four main operations: First, we fan-out shards based on @@ -63,8 +64,14 @@ def skip_partition(self, config: Config) -> bool: target = prepare_target_name(config) if self.store.exists(target): - logger.info(f'file {target} found, skipping.') - self.manifest.skip(config.config_name, config.dataset, config.selection, target, config.user_id) + logger.info(f"file {target} found, skipping.") + self.manifest.skip( + config.config_name, + config.dataset, + config.selection, + target, + config.user_id, + ) return True return False @@ -81,7 +88,9 @@ def prepare_partitions(self) -> t.Iterator[Config]: Returns: An iterator of `Config`s. """ - for option in itertools.product(*[self.config.selection[key] for key in self.config.partition_keys]): + for option in itertools.product( + *[self.config.selection[key] for key in self.config.partition_keys] + ): yield self._create_partition_config(option) def new_downloads_only(self, candidate: Config) -> bool: @@ -95,6 +104,11 @@ def new_downloads_only(self, candidate: Config) -> bool: def update_manifest_collection(self, partition: Config) -> Config: """Updates the DB.""" location = prepare_target_name(partition) - self.manifest.schedule(partition.config_name, partition.dataset, - partition.selection, location, partition.user_id) - logger.info(f'Created partition {location!r}.') + self.manifest.schedule( + partition.config_name, + partition.dataset, + partition.selection, + location, + partition.user_id, + ) + logger.info(f"Created partition {location!r}.") diff --git a/weather_dl_v2/fastapi-server/config_processing/pipeline.py b/weather_dl_v2/fastapi-server/config_processing/pipeline.py index cca9df59..836f24f1 100644 --- a/weather_dl_v2/fastapi-server/config_processing/pipeline.py +++ b/weather_dl_v2/fastapi-server/config_processing/pipeline.py @@ -9,11 +9,12 @@ download_handler = get_download_handler() queue_handler = get_queue_handler() + def start_processing_config(config_file, licenses): config = {} manifest 
= FirestoreManifest() - with open(config_file, 'r', encoding='utf-8') as f: + with open(config_file, "r", encoding="utf-8") as f: # configs/example.cfg -> example.cfg config_name = os.path.split(config_file)[1] config = process_config(f, config_name) diff --git a/weather_dl_v2/fastapi-server/config_processing/stores.py b/weather_dl_v2/fastapi-server/config_processing/stores.py index 363aa49e..12f52617 100644 --- a/weather_dl_v2/fastapi-server/config_processing/stores.py +++ b/weather_dl_v2/fastapi-server/config_processing/stores.py @@ -12,11 +12,11 @@ class Store(abc.ABC): """A interface to represent where downloads are stored. - Default implementation uses Apache Beam's Filesystems. - """ + Default implementation uses Apache Beam's Filesystems. + """ @abc.abstractmethod - def open(self, filename: str, mode: str = 'r') -> t.IO: + def open(self, filename: str, mode: str = "r") -> t.IO: pass @abc.abstractmethod @@ -30,9 +30,9 @@ class InMemoryStore(Store): def __init__(self): self.store = {} - def open(self, filename: str, mode: str = 'r') -> t.IO: + def open(self, filename: str, mode: str = "r") -> t.IO: """Create or read in-memory data.""" - if 'b' in mode: + if "b" in mode: file = io.BytesIO() else: file = io.StringIO() @@ -53,7 +53,7 @@ def __init__(self, directory: t.Optional[str] = None) -> None: if self.dir and not os.path.exists(self.dir): os.makedirs(self.dir) - def open(self, filename: str, mode: str = 'r') -> t.IO: + def open(self, filename: str, mode: str = "r") -> t.IO: """Create a temporary file in the store directory.""" return tempfile.TemporaryFile(mode, dir=self.dir) @@ -71,7 +71,7 @@ def __init__(self, directory: t.Optional[str] = None) -> None: if self.dir and not os.path.exists(self.dir): os.makedirs(self.dir) - def open(self, filename: str, mode: str = 'r') -> t.IO: + def open(self, filename: str, mode: str = "r") -> t.IO: """Open a local file from the store directory.""" return open(os.sep.join([self.dir, filename]), mode) @@ -83,7 +83,7 @@ def 
exists(self, filename: str) -> bool: class FSStore(Store): """Store data into any store supported by Apache Beam's FileSystems.""" - def open(self, filename: str, mode: str = 'r') -> t.IO: + def open(self, filename: str, mode: str = "r") -> t.IO: """Open object in cloud bucket (or local file system) as a read or write channel. To work with cloud storage systems, only a read or write channel can be openend @@ -92,10 +92,10 @@ def open(self, filename: str, mode: str = 'r') -> t.IO: Further, append operations, or writes on existing objects, are dissallowed (the error thrown will depend on the implementation of the underlying cloud provider). """ - if 'r' in mode and 'w' not in mode: + if "r" in mode and "w" not in mode: return FileSystems().open(filename) - if 'w' in mode and 'r' not in mode: + if "w" in mode and "r" not in mode: return FileSystems().create(filename) raise ValueError( diff --git a/weather_dl_v2/fastapi-server/config_processing/util.py b/weather_dl_v2/fastapi-server/config_processing/util.py index 825205ec..cb6544bf 100644 --- a/weather_dl_v2/fastapi-server/config_processing/util.py +++ b/weather_dl_v2/fastapi-server/config_processing/util.py @@ -25,7 +25,9 @@ logger = logging.getLogger(__name__) -def _retry_if_valid_input_but_server_or_socket_error_and_timeout_filter(exception) -> bool: +def _retry_if_valid_input_but_server_or_socket_error_and_timeout_filter( + exception, +) -> bool: if isinstance(exception, socket.timeout): return True if isinstance(exception, TimeoutError): @@ -37,6 +39,7 @@ def _retry_if_valid_input_but_server_or_socket_error_and_timeout_filter(exceptio class _FakeClock: + def sleep(self, value): pass @@ -46,7 +49,7 @@ def retry_with_exponential_backoff(fun): clock = retry.Clock() # Use a fake clock only during test time... 
- if 'unittest' in sys.modules.keys(): + if "unittest" in sys.modules.keys(): clock = _FakeClock() return retry.with_exponential_backoff( @@ -73,9 +76,11 @@ def ichunked(iterable: t.Iterable, n: int) -> t.Iterator[t.Iterable]: def copy(src: str, dst: str) -> None: """Copy data via `gsutil cp`.""" try: - subprocess.run(['gsutil', 'cp', src, dst], check=True, capture_output=True) + subprocess.run(["gsutil", "cp", src, dst], check=True, capture_output=True) except subprocess.CalledProcessError as e: - logger.info(f'Failed to copy file {src!r} to {dst!r} due to {e.stderr.decode("utf-8")}') + logger.info( + f'Failed to copy file {src!r} to {dst!r} due to {e.stderr.decode("utf-8")}' + ) raise @@ -83,7 +88,7 @@ def copy(src: str, dst: str) -> None: def to_json_serializable_type(value: t.Any) -> t.Any: """Returns the value with a type serializable to JSON""" # Note: The order of processing is significant. - logger.info('Serializing to JSON') + logger.info("Serializing to JSON") if pd.isna(value) or value is None: return None @@ -92,7 +97,11 @@ def to_json_serializable_type(value: t.Any) -> t.Any: elif type(value) == np.ndarray: # Will return a scaler if array is of size 1, else will return a list. return value.tolist() - elif type(value) == datetime.datetime or type(value) == str or type(value) == np.datetime64: + elif ( + type(value) == datetime.datetime + or type(value) == str + or type(value) == np.datetime64 + ): # Assume strings are ISO format timestamps... try: value = datetime.datetime.fromisoformat(value) @@ -115,7 +124,7 @@ def to_json_serializable_type(value: t.Any) -> t.Any: return value.replace(tzinfo=datetime.timezone.utc).isoformat() elif type(value) == np.timedelta64: # Return time delta in seconds. - return float(value / np.timedelta64(1, 's')) + return float(value / np.timedelta64(1, "s")) # This check must happen after processing np.timedelta64 and np.datetime64. 
elif np.issubdtype(type(value), np.integer): return int(value) @@ -128,13 +137,13 @@ def fetch_geo_polygon(area: t.Union[list, str]) -> str: # Ref: https://confluence.ecmwf.int/pages/viewpage.action?pageId=151520973 if isinstance(area, str): # European area - if area == 'E': + if area == "E": area = [73.5, -27, 33, 45] # Global area - elif area == 'G': + elif area == "G": area = GLOBAL_COVERAGE_AREA else: - raise RuntimeError(f'Not a valid value for area in config: {area}.') + raise RuntimeError(f"Not a valid value for area in config: {area}.") n, w, s, e = [float(x) for x in area] if s < LATITUDE_RANGE[0]: @@ -156,22 +165,24 @@ def fetch_geo_polygon(area: t.Union[list, str]) -> str: def get_file_size(path: str) -> float: parsed_gcs_path = urlparse(path) - if parsed_gcs_path.scheme != 'gs' or parsed_gcs_path.netloc == '': - return os.stat(path).st_size / (1024 ** 3) if os.path.exists(path) else 0 + if parsed_gcs_path.scheme != "gs" or parsed_gcs_path.netloc == "": + return os.stat(path).st_size / (1024**3) if os.path.exists(path) else 0 else: - return gcsio.GcsIO().size(path) / (1024 ** 3) if gcsio.GcsIO().exists(path) else 0 + return ( + gcsio.GcsIO().size(path) / (1024**3) if gcsio.GcsIO().exists(path) else 0 + ) def get_wait_interval(num_retries: int = 0) -> float: """Returns next wait interval in seconds, using an exponential backoff algorithm.""" if 0 == num_retries: return 0 - return 2 ** num_retries + return 2**num_retries def generate_md5_hash(input: str) -> str: """Generates md5 hash for the input string.""" - return hashlib.md5(input.encode('utf-8')).hexdigest() + return hashlib.md5(input.encode("utf-8")).hexdigest() def download_with_aria2(url: str, path: str) -> None: @@ -180,9 +191,24 @@ def download_with_aria2(url: str, path: str) -> None: dir_path, file_name = os.path.split(path) try: subprocess.run( - ['aria2c', '-x', '16', '-s', '16', url, '-d', dir_path, '-o', file_name, '--allow-overwrite'], + [ + "aria2c", + "-x", + "16", + "-s", + "16", + url, 
+ "-d", + dir_path, + "-o", + file_name, + "--allow-overwrite", + ], check=True, - capture_output=True) + capture_output=True, + ) except subprocess.CalledProcessError as e: - logger.info(f'Failed download from server {url!r} to {path!r} due to {e.stderr.decode("utf-8")}') + logger.info( + f'Failed download from server {url!r} to {path!r} due to {e.stderr.decode("utf-8")}' + ) raise diff --git a/weather_dl_v2/fastapi-server/database/download_handler.py b/weather_dl_v2/fastapi-server/database/download_handler.py index 93b476e6..65d7eeb3 100644 --- a/weather_dl_v2/fastapi-server/database/download_handler.py +++ b/weather_dl_v2/fastapi-server/database/download_handler.py @@ -7,13 +7,17 @@ logger = logging.getLogger(__name__) + def get_download_handler(): return DownloadHandlerFirestore(db=get_db()) + def get_mock_download_handler(): return DownloadHandlerMock() + class DownloadHandler(abc.ABC): + @abc.abstractmethod def _start_download(self, config_name: str, client_name: str) -> None: pass @@ -26,15 +30,21 @@ def _stop_download(self, config_name: str) -> None: def _check_download_exists(self, config_name: str) -> bool: pass + class DownloadHandlerMock(DownloadHandler): + def __init__(self): pass def _start_download(self, config_name: str, client_name: str) -> None: - logger.info(f"Added {config_name} in 'download' collection. Update_time: 000000.") + logger.info( + f"Added {config_name} in 'download' collection. Update_time: 000000." + ) def _stop_download(self, config_name: str) -> None: - logger.info(f"Removed {config_name} in 'download' collection. Update_time: 000000.") + logger.info( + f"Removed {config_name} in 'download' collection. Update_time: 000000." 
+ ) def _check_download_exists(self, config_name: str) -> bool: if config_name == "no_exist": @@ -44,22 +54,32 @@ def _check_download_exists(self, config_name: str) -> bool: else: return True + class DownloadHandlerFirestore(DownloadHandler): + def __init__(self, db: firestore.firestore.Client): self.db = db self.collection = "download" def _start_download(self, config_name: str, client_name: str) -> None: - result: WriteResult = self.db.collection('download').document(config_name).set( - {'config_name': config_name, 'client_name': client_name} - ) + result: WriteResult = ( + self.db.collection("download") + .document(config_name) + .set({"config_name": config_name, "client_name": client_name}) + ) - logger.info(f"Added {config_name} in 'download' collection. Update_time: {result.update_time}.") + logger.info( + f"Added {config_name} in 'download' collection. Update_time: {result.update_time}." + ) def _stop_download(self, config_name: str) -> None: - timestamp = self.db.collection('download').document(config_name).delete() - logger.info(f"Removed {config_name} in 'download' collection. Update_time: {timestamp}.") + timestamp = self.db.collection("download").document(config_name).delete() + logger.info( + f"Removed {config_name} in 'download' collection. Update_time: {timestamp}." 
+ ) def _check_download_exists(self, config_name: str) -> bool: - result: DocumentSnapshot = self.db.collection('download').document(config_name).get() + result: DocumentSnapshot = ( + self.db.collection("download").document(config_name).get() + ) return result.exists diff --git a/weather_dl_v2/fastapi-server/database/license_handler.py b/weather_dl_v2/fastapi-server/database/license_handler.py index 53207829..ad352b01 100644 --- a/weather_dl_v2/fastapi-server/database/license_handler.py +++ b/weather_dl_v2/fastapi-server/database/license_handler.py @@ -7,13 +7,17 @@ logger = logging.getLogger(__name__) + def get_license_handler(): return LicenseHandlerFirestore(db=get_db()) + def get_mock_license_handler(): return LicenseHandlerMock() + class LicenseHandler(abc.ABC): + @abc.abstractmethod def _add_license(self, license_dict: dict) -> str: pass @@ -46,7 +50,9 @@ def _update_license(self, license_id: str, license_dict: dict) -> None: def _get_license_without_deployment(self) -> list: pass + class LicenseHandlerMock(LicenseHandler): + def __init__(self): pass @@ -56,10 +62,14 @@ def _add_license(self, license_dict: dict) -> str: return license_id def _delete_license(self, license_id: str) -> None: - logger.info(f"Removed {license_id} in 'license' collection. Update_time: 00000.") + logger.info( + f"Removed {license_id} in 'license' collection. Update_time: 00000." + ) def _update_license(self, license_id: str, license_dict: dict) -> None: - logger.info(f"Updated {license_id} in 'license' collection. Update_time: 00000.") + logger.info( + f"Updated {license_id} in 'license' collection. Update_time: 00000." 
+ ) def _check_license_exists(self, license_id: str) -> bool: if license_id == "no_exists": @@ -75,7 +85,7 @@ def _get_license_by_license_id(self, license_id: str) -> dict: "secret_id": "xxxx", "client_name": "dummy_client", "k8s_deployment_id": "k1", - "number_of_requets": 100 + "number_of_requets": 100, } def _get_license_by_client_name(self, client_name: str) -> list: @@ -84,24 +94,24 @@ def _get_license_by_client_name(self, client_name: str) -> list: "secret_id": "xxxx", "client_name": client_name, "k8s_deployment_id": "k1", - "number_of_requets": 100 + "number_of_requets": 100, }] def _get_licenses(self) -> list: - return [ - { - "license_id": "L1", - "secret_id": "xxxx", - "client_name": "dummy_client", - "k8s_deployment_id": "k1", - "number_of_requets": 100 - } - ] + return [{ + "license_id": "L1", + "secret_id": "xxxx", + "client_name": "dummy_client", + "k8s_deployment_id": "k1", + "number_of_requets": 100, + }] def _get_license_without_deployment(self) -> list: return [] + class LicenseHandlerFirestore(LicenseHandler): + def __init__(self, db: firestore.firestore.Client): self.db = db self.collection = "license" @@ -110,30 +120,48 @@ def __init__(self, db: firestore.firestore.Client): def _add_license(self, license_dict: dict) -> str: license_id = f"L{len(self.db.collection(self.collection).get()) + 1}" license_dict["license_id"] = license_id - result: WriteResult = self.db.collection(self.collection).document(license_id).set( - license_dict + result: WriteResult = ( + self.db.collection(self.collection).document(license_id).set(license_dict) + ) + logger.info( + f"Added {license_id} in 'license' collection. Update_time: {result.update_time}." ) - logger.info(f"Added {license_id} in 'license' collection. Update_time: {result.update_time}.") return license_id def _delete_license(self, license_id: str) -> None: timestamp = self.db.collection(self.collection).document(license_id).delete() - logger.info(f"Removed {license_id} in 'license' collection. 
Update_time: {timestamp}.") + logger.info( + f"Removed {license_id} in 'license' collection. Update_time: {timestamp}." + ) def _update_license(self, license_id: str, license_dict: dict) -> None: - result: WriteResult = self.db.collection(self.collection).document(license_id).update(license_dict) - logger.info(f"Updated {license_id} in 'license' collection. Update_time: {result.update_time}.") + result: WriteResult = ( + self.db.collection(self.collection) + .document(license_id) + .update(license_dict) + ) + logger.info( + f"Updated {license_id} in 'license' collection. Update_time: {result.update_time}." + ) def _check_license_exists(self, license_id: str) -> bool: - result: DocumentSnapshot = self.db.collection(self.collection).document(license_id).get() + result: DocumentSnapshot = ( + self.db.collection(self.collection).document(license_id).get() + ) return result.exists def _get_license_by_license_id(self, license_id: str) -> dict: - result: DocumentSnapshot = self.db.collection(self.collection).document(license_id).get() + result: DocumentSnapshot = ( + self.db.collection(self.collection).document(license_id).get() + ) return result.to_dict() def _get_license_by_client_name(self, client_name: str) -> list: - snapshot_list = self.db.collection(self.collection).where('client_name', '==', client_name).get() + snapshot_list = ( + self.db.collection(self.collection) + .where("client_name", "==", client_name) + .get() + ) result = [] for snapshot in snapshot_list: result.append(snapshot.to_dict()) @@ -143,12 +171,21 @@ def _get_licenses(self) -> list: snapshot_list = self.db.collection(self.collection).get() result = [] for snapshot in snapshot_list: - result.append(self.db.collection(self.collection).document(snapshot.id).get().to_dict()) + result.append( + self.db.collection(self.collection) + .document(snapshot.id) + .get() + .to_dict() + ) return result def _get_license_without_deployment(self) -> list: - snapshot_list = 
self.db.collection(self.collection).where('k8s_deployment_id', '==', '').get() + snapshot_list = ( + self.db.collection(self.collection) + .where("k8s_deployment_id", "==", "") + .get() + ) result = [] for snapshot in snapshot_list: - result.append(snapshot.to_dict()['license_id']) + result.append(snapshot.to_dict()["license_id"]) return result diff --git a/weather_dl_v2/fastapi-server/database/queue_handler.py b/weather_dl_v2/fastapi-server/database/queue_handler.py index 0c2f11f0..8bda3fa9 100644 --- a/weather_dl_v2/fastapi-server/database/queue_handler.py +++ b/weather_dl_v2/fastapi-server/database/queue_handler.py @@ -7,13 +7,17 @@ logger = logging.getLogger(__name__) + def get_queue_handler(): return QueueHandlerFirestore(db=get_db()) + def get_mock_queue_handler(): return QueueHandlerMock() + class QueueHandler(abc.ABC): + @abc.abstractmethod def _create_license_queue(self, license_id: str, client_name: str) -> None: pass @@ -39,7 +43,9 @@ def _update_license_queue(self, license_id: str, priority_list: list) -> None: pass @abc.abstractmethod - def _update_queues_on_start_download(self, config_name: str, licenses: list) -> None: + def _update_queues_on_start_download( + self, config_name: str, licenses: list + ) -> None: pass @abc.abstractmethod @@ -47,7 +53,9 @@ def _update_queues_on_stop_download(self, config_name: str) -> None: pass @abc.abstractmethod - def _update_config_priority_in_license(self, license_id: str, config_name: str, priority: int) -> None: + def _update_config_priority_in_license( + self, license_id: str, config_name: str, priority: int + ) -> None: pass @abc.abstractmethod @@ -58,54 +66,54 @@ def _create_license_queue(self, license_id: str, client_name: str) -> None: def _remove_license_queue(self, license_id: str) -> None: pass + class QueueHandlerMock(QueueHandler): + def __init__(self): pass def _create_license_queue(self, license_id: str, client_name: str) -> None: - logger.info(f"Added {license_id} queue in 'queues' collection. 
Update_time: 000000.") + logger.info( + f"Added {license_id} queue in 'queues' collection. Update_time: 000000." + ) def _remove_license_queue(self, license_id: str) -> None: - logger.info(f"Removed {license_id} queue in 'queues' collection. Update_time: 000000.") + logger.info( + f"Removed {license_id} queue in 'queues' collection. Update_time: 000000." + ) def _get_queues(self) -> list: - return [ - { - "client_name": "dummy_client", - "license_id": "L1", - "queue": [] - } - ] + return [{"client_name": "dummy_client", "license_id": "L1", "queue": []}] def _get_queue_by_license_id(self, license_id: str) -> dict: if license_id == "no_exists": return None - return { - "client_name": "dummy_client", - "license_id": license_id, - "queue": [] - } + return {"client_name": "dummy_client", "license_id": license_id, "queue": []} def _get_queue_by_client_name(self, client_name: str) -> list: - return [ - { - "client_name": client_name, - "license_id": "L1", - "queue": [] - } - ] + return [{"client_name": client_name, "license_id": "L1", "queue": []}] def _update_license_queue(self, license_id: str, priority_list: list) -> None: - logger.info(f"Updated {license_id} queue in 'queues' collection. Update_time: 00000.") + logger.info( + f"Updated {license_id} queue in 'queues' collection. Update_time: 00000." + ) - def _update_queues_on_start_download(self, config_name: str, licenses: list) -> None: - logger.info(f"Updated {license} queue in 'queues' collection. Update_time: 00000.") + def _update_queues_on_start_download( + self, config_name: str, licenses: list + ) -> None: + logger.info( + f"Updated {license} queue in 'queues' collection. Update_time: 00000." + ) def _update_queues_on_stop_download(self, config_name: str) -> None: - logger.info("Updated snapshot.id queue in 'queues' collection. Update_time: 00000.") + logger.info( + "Updated snapshot.id queue in 'queues' collection. Update_time: 00000." 
+ ) - def _update_config_priority_in_license(self, license_id: str, config_name: str, priority: int) -> None: - print(f"Updated snapshot.id queue in 'queues' collection. Update_time: 00000.") + def _update_config_priority_in_license( + self, license_id: str, config_name: str, priority: int + ) -> None: + print("Updated snapshot.id queue in 'queues' collection. Update_time: 00000.") def _create_license_queue(self, license_id: str, client_name: str) -> None: logger.info("Added L1 queue in 'queues' collection. Update_time: 00000.") @@ -113,78 +121,126 @@ def _create_license_queue(self, license_id: str, client_name: str) -> None: def _remove_license_queue(self, license_id: str) -> None: logger.info("Removed L1 queue in 'queues' collection. Update_time: 00000.") + class QueueHandlerFirestore(QueueHandler): + def __init__(self, db: firestore.firestore.Client, collection: str = "queues"): self.db = db self.collection = collection def _create_license_queue(self, license_id: str, client_name: str) -> None: - result: WriteResult = self.db.collection(self.collection).document(license_id).set( - {"license_id": license_id, "client_name": client_name, "queue": []} + result: WriteResult = ( + self.db.collection(self.collection) + .document(license_id) + .set({"license_id": license_id, "client_name": client_name, "queue": []}) + ) + logger.info( + f"Added {license_id} queue in 'queues' collection. Update_time: {result.update_time}." ) - logger.info(f"Added {license_id} queue in 'queues' collection. Update_time: {result.update_time}.") def _remove_license_queue(self, license_id: str) -> None: timestamp = self.db.collection(self.collection).document(license_id).delete() - logger.info(f"Removed {license_id} queue in 'queues' collection. Update_time: {timestamp}.") + logger.info( + f"Removed {license_id} queue in 'queues' collection. Update_time: {timestamp}." 
+ ) def _get_queues(self) -> list: snapshot_list = self.db.collection(self.collection).get() result = [] for snapshot in snapshot_list: - result.append(self.db.collection(self.collection).document(snapshot.id).get().to_dict()) + result.append( + self.db.collection(self.collection) + .document(snapshot.id) + .get() + .to_dict() + ) return result def _get_queue_by_license_id(self, license_id: str) -> dict: - result: DocumentSnapshot = self.db.collection(self.collection).document(license_id).get() + result: DocumentSnapshot = ( + self.db.collection(self.collection).document(license_id).get() + ) return result.to_dict() def _get_queue_by_client_name(self, client_name: str) -> list: - snapshot_list = self.db.collection(self.collection).where('client_name', '==', client_name).get() + snapshot_list = ( + self.db.collection(self.collection) + .where("client_name", "==", client_name) + .get() + ) result = [] for snapshot in snapshot_list: result.append(snapshot.to_dict()) return result def _update_license_queue(self, license_id: str, priority_list: list) -> None: - result: WriteResult = self.db.collection(self.collection).document(license_id).update( - {'queue': priority_list} - ) - logger.info(f"Updated {license_id} queue in 'queues' collection. Update_time: {result.update_time}.") + result: WriteResult = ( + self.db.collection(self.collection) + .document(license_id) + .update({"queue": priority_list}) + ) + logger.info( + f"Updated {license_id} queue in 'queues' collection. Update_time: {result.update_time}." 
+ ) - def _update_queues_on_start_download(self, config_name: str, licenses: list) -> None: + def _update_queues_on_start_download( + self, config_name: str, licenses: list + ) -> None: for license in licenses: - result: WriteResult = self.db.collection(self.collection).document(license).update( - {'queue': firestore.ArrayUnion([config_name])} + result: WriteResult = ( + self.db.collection(self.collection) + .document(license) + .update({"queue": firestore.ArrayUnion([config_name])}) + ) + logger.info( + f"Updated {license} queue in 'queues' collection. Update_time: {result.update_time}." ) - logger.info(f"Updated {license} queue in 'queues' collection. Update_time: {result.update_time}.") def _update_queues_on_stop_download(self, config_name: str) -> None: snapshot_list = self.db.collection(self.collection).get() for snapshot in snapshot_list: - result: WriteResult = self.db.collection(self.collection).document(snapshot.id).update({ - 'queue': firestore.ArrayRemove([config_name])}) - logger.info(f"Updated {snapshot.id} queue in 'queues' collection. Update_time: {result.update_time}.") + result: WriteResult = ( + self.db.collection(self.collection) + .document(snapshot.id) + .update({"queue": firestore.ArrayRemove([config_name])}) + ) + logger.info( + f"Updated {snapshot.id} queue in 'queues' collection. Update_time: {result.update_time}." 
+ ) - def _update_config_priority_in_license(self, license_id: str, config_name: str, priority: int) -> None: - snapshot: DocumentSnapshot = self.db.collection('queues').document(license_id).get() - priority_list = snapshot.to_dict()['queue'] + def _update_config_priority_in_license( + self, license_id: str, config_name: str, priority: int + ) -> None: + snapshot: DocumentSnapshot = ( + self.db.collection("queues").document(license_id).get() + ) + priority_list = snapshot.to_dict()["queue"] if config_name not in priority_list: print(f"'{config_name}' not in queue.") raise new_priority_list = [c for c in priority_list if c != config_name] new_priority_list.insert(priority, config_name) - result: WriteResult = self.db.collection('queues').document(license_id).update( - {'queue': new_priority_list} + result: WriteResult = ( + self.db.collection("queues") + .document(license_id) + .update({"queue": new_priority_list}) + ) + print( + f"Updated {snapshot.id} queue in 'queues' collection. Update_time: {result.update_time}." ) - print(f"Updated {snapshot.id} queue in 'queues' collection. Update_time: {result.update_time}.") def _create_license_queue(self, license_id: str, client_name: str) -> None: - result: WriteResult = self.db.collection('queues').document(license_id).set( - {"license_id": license_id, "client_name": client_name, "queue": []} + result: WriteResult = ( + self.db.collection("queues") + .document(license_id) + .set({"license_id": license_id, "client_name": client_name, "queue": []}) + ) + logger.info( + f"Added {license_id} queue in 'queues' collection. Update_time: {result.update_time}." ) - logger.info(f"Added {license_id} queue in 'queues' collection. Update_time: {result.update_time}.") def _remove_license_queue(self, license_id: str) -> None: - timestamp = self.db.collection('queues').document(license_id).delete() - logger.info(f"Removed {license_id} queue in 'queues' collection. 
Update_time: {timestamp}.") + timestamp = self.db.collection("queues").document(license_id).delete() + logger.info( + f"Removed {license_id} queue in 'queues' collection. Update_time: {timestamp}." + ) diff --git a/weather_dl_v2/fastapi-server/database/session.py b/weather_dl_v2/fastapi-server/database/session.py index 564cafe7..e40f8157 100644 --- a/weather_dl_v2/fastapi-server/database/session.py +++ b/weather_dl_v2/fastapi-server/database/session.py @@ -8,11 +8,14 @@ logger = logging.getLogger(__name__) + class Database(abc.ABC): + @abc.abstractmethod def _get_db(self): pass + def get_db() -> firestore.firestore.Client: """Acquire a firestore client, initializing the firebase app if necessary. Will attempt to get the db client five times. If it's still unsuccessful, a @@ -31,10 +34,12 @@ def get_db() -> firestore.firestore.Client: cred = credentials.ApplicationDefault() firebase_admin.initialize_app(cred) - logger.info('Initialized Firebase App.') + logger.info("Initialized Firebase App.") if attempts > 4: - raise RuntimeError('Exceeded number of retries to get firestore client.') from e + raise RuntimeError( + "Exceeded number of retries to get firestore client." 
+ ) from e time.sleep(get_wait_interval(attempts)) diff --git a/weather_dl_v2/fastapi-server/license_dep/deployment_creator.py b/weather_dl_v2/fastapi-server/license_dep/deployment_creator.py index 6a285fdf..9f2dd554 100644 --- a/weather_dl_v2/fastapi-server/license_dep/deployment_creator.py +++ b/weather_dl_v2/fastapi-server/license_dep/deployment_creator.py @@ -13,12 +13,17 @@ def create_license_deployment(license_id: str) -> str: # Update the deployment name with a unique identifier deployment_manifest["metadata"]["name"] = deployment_name - deployment_manifest["spec"]["template"]["spec"]["containers"][0]["args"] = ["--license", license_id] + deployment_manifest["spec"]["template"]["spec"]["containers"][0]["args"] = [ + "--license", + license_id, + ] # Create an instance of the Kubernetes API client api_instance = client.AppsV1Api() # Create the deployment in the specified namespace - response = api_instance.create_namespaced_deployment(body=deployment_manifest, namespace='default') + response = api_instance.create_namespaced_deployment( + body=deployment_manifest, namespace="default" + ) print("Deployment created successfully:", response.metadata.name) return deployment_name diff --git a/weather_dl_v2/fastapi-server/main.py b/weather_dl_v2/fastapi-server/main.py index f40ee4d4..d1bd29d3 100644 --- a/weather_dl_v2/fastapi-server/main.py +++ b/weather_dl_v2/fastapi-server/main.py @@ -5,40 +5,42 @@ from fastapi import FastAPI from routers import license, download, queues from database.license_handler import get_license_handler -from license_dep.deployment_creator import create_license_deployment from routers.license import get_create_deployment ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) # set up logger. 
-logging.config.fileConfig('logging.conf', disable_existing_loggers=False) +logging.config.fileConfig("logging.conf", disable_existing_loggers=False) logger = logging.getLogger(__name__) + def create_pending_license_deployments(): """Creates license deployments for Licenses whose deployments does not exist.""" license_handler = get_license_handler() create_deployment = get_create_deployment() license_list = license_handler._get_license_without_deployment() - for license in license_list: + for license_id in license_list: try: - logger.info(f"Creating license deployment for {license}") - create_deployment(license) + logger.info(f"Creating license deployment for {license_id}") + create_deployment(license_id) except Exception as e: - logger.error(f"License deployment failed for {license}. Exception: {e}") + logger.error(f"License deployment failed for {license_id}. Exception: {e}") + @asynccontextmanager async def lifespan(app: FastAPI): logger.info("Started FastAPI server") # Boot up # TODO: Replace hard-coded collection name by read a server config. - + # Retrieve license information & create license deployment if needed. create_pending_license_deployments() - + yield # Clean up + app = FastAPI(lifespan=lifespan) app.include_router(license.router) diff --git a/weather_dl_v2/fastapi-server/routers/download.py b/weather_dl_v2/fastapi-server/routers/download.py index b538f454..59099c22 100644 --- a/weather_dl_v2/fastapi-server/routers/download.py +++ b/weather_dl_v2/fastapi-server/routers/download.py @@ -21,34 +21,43 @@ def upload(file: UploadFile): return upload + def get_upload_mock(): def upload(file: UploadFile): return f"{os.getcwd()}/tests/test_data/{file.filename}" - + return upload # Can submit a config to the server. 
@router.post("/") -def submit_download(file: UploadFile | None = None, licenses: list = [], - background_tasks: BackgroundTasks = BackgroundTasks(), - download_handler: DownloadHandler = Depends(get_download_handler), - upload = Depends(get_upload) - ): +def submit_download( + file: UploadFile | None = None, + licenses: list = [], + background_tasks: BackgroundTasks = BackgroundTasks(), + download_handler: DownloadHandler = Depends(get_download_handler), + upload=Depends(get_upload), +): if not file: raise HTTPException(status_code=404, detail="No upload file sent.") else: if download_handler._check_download_exists(file.filename): - raise HTTPException(status_code=400, - detail=f"Please stop the ongoing download of the config file '{file.filename}' " - "before attempting to start a new download.") + raise HTTPException( + status_code=400, + detail=f"Please stop the ongoing download of the config file '{file.filename}' " + "before attempting to start a new download.", + ) try: dest = upload(file) # Start processing config. background_tasks.add_task(start_processing_config, dest, licenses) - return {"message": f"file '{file.filename}' saved at '{dest}' successfully."} + return { + "message": f"file '{file.filename}' saved at '{dest}' successfully." + } except Exception: - raise HTTPException(status_code=500, detail=f"Failed to save file '{file.filename}'.") + raise HTTPException( + status_code=500, detail=f"Failed to save file '{file.filename}'." + ) # Can check the current status of the submitted config. @@ -57,40 +66,81 @@ def submit_download(file: UploadFile | None = None, licenses: list = [], async def get_downloads(client_name: str | None = None): # Get this kind of response by querying download collection + manifest collection. 
if client_name: - result = {"config_name": "config_3", "client_name": client_name, "total_shards": 10000, - "scheduled_shards": 4990, "downloaded_shards": 5000, "failed_shards": 0} + result = { + "config_name": "config_3", + "client_name": client_name, + "total_shards": 10000, + "scheduled_shards": 4990, + "downloaded_shards": 5000, + "failed_shards": 0, + } else: result = [ - {"config_name": "config_1", "client_name": "MARS", "total_shards": 10000, "scheduled_shards": 4990, - "downloaded_shards": 5000, "failed_shards": 0}, - {"config_name": "config_2", "client_name": "MARS", "total_shards": 10000, "scheduled_shards": 4990, - "downloaded_shards": 5000, "failed_shards": 0}, - {"config_name": "config_3", "client_name": "CDS", "total_shards": 10000, "scheduled_shards": 4990, - "downloaded_shards": 5000, "failed_shards": 0} + { + "config_name": "config_1", + "client_name": "MARS", + "total_shards": 10000, + "scheduled_shards": 4990, + "downloaded_shards": 5000, + "failed_shards": 0, + }, + { + "config_name": "config_2", + "client_name": "MARS", + "total_shards": 10000, + "scheduled_shards": 4990, + "downloaded_shards": 5000, + "failed_shards": 0, + }, + { + "config_name": "config_3", + "client_name": "CDS", + "total_shards": 10000, + "scheduled_shards": 4990, + "downloaded_shards": 5000, + "failed_shards": 0, + }, ] return result # Get status of particular download @router.get("/{config_name}") -async def get_download(config_name: str, download_handler: DownloadHandler = Depends(get_download_handler)): +async def get_download( + config_name: str, download_handler: DownloadHandler = Depends(get_download_handler) +): if not download_handler._check_download_exists(config_name): - raise HTTPException(status_code=404, detail="Download config not found in weather-dl v2.") + raise HTTPException( + status_code=404, detail="Download config not found in weather-dl v2." + ) # Get this kind of response by querying fake_manifest_db. 
- result = {"config_name": config_name, "client_name": "MARS", "total_shards": 10000, "scheduled_shards": 4990, - "downloaded_shards": 5000, "failed_shards": 0} + result = { + "config_name": config_name, + "client_name": "MARS", + "total_shards": 10000, + "scheduled_shards": 4990, + "downloaded_shards": 5000, + "failed_shards": 0, + } return result # Stop & remove the execution of the config. @router.delete("/{config_name}") -async def delete_download(config_name: str, - download_handler: DownloadHandler = Depends(get_download_handler), - queue_handler: QueueHandler = Depends(get_queue_handler)): +async def delete_download( + config_name: str, + download_handler: DownloadHandler = Depends(get_download_handler), + queue_handler: QueueHandler = Depends(get_queue_handler), +): if not download_handler._check_download_exists(config_name): - raise HTTPException(status_code=404, detail="No such download config to stop & remove.") + raise HTTPException( + status_code=404, detail="No such download config to stop & remove." 
+ ) download_handler._stop_download(config_name) queue_handler._update_queues_on_stop_download(config_name) - return {"config_name": config_name, "message": "Download config stopped & removed successfully."} + return { + "config_name": config_name, + "message": "Download config stopped & removed successfully.", + } diff --git a/weather_dl_v2/fastapi-server/routers/license.py b/weather_dl_v2/fastapi-server/routers/license.py index be065d10..b764786a 100644 --- a/weather_dl_v2/fastapi-server/routers/license.py +++ b/weather_dl_v2/fastapi-server/routers/license.py @@ -24,29 +24,39 @@ class LicenseInternal(License): responses={404: {"description": "Not found"}}, ) + def get_create_deployment(): def create_deployment(license_id: str): k8s_deployment_id = create_license_deployment(license_id) update_license_internal(license_id, k8s_deployment_id) + return create_deployment + def get_create_deployment_mock(): def create_deployment_mock(license_id: str): print("create deployment mocked") + return create_deployment_mock + def get_terminate_license_deployment(): return terminate_license_deployment + def get_terminate_license_deployment_mock(): def get_terminate_license_deployment_mock(license_id): print(f"terminating license deployment for {license_id}") + return get_terminate_license_deployment_mock # List all the license + handle filters of {client_name} @router.get("/") -async def get_licenses(client_name: str | None = None, license_handler: LicenseHandler = Depends(get_license_handler)): +async def get_licenses( + client_name: str | None = None, + license_handler: LicenseHandler = Depends(get_license_handler), +): if client_name: result = license_handler._get_license_by_client_name(client_name) else: @@ -56,7 +66,9 @@ async def get_licenses(client_name: str | None = None, license_handler: LicenseH # Get particular license @router.get("/{license_id}") -async def get_license_by_license_id(license_id: str, license_handler: LicenseHandler = Depends(get_license_handler)): 
+async def get_license_by_license_id( + license_id: str, license_handler: LicenseHandler = Depends(get_license_handler) +): result = license_handler._get_license_by_license_id(license_id) if not result: raise HTTPException(status_code=404, detail="License not found.") @@ -65,12 +77,13 @@ async def get_license_by_license_id(license_id: str, license_handler: LicenseHan # Update existing license @router.put("/{license_id}") -async def update_license(license_id: str, - license: License, - license_handler: LicenseHandler = Depends(get_license_handler), - create_deployment = Depends(get_create_deployment), - terminate_license_deployment = Depends(get_terminate_license_deployment) - ): +async def update_license( + license_id: str, + license: License, + license_handler: LicenseHandler = Depends(get_license_handler), + create_deployment=Depends(get_create_deployment), + terminate_license_deployment=Depends(get_terminate_license_deployment), +): if not license_handler._check_license_exists(license_id): raise HTTPException(status_code=404, detail="No such license to update.") @@ -83,9 +96,11 @@ async def update_license(license_id: str, # Add/Update k8s deployment ID for existing license (intenally). 
-def update_license_internal(license_id: str, - k8s_deployment_id: str, - license_handler: LicenseHandler = Depends(get_license_handler)): +def update_license_internal( + license_id: str, + k8s_deployment_id: str, + license_handler: LicenseHandler = Depends(get_license_handler), +): if not license_handler._check_license_exists(license_id): raise HTTPException(status_code=404, detail="No such license to update.") license_dict = {"k8s_deployment_id": k8s_deployment_id} @@ -93,29 +108,33 @@ def update_license_internal(license_id: str, license_handler._update_license(license_id, license_dict) return {"license_id": license_id, "message": "License updated successfully."} + # Add new license @router.post("/") -async def add_license(license: License, - background_tasks: BackgroundTasks = BackgroundTasks(), - license_handler: LicenseHandler = Depends(get_license_handler), - queue_handler: QueueHandler = Depends(get_queue_handler), - create_deployment = Depends(get_create_deployment)): +async def add_license( + license: License, + background_tasks: BackgroundTasks = BackgroundTasks(), + license_handler: LicenseHandler = Depends(get_license_handler), + queue_handler: QueueHandler = Depends(get_queue_handler), + create_deployment=Depends(get_create_deployment), +): license_dict = license.dict() - license_dict['k8s_deployment_id'] = "" + license_dict["k8s_deployment_id"] = "" license_id = license_handler._add_license(license_dict) - queue_handler._create_license_queue(license_id, license_dict['client_name']) + queue_handler._create_license_queue(license_id, license_dict["client_name"]) background_tasks.add_task(create_deployment, license_id) return {"license_id": license_id, "message": "License added successfully."} # Remove license @router.delete("/{license_id}") -async def delete_license(license_id: str, - background_tasks: BackgroundTasks = BackgroundTasks(), - license_handler: LicenseHandler = Depends(get_license_handler), - queue_handler: QueueHandler = 
Depends(get_queue_handler), - terminate_license_deployment = Depends(get_terminate_license_deployment) - ): +async def delete_license( + license_id: str, + background_tasks: BackgroundTasks = BackgroundTasks(), + license_handler: LicenseHandler = Depends(get_license_handler), + queue_handler: QueueHandler = Depends(get_queue_handler), + terminate_license_deployment=Depends(get_terminate_license_deployment), +): if not license_handler._check_license_exists(license_id): raise HTTPException(status_code=404, detail="No such license to delete.") license_handler._delete_license(license_id) diff --git a/weather_dl_v2/fastapi-server/routers/queues.py b/weather_dl_v2/fastapi-server/routers/queues.py index 4056ddf6..f51d4834 100644 --- a/weather_dl_v2/fastapi-server/routers/queues.py +++ b/weather_dl_v2/fastapi-server/routers/queues.py @@ -12,8 +12,10 @@ # Users can change the execution order of config per license basis. # List the licenses priority + {client_name} filter @router.get("/") -async def get_all_license_queue(client_name: str | None = None, - queue_handler: QueueHandler = Depends(get_queue_handler)): +async def get_all_license_queue( + client_name: str | None = None, + queue_handler: QueueHandler = Depends(get_queue_handler), +): if client_name: result = queue_handler._get_queue_by_client_name(client_name) else: @@ -23,7 +25,9 @@ async def get_all_license_queue(client_name: str | None = None, # Get particular license priority @router.get("/{license_id}") -async def get_license_queue(license_id: str, queue_handler: QueueHandler = Depends(get_queue_handler)): +async def get_license_queue( + license_id: str, queue_handler: QueueHandler = Depends(get_queue_handler) +): result = queue_handler._get_queue_by_license_id(license_id) if not result: raise HTTPException(status_code=404, detail="License's priority not found.") @@ -32,10 +36,12 @@ async def get_license_queue(license_id: str, queue_handler: QueueHandler = Depen # Change priority queue of particular license 
@router.post("/{license_id}") -def modify_license_queue(license_id: str, priority_list: list | None = [], - queue_handler: QueueHandler = Depends(get_queue_handler), - license_handler: LicenseHandler = Depends(get_license_handler) - ): +def modify_license_queue( + license_id: str, + priority_list: list | None = [], + queue_handler: QueueHandler = Depends(get_queue_handler), + license_handler: LicenseHandler = Depends(get_license_handler), +): if not license_handler._check_license_exists(license_id): raise HTTPException(status_code=404, detail="License's priority not found.") try: @@ -47,13 +53,21 @@ def modify_license_queue(license_id: str, priority_list: list | None = [], # Change config's priority in particular license @router.put("/priority/{license_id}") -def modify_config_priority_in_license(license_id: str, config_name: str, priority: int, - queue_handler: QueueHandler = Depends(get_queue_handler), - license_handler: LicenseHandler = Depends(get_license_handler)): +def modify_config_priority_in_license( + license_id: str, + config_name: str, + priority: int, + queue_handler: QueueHandler = Depends(get_queue_handler), + license_handler: LicenseHandler = Depends(get_license_handler), +): if not license_handler._check_license_exists(license_id): raise HTTPException(status_code=404, detail="License's priority not found.") try: - queue_handler._update_config_priority_in_license(license_id, config_name, priority) - return {"message": f"'{license_id}' license '{config_name}' priority updated successfully."} - except Exception as e: + queue_handler._update_config_priority_in_license( + license_id, config_name, priority + ) + return { + "message": f"'{license_id}' license '{config_name}' priority updated successfully." 
+ } + except Exception: return {"message": f"Failed to update '{license_id}' license priority."} diff --git a/weather_dl_v2/fastapi-server/tests/integration/test_download.py b/weather_dl_v2/fastapi-server/tests/integration/test_download.py index 6ec9fe01..e4bc8b94 100644 --- a/weather_dl_v2/fastapi-server/tests/integration/test_download.py +++ b/weather_dl_v2/fastapi-server/tests/integration/test_download.py @@ -23,6 +23,7 @@ def _get_download(headers, query, code, expected): assert response.status_code == code assert response.json() == expected + def test_get_downloads_basic(): headers = {} query = {} @@ -34,7 +35,7 @@ def test_get_downloads_basic(): "total_shards": 10000, "scheduled_shards": 4990, "downloaded_shards": 5000, - "failed_shards": 0 + "failed_shards": 0, }, { "config_name": "config_2", @@ -42,7 +43,7 @@ def test_get_downloads_basic(): "total_shards": 10000, "scheduled_shards": 4990, "downloaded_shards": 5000, - "failed_shards": 0 + "failed_shards": 0, }, { "config_name": "config_3", @@ -50,8 +51,8 @@ def test_get_downloads_basic(): "total_shards": 10000, "scheduled_shards": 4990, "downloaded_shards": 5000, - "failed_shards": 0 - } + "failed_shards": 0, + }, ] _get_download(headers, query, code, expected) @@ -60,11 +61,11 @@ def test_get_downloads_basic(): def _submit_download(headers, file_path, licenses, code, expected): file = None try: - file = {"file" : open(file_path, 'rb')} + file = {"file": open(file_path, "rb")} except FileNotFoundError: print("file not found.") - payload = {"licenses" : licenses} + payload = {"licenses": licenses} response = client.post("/download", headers=headers, files=file, data=payload) @@ -73,6 +74,7 @@ def _submit_download(headers, file_path, licenses, code, expected): assert response.status_code == code assert response.json() == expected + def test_submit_download_basic(): header = { "accept": "application/json", @@ -80,10 +82,14 @@ def test_submit_download_basic(): file_path = os.path.join(ROOT_DIR, 
"tests/test_data/no_exist.cfg") licenses = ["L1"] code = 200 - expected = {'message': f"file 'no_exist.cfg' saved at '{os.getcwd()}/tests/test_data/no_exist.cfg' "'successfully.'} + expected = { + "message": f"file 'no_exist.cfg' saved at '{os.getcwd()}/tests/test_data/no_exist.cfg' " + "successfully." + } _submit_download(header, file_path, licenses, code, expected) + def test_submit_download_file_not_uploaded(): header = { "accept": "application/json", @@ -95,6 +101,7 @@ def test_submit_download_file_not_uploaded(): _submit_download(header, file_path, licenses, code, expected) + def test_submit_download_file_alreadys_exist(): header = { "accept": "application/json", @@ -103,8 +110,8 @@ def test_submit_download_file_alreadys_exist(): licenses = ["L1"] code = 400 expected = { - "detail": "Please stop the ongoing download of the config file 'example.cfg' before attempting to start a new download." # noqa: E501 - } + "detail": "Please stop the ongoing download of the config file 'example.cfg' before attempting to start a new download." 
# noqa: E501 + } _submit_download(header, file_path, licenses, code, expected) @@ -115,42 +122,55 @@ def _get_download_by_config(headers, config_name, code, expected): assert response.status_code == code assert response.json() == expected + def test_get_download_by_config_basic(): headers = {} config_name = "dummy_config" code = 200 - expected = {"config_name": config_name, "client_name": "MARS", "total_shards": 10000, "scheduled_shards": 4990, - "downloaded_shards": 5000, "failed_shards": 0} + expected = { + "config_name": config_name, + "client_name": "MARS", + "total_shards": 10000, + "scheduled_shards": 4990, + "downloaded_shards": 5000, + "failed_shards": 0, + } _get_download_by_config(headers, config_name, code, expected) + def test_get_download_by_config_wrong_config(): headers = {} config_name = "no_exist" code = 404 - expected = {'detail': 'Download config not found in weather-dl v2.'} + expected = {"detail": "Download config not found in weather-dl v2."} _get_download_by_config(headers, config_name, code, expected) def _delete_download_by_config(headers, config_name, code, expected): - response = client.delete(f"/download/{config_name}",headers=headers) + response = client.delete(f"/download/{config_name}", headers=headers) assert response.status_code == code assert response.json() == expected + def test_delete_download_by_config_basic(): headers = {} config_name = "dummy_config" code = 200 - expected = {'config_name': 'dummy_config', 'message': 'Download config stopped & removed successfully.'} + expected = { + "config_name": "dummy_config", + "message": "Download config stopped & removed successfully.", + } _delete_download_by_config(headers, config_name, code, expected) + def test_delete_download_by_config_wrong_config(): headers = {} config_name = "no_exist" code = 404 - expected = {'detail': 'No such download config to stop & remove.'} + expected = {"detail": "No such download config to stop & remove."} _delete_download_by_config(headers, 
config_name, code, expected) diff --git a/weather_dl_v2/fastapi-server/tests/integration/test_license.py b/weather_dl_v2/fastapi-server/tests/integration/test_license.py index 49dfb352..e770310c 100644 --- a/weather_dl_v2/fastapi-server/tests/integration/test_license.py +++ b/weather_dl_v2/fastapi-server/tests/integration/test_license.py @@ -5,11 +5,11 @@ from database.download_handler import get_download_handler, get_mock_download_handler from database.license_handler import get_license_handler, get_mock_license_handler from routers.license import ( - get_create_deployment, - get_create_deployment_mock, - get_terminate_license_deployment, - get_terminate_license_deployment_mock - ) + get_create_deployment, + get_create_deployment_mock, + get_terminate_license_deployment, + get_terminate_license_deployment_mock, +) from database.queue_handler import get_queue_handler, get_mock_queue_handler client = TestClient(app) @@ -20,7 +20,10 @@ app.dependency_overrides[get_license_handler] = get_mock_license_handler app.dependency_overrides[get_queue_handler] = get_mock_queue_handler app.dependency_overrides[get_create_deployment] = get_create_deployment_mock -app.dependency_overrides[get_terminate_license_deployment] = get_terminate_license_deployment_mock +app.dependency_overrides[ + get_terminate_license_deployment +] = get_terminate_license_deployment_mock + def _get_license(headers, query, code, expected): response = client.get("/license", headers=headers, params=query) @@ -28,61 +31,62 @@ def _get_license(headers, query, code, expected): assert response.status_code == code assert response.json() == expected + def test_get_license_basic(): headers = {} query = {} code = 200 - expected = [ - { - "license_id": "L1", - 'secret_id': 'xxxx', - "client_name": "dummy_client", - "k8s_deployment_id": "k1", - "number_of_requets": 100 - } - ] + expected = [{ + "license_id": "L1", + "secret_id": "xxxx", + "client_name": "dummy_client", + "k8s_deployment_id": "k1", + 
"number_of_requets": 100, + }] _get_license(headers, query, code, expected) + def test_get_license_client_name(): headers = {} client_name = "dummy_client" - query = {"client_name" : client_name} + query = {"client_name": client_name} code = 200 - expected = [ - { - "license_id": "L1", - "secret_id": "xxxx", - "client_name": client_name, - "k8s_deployment_id": "k1", - "number_of_requets": 100 - } - ] + expected = [{ + "license_id": "L1", + "secret_id": "xxxx", + "client_name": client_name, + "k8s_deployment_id": "k1", + "number_of_requets": 100, + }] _get_license(headers, query, code, expected) def _add_license(headers, payload, code, expected): - response = client.post("/license", headers=headers, data=json.dumps(payload), params={"license_id": "L1"}) + response = client.post( + "/license", + headers=headers, + data=json.dumps(payload), + params={"license_id": "L1"}, + ) assert response.status_code == code assert response.json() == expected + def test_add_license_basic(): - headers = { - "accept": "application/json", - "Content-Type" : "application/json" - } + headers = {"accept": "application/json", "Content-Type": "application/json"} license = { "client_name": "dummy_client", - "number_of_requests" : 0, + "number_of_requests": 0, "secret_id": "xxxx", - "api_email" : "email", - 'k8s_deployment_id': 'k1' + "api_email": "email", + "k8s_deployment_id": "k1", } payload = license code = 200 - expected = {'license_id': 'L1', 'message': 'License added successfully.'} + expected = {"license_id": "L1", "message": "License added successfully."} _add_license(headers, payload, code, expected) @@ -94,11 +98,9 @@ def _get_license_by_license_id(headers, license_id, code, expected): assert response.status_code == code assert response.json() == expected + def test_get_license_by_license_id(): - headers = { - "accept": "application/json", - "Content-Type" : "application/json" - } + headers = {"accept": "application/json", "Content-Type": "application/json"} license_id = "L1" code 
= 200 expected = { @@ -106,11 +108,12 @@ def test_get_license_by_license_id(): "secret_id": "xxxx", "client_name": "dummy_client", "k8s_deployment_id": "k1", - "number_of_requets": 100 + "number_of_requets": 100, } _get_license_by_license_id(headers, license_id, code, expected) + def test_get_license_wrong_license(): headers = {} license_id = "no_exists" @@ -123,33 +126,37 @@ def test_get_license_wrong_license(): def _update_license(headers, license_id, license, code, expected): - response = client.put(f"/license/{license_id}", headers=headers, data=json.dumps(license)) + response = client.put( + f"/license/{license_id}", headers=headers, data=json.dumps(license) + ) assert response.status_code == code assert response.json() == expected + def test_update_license_basic(): headers = {} license_id = "L1" license = { "client_name": "dummy_client", - "number_of_requests" : 0, + "number_of_requests": 0, "secret_id": "xxxx", - "api_email" : "email" + "api_email": "email", } code = 200 expected = {"license_id": license_id, "name": "License updated successfully."} _update_license(headers, license_id, license, code, expected) + def test_update_license_wrong_license_id(): headers = {} license_id = "no_exists" license = { "client_name": "dummy_client", - "number_of_requests" : 0, + "number_of_requests": 0, "secret_id": "xxxx", - "api_email" : "email" + "api_email": "email", } code = 404 expected = {"detail": "No such license to update."} @@ -163,6 +170,7 @@ def _delete_license(headers, license_id, code, expected): assert response.status_code == code assert response.json() == expected + def test_delete_license_basic(): headers = {} license_id = "L1" @@ -171,6 +179,7 @@ def test_delete_license_basic(): _delete_license(headers, license_id, code, expected) + def test_delete_license_wrong_license(): headers = {} license_id = "no_exists" diff --git a/weather_dl_v2/fastapi-server/tests/integration/test_queues.py b/weather_dl_v2/fastapi-server/tests/integration/test_queues.py index 
ee82897b..8539e89c 100644 --- a/weather_dl_v2/fastapi-server/tests/integration/test_queues.py +++ b/weather_dl_v2/fastapi-server/tests/integration/test_queues.py @@ -13,38 +13,29 @@ app.dependency_overrides[get_license_handler] = get_mock_license_handler app.dependency_overrides[get_queue_handler] = get_mock_queue_handler + def _get_all_queue(headers, query, code, expected): response = client.get("/queues", headers=headers, params=query) assert response.status_code == code assert response.json() == expected + def test_get_all_queues(): headers = {} query = {} code = 200 - expected = [ - { - "client_name": "dummy_client", - "license_id": "L1", - "queue": [] - } - ] + expected = [{"client_name": "dummy_client", "license_id": "L1", "queue": []}] _get_all_queue(headers, query, code, expected) + def test_get_client_queues(): headers = {} client_name = "dummy_client" query = {"client_name": client_name} code = 200 - expected = [ - { - "client_name": client_name, - "license_id": "L1", - "queue": [] - } - ] + expected = [{"client_name": client_name, "license_id": "L1", "queue": []}] _get_all_queue(headers, query, code, expected) @@ -55,18 +46,16 @@ def _get_queue_by_license(headers, license_id, code, expected): assert response.status_code == code assert response.json() == expected + def test_get_queue_by_license_basic(): headers = {} license_id = "L1" code = 200 - expected = { - "client_name": "dummy_client", - "license_id": license_id, - "queue": [] - } + expected = {"client_name": "dummy_client", "license_id": license_id, "queue": []} _get_queue_by_license(headers, license_id, code, expected) + def test_get_queue_by_license_wrong_license(): headers = {} license_id = "no_exists" @@ -82,6 +71,7 @@ def _modify_license_queue(headers, license_id, priority_list, code, expected): assert response.status_code == code assert response.json() == expected + def test_modify_license_queue_basic(): headers = {} license_id = "L1" @@ -91,6 +81,7 @@ def test_modify_license_queue_basic(): 
_modify_license_queue(headers, license_id, priority_list, code, expected) + def test_modify_license_queue_wrong_license_id(): headers = {} license_id = "no_exists" @@ -100,6 +91,7 @@ def test_modify_license_queue_wrong_license_id(): _modify_license_queue(headers, license_id, priority_list, code, expected) + def _modify_config_priority_in_license(headers, license_id, query, code, expected): response = client.put(f"/queues/priority/{license_id}", params=query) @@ -108,38 +100,34 @@ def _modify_config_priority_in_license(headers, license_id, query, code, expecte assert response.status_code == code assert response.json() == expected + def test_modify_config_priority_in_license_basic(): headers = {} license_id = "L1" - query = { - "config_name": "example.cfg", - "priority": 0 - } + query = {"config_name": "example.cfg", "priority": 0} code = 200 - expected = {'message': f"'{license_id}' license 'example.cfg' priority updated successfully."} + expected = { + "message": f"'{license_id}' license 'example.cfg' priority updated successfully." 
+ } _modify_config_priority_in_license(headers, license_id, query, code, expected) + def test_modify_config_priority_in_license_wrong_license(): headers = {} license_id = "no_exists" - query = { - "config_name": "example.cfg", - "priority": 0 - } + query = {"config_name": "example.cfg", "priority": 0} code = 404 - expected = {'detail': "License's priority not found."} + expected = {"detail": "License's priority not found."} _modify_config_priority_in_license(headers, license_id, query, code, expected) + def test_modify_config_priority_in_license_wrong_config(): headers = {} license_id = "no_exists" - query = { - "config_name": "wrong.cfg", - "priority": 0 - } + query = {"config_name": "wrong.cfg", "priority": 0} code = 404 - expected = {'detail': "License's priority not found."} + expected = {"detail": "License's priority not found."} _modify_config_priority_in_license(headers, license_id, query, code, expected) diff --git a/weather_dl_v2/license_deployment/README.md b/weather_dl_v2/license_deployment/README.md index d33628b6..1d300ea0 100644 --- a/weather_dl_v2/license_deployment/README.md +++ b/weather_dl_v2/license_deployment/README.md @@ -1,19 +1,19 @@ # Deployment Instructions & General Notes -* **How to create environment:** +### How to create environment ``` conda env create --name weather-dl-v2-license-dep --file=environment.yml conda activate weather-dl-v2-license-dep ``` -* **Add path of created downloader image in downloader.yaml**: +### Add path of created downloader image in downloader.yaml ``` Please write down the downloader's docker image path at Line 11 of downloader.yaml. 
``` -* **Create docker image for license deployment**: +### Create docker image for license deployment ``` export PROJECT_ID= export REPO= eg:weather-tools diff --git a/weather_dl_v2/license_deployment/clients.py b/weather_dl_v2/license_deployment/clients.py index 46dcb49f..b92a0797 100644 --- a/weather_dl_v2/license_deployment/clients.py +++ b/weather_dl_v2/license_deployment/clients.py @@ -20,8 +20,7 @@ from manifest import Manifest, Stage from util import download_with_aria2, retry_with_exponential_backoff -warnings.simplefilter( - "ignore", category=urllib3.connectionpool.InsecureRequestWarning) +warnings.simplefilter("ignore", category=urllib3.connectionpool.InsecureRequestWarning) class Client(abc.ABC): @@ -38,11 +37,13 @@ class Client(abc.ABC): def __init__(self, dataset: str, level: int = logging.INFO) -> None: """Clients are initialized with the general CLI configuration.""" self.dataset = dataset - self.logger = logging.getLogger(f'{__name__}.{type(self).__name__}') + self.logger = logging.getLogger(f"{__name__}.{type(self).__name__}") self.logger.setLevel(level) @abc.abstractmethod - def retrieve(self, dataset: str, selection: t.Dict, output: str, manifest: Manifest) -> None: + def retrieve( + self, dataset: str, selection: t.Dict, output: str, manifest: Manifest + ) -> None: """Download from data source.""" pass @@ -61,6 +62,7 @@ def license_url(self): class SplitCDSRequest(cds_api.Client): """Extended CDS class that separates fetch and download stage.""" + @retry_with_exponential_backoff def _download(self, url, path: str, size: int) -> None: self.info("Downloading %s to %s (%s)", url, path, cds_api.bytes_to_string(size)) @@ -74,7 +76,7 @@ def _download(self, url, path: str, size: int) -> None: def fetch(self, request: t.Dict, dataset: str) -> t.Dict: result = self.retrieve(dataset, request) - return {'href': result.location, 'size': result.content_length} + return {"href": result.location, "size": result.content_length} def download(self, result: 
cds_api.Result, target: t.Optional[str] = None) -> None: if target: @@ -107,12 +109,12 @@ class CdsClient(Client): """ """Name patterns of datasets that are hosted internally on CDS servers.""" - cds_hosted_datasets = {'reanalysis-era'} + cds_hosted_datasets = {"reanalysis-era"} def retrieve(self, dataset: str, selection: t.Dict, manifest: Manifest) -> None: c = CDSClientExtended( - url=os.environ.get('CLIENT_URL'), - key=os.environ.get('CLIENT_KEY'), + url=os.environ.get("CLIENT_URL"), + key=os.environ.get("CLIENT_KEY"), debug_callback=self.logger.debug, info_callback=self.logger.info, warning_callback=self.logger.warning, @@ -124,7 +126,7 @@ def retrieve(self, dataset: str, selection: t.Dict, manifest: Manifest) -> None: precise_fetch_start_time = ( datetime.datetime.utcnow() .replace(tzinfo=datetime.timezone.utc) - .isoformat(timespec='seconds') + .isoformat(timespec="seconds") ) manifest.prev_stage_precise_start_time = precise_fetch_start_time result = c.fetch(selection_, dataset) @@ -132,7 +134,7 @@ def retrieve(self, dataset: str, selection: t.Dict, manifest: Manifest) -> None: @property def license_url(self): - return 'https://cds.climate.copernicus.eu/api/v2/terms/static/licence-to-use-copernicus-products.pdf' + return "https://cds.climate.copernicus.eu/api/v2/terms/static/licence-to-use-copernicus-products.pdf" @classmethod def num_requests_per_key(cls, dataset: str) -> int: @@ -183,11 +185,10 @@ def __exit__(self, exc_type, exc_value, traceback): class SplitMARSRequest(api.APIRequest): """Extended MARS APIRequest class that separates fetch and download stage.""" + @retry_with_exponential_backoff def _download(self, url, path: str, size: int) -> None: - self.log( - "Transferring %s into %s" % (self._bytename(size), path) - ) + self.log("Transferring %s into %s" % (self._bytename(size), path)) self.log("From %s" % (url,)) download_with_aria2(url, path) @@ -239,12 +240,14 @@ def download(self, res: t.Dict, target: str) -> None: class 
CDSClientExtended(SplitRequestMixin): """Extended CDS Client class that separates fetch and download stage.""" + def __init__(self, *args, **kwargs): self.c = SplitCDSRequest(*args, **kwargs) class MARSECMWFServiceExtended(api.ECMWFService, SplitRequestMixin): """Extended MARS ECMFService class that separates fetch and download stage.""" + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.c = SplitMARSRequest( @@ -259,7 +262,8 @@ def __init__(self, *args, **kwargs): class PublicECMWFServerExtended(api.ECMWFDataServer, SplitRequestMixin): - def __init__(self, *args, dataset='', **kwargs): + + def __init__(self, *args, dataset="", **kwargs): super().__init__(*args, **kwargs) self.c = SplitMARSRequest( self.url, @@ -290,6 +294,7 @@ class MarsClient(Client): config: A config that contains pipeline parameters, such as API keys. level: Default log level for the client. """ + def retrieve(self, dataset: str, selection: t.Dict, manifest: Manifest) -> None: c = MARSECMWFServiceExtended( "mars", @@ -305,7 +310,7 @@ def retrieve(self, dataset: str, selection: t.Dict, manifest: Manifest) -> None: precise_fetch_start_time = ( datetime.datetime.utcnow() .replace(tzinfo=datetime.timezone.utc) - .isoformat(timespec='seconds') + .isoformat(timespec="seconds") ) manifest.prev_stage_precise_start_time = precise_fetch_start_time result = c.fetch(req=selection_) @@ -313,7 +318,7 @@ def retrieve(self, dataset: str, selection: t.Dict, manifest: Manifest) -> None: @property def license_url(self): - return 'https://apps.ecmwf.int/datasets/licences/general/' + return "https://apps.ecmwf.int/datasets/licences/general/" @classmethod def num_requests_per_key(cls, dataset: str) -> int: @@ -333,6 +338,7 @@ def num_requests_per_key(cls, dataset: str) -> int: class ECMWFPublicClient(Client): """A client for ECMWF's public datasets, like TIGGE.""" + def retrieve(self, dataset: str, selection: t.Dict, manifest: Manifest) -> None: c = PublicECMWFServerExtended( 
url=os.environ.get("CLIENT_URL"), @@ -348,7 +354,7 @@ def retrieve(self, dataset: str, selection: t.Dict, manifest: Manifest) -> None: precise_fetch_start_time = ( datetime.datetime.utcnow() .replace(tzinfo=datetime.timezone.utc) - .isoformat(timespec='seconds') + .isoformat(timespec="seconds") ) manifest.prev_stage_precise_start_time = precise_fetch_start_time result = c.fetch(req=selection_) @@ -362,8 +368,8 @@ def num_requests_per_key(cls, dataset: str) -> int: @property def license_url(self): if not self.dataset: - raise ValueError('must specify a dataset for this client!') - return f'https://apps.ecmwf.int/datasets/data/{self.dataset.lower()}/licence/' + raise ValueError("must specify a dataset for this client!") + return f"https://apps.ecmwf.int/datasets/data/{self.dataset.lower()}/licence/" class FakeClient(Client): @@ -374,14 +380,14 @@ def retrieve(self, dataset: str, selection: t.Dict, manifest: Manifest) -> None: precise_retrieve_start_time = ( datetime.datetime.utcnow() .replace(tzinfo=datetime.timezone.utc) - .isoformat(timespec='seconds') + .isoformat(timespec="seconds") ) manifest.prev_stage_precise_start_time = precise_retrieve_start_time - self.logger.debug(f'Downloading {dataset}.') + self.logger.debug(f"Downloading {dataset}.") @property def license_url(self): - return 'lorem ipsum' + return "lorem ipsum" @classmethod def num_requests_per_key(cls, dataset: str) -> int: diff --git a/weather_dl_v2/license_deployment/config.py b/weather_dl_v2/license_deployment/config.py index 1143ce61..2677a60c 100644 --- a/weather_dl_v2/license_deployment/config.py +++ b/weather_dl_v2/license_deployment/config.py @@ -3,7 +3,7 @@ import dataclasses import typing as t -Values = t.Union[t.List['Values'], t.Dict[str, 'Values'], bool, int, float, str] # pytype: disable=not-supported-yet +Values = t.Union[t.List["Values"], t.Dict[str, "Values"], bool, int, float, str] # pytype: disable=not-supported-yet @dataclasses.dataclass @@ -49,7 +49,7 @@ class Config: selection: 
t.Dict[str, Values] = dataclasses.field(default_factory=dict) @classmethod - def from_dict(cls, config: t.Dict) -> 'Config': + def from_dict(cls, config: t.Dict) -> "Config": config_instance = cls() for section_key, section_value in config.items(): if section_key == "parameters": @@ -70,32 +70,36 @@ def optimize_selection_partition(selection: t.Dict) -> t.Dict: """ selection_ = copy.deepcopy(selection) - if 'day' in selection_.keys() and selection_['day'] == 'all': - year, month = selection_['year'], selection_['month'] + if "day" in selection_.keys() and selection_["day"] == "all": + year, month = selection_["year"], selection_["month"] - multiples_error = "Cannot use keyword 'all' on selections with multiple '{type}'s." + multiples_error = ( + "Cannot use keyword 'all' on selections with multiple '{type}'s." + ) if isinstance(year, list): - assert len(year) == 1, multiples_error.format(type='year') + assert len(year) == 1, multiples_error.format(type="year") year = year[0] if isinstance(month, list): - assert len(month) == 1, multiples_error.format(type='month') + assert len(month) == 1, multiples_error.format(type="month") month = month[0] if isinstance(year, str): - assert '/' not in year, multiples_error.format(type='year') + assert "/" not in year, multiples_error.format(type="year") if isinstance(month, str): - assert '/' not in month, multiples_error.format(type='month') + assert "/" not in month, multiples_error.format(type="month") year, month = int(year), int(month) _, n_days_in_month = calendar.monthrange(year, month) - selection_['date'] = f'{year:04d}-{month:02d}-01/to/{year:04d}-{month:02d}-{n_days_in_month:02d}' - del selection_['day'] - del selection_['month'] - del selection_['year'] + selection_[ + "date" + ] = f"{year:04d}-{month:02d}-01/to/{year:04d}-{month:02d}-{n_days_in_month:02d}" + del selection_["day"] + del selection_["month"] + del selection_["year"] return selection_ diff --git a/weather_dl_v2/license_deployment/database.py 
b/weather_dl_v2/license_deployment/database.py index b2d1c525..1449e283 100644 --- a/weather_dl_v2/license_deployment/database.py +++ b/weather_dl_v2/license_deployment/database.py @@ -11,30 +11,37 @@ logger = logging.getLogger(__name__) + class Database(abc.ABC): + @abc.abstractmethod def _get_db(self): pass class CRUDOperations(abc.ABC): + @abc.abstractmethod def _initialize_license_deployment(self, license_id: str) -> dict: pass - + @abc.abstractmethod def _get_config_from_queue_by_license_id(self, license_id: str) -> dict: pass @abc.abstractmethod - def _remove_config_from_license_queue(self, license_id: str, config_name: str) -> None: + def _remove_config_from_license_queue( + self, license_id: str, config_name: str + ) -> None: pass @abc.abstractmethod def _get_partition_from_manifest(self, config_name: str) -> str: pass + class FirestoreClient(Database, CRUDOperations): + def _get_db(self) -> firestore.firestore.Client: """Acquire a firestore client, initializing the firebase app if necessary. Will attempt to get the db client five times. If it's still unsuccessful, a @@ -53,10 +60,12 @@ def _get_db(self) -> firestore.firestore.Client: cred = credentials.ApplicationDefault() firebase_admin.initialize_app(cred) - logger.info('Initialized Firebase App.') + logger.info("Initialized Firebase App.") if attempts > 4: - raise RuntimeError('Exceeded number of retries to get firestore client.') from e + raise RuntimeError( + "Exceeded number of retries to get firestore client." 
+ ) from e time.sleep(get_wait_interval(attempts)) @@ -65,13 +74,17 @@ def _get_db(self) -> firestore.firestore.Client: return db def _initialize_license_deployment(self, license_id: str) -> dict: - result: DocumentSnapshot = self._get_db().collection('license').document(license_id).get() + result: DocumentSnapshot = ( + self._get_db().collection("license").document(license_id).get() + ) return result.to_dict() def _get_config_from_queue_by_license_id(self, license_id: str) -> str | None: - result: DocumentSnapshot = self._get_db().collection('queues').document(license_id).get(['queue']) + result: DocumentSnapshot = ( + self._get_db().collection("queues").document(license_id).get(["queue"]) + ) if result.exists: - queue = result.to_dict()['queue'] + queue = result.to_dict()["queue"] if len(queue) > 0: return queue[0] return None @@ -80,29 +93,42 @@ def _get_partition_from_manifest(self, config_name: str) -> str | None: transaction = self._get_db().transaction() return get_partition_from_manifest(transaction, config_name) - def _remove_config_from_license_queue(self, license_id: str, config_name: str) -> None: - result: WriteResult = self._get_db().collection('queues').document(license_id).update({ - 'queue': firestore.ArrayRemove([config_name])}) - logger.info(f"Updated {license_id} queue in 'queues' collection. Update_time: {result.update_time}.") + def _remove_config_from_license_queue( + self, license_id: str, config_name: str + ) -> None: + result: WriteResult = ( + self._get_db() + .collection("queues") + .document(license_id) + .update({"queue": firestore.ArrayRemove([config_name])}) + ) + logger.info( + f"Updated {license_id} queue in 'queues' collection. Update_time: {result.update_time}." 
+ ) @firestore.transactional def get_partition_from_manifest(transaction, config_name: str) -> str | None: db_client = FirestoreClient() filter_1 = FieldFilter("config_name", "==", config_name) - filter_2 = FieldFilter("status", "==", 'scheduled') + filter_2 = FieldFilter("status", "==", "scheduled") and_filter = And(filters=[filter_1, filter_2]) - snapshot = db_client._get_db().collection('test_manifest').where(filter=and_filter)\ - .limit(1).get(transaction=transaction) + snapshot = ( + db_client._get_db() + .collection("test_manifest") + .where(filter=and_filter) + .limit(1) + .get(transaction=transaction) + ) if len(snapshot) > 0: snapshot = snapshot[0] else: return None - ref: DocumentReference = db_client._get_db().collection(u'test_manifest').document(snapshot.id) - transaction.update(ref, { - u'status': u'processing' - }) + ref: DocumentReference = ( + db_client._get_db().collection("test_manifest").document(snapshot.id) + ) + transaction.update(ref, {"status": "processing"}) return snapshot.to_dict() diff --git a/weather_dl_v2/license_deployment/fetch.py b/weather_dl_v2/license_deployment/fetch.py index b51e9657..ccdef96a 100644 --- a/weather_dl_v2/license_deployment/fetch.py +++ b/weather_dl_v2/license_deployment/fetch.py @@ -14,39 +14,50 @@ db_client = FirestoreClient() secretmanager_client = secretmanager.SecretManagerServiceClient() - + logger = logging.getLogger(__name__) + def create_job(request, result): res = { - 'config_name': request['config_name'], - 'dataset': request['dataset'], - 'selection': json.loads(request['selection']), - 'user_id': request['username'], - 'url': result['href'], - 'target_path': request['location'] - } + "config_name": request["config_name"], + "dataset": request["dataset"], + "selection": json.loads(request["selection"]), + "user_id": request["username"], + "url": result["href"], + "target_path": request["location"], + } data_str = json.dumps(res) logger.info(f"Creating download job for res: {data_str}") 
create_download_job(data_str) + @exceptionit def make_fetch_request(request): - client = CLIENTS[client_name](request['dataset']) + client = CLIENTS[client_name](request["dataset"]) manifest = FirestoreManifest() - logger.info(f'By using {client_name} datasets, ' - f'users agree to the terms and conditions specified in {client.license_url!r}') - - target = request['location'] - selection = json.loads(request['selection']) - - logger.info(f'Fetching data for {target!r}.') - with manifest.transact(request['config_name'], request['dataset'], selection, target, request['username']): - result = client.retrieve(request['dataset'], selection, manifest) + logger.info( + f"By using {client_name} datasets, " + f"users agree to the terms and conditions specified in {client.license_url!r}" + ) + + target = request["location"] + selection = json.loads(request["selection"]) + + logger.info(f"Fetching data for {target!r}.") + with manifest.transact( + request["config_name"], + request["dataset"], + selection, + target, + request["username"], + ): + result = client.retrieve(request["dataset"], selection, manifest) create_job(request, result) + def fetch_request_from_db(): request = None config_name = db_client._get_config_from_queue_by_license_id(license_id) @@ -72,7 +83,7 @@ def main(): # Check if the maximum concurrency level has been reached # If so, wait for a slot to become available - while executor._work_queue.qsize()>=concurrency_limit: + while executor._work_queue.qsize() >= concurrency_limit: time.sleep(1) @@ -81,17 +92,19 @@ def boot_up(license: str) -> None: result = db_client._initialize_license_deployment(license) license_id = license - client_name = result['client_name'] - concurrency_limit = result['number_of_requests'] + client_name = result["client_name"] + concurrency_limit = result["number_of_requests"] - response = secretmanager_client.access_secret_version(request={"name": result['secret_id']}) + response = secretmanager_client.access_secret_version( + 
request={"name": result["secret_id"]} + ) payload = response.payload.data.decode("UTF-8") secret_dict = json.loads(payload) - os.environ.setdefault('CLIENT_URL', secret_dict.get('api_url', "")) - os.environ.setdefault('CLIENT_KEY', secret_dict.get('api_key', "")) - os.environ.setdefault('CLIENT_EMAIL', secret_dict.get('api_email', "")) - + os.environ.setdefault("CLIENT_URL", secret_dict.get("api_url", "")) + os.environ.setdefault("CLIENT_KEY", secret_dict.get("api_key", "")) + os.environ.setdefault("CLIENT_EMAIL", secret_dict.get("api_email", "")) + if __name__ == "__main__": license = sys.argv[2] diff --git a/weather_dl_v2/license_deployment/job_creator.py b/weather_dl_v2/license_deployment/job_creator.py index b5f936ca..75781001 100644 --- a/weather_dl_v2/license_deployment/job_creator.py +++ b/weather_dl_v2/license_deployment/job_creator.py @@ -4,7 +4,7 @@ import uuid from kubernetes import client, config - + def create_download_job(message): """Creates a kubernetes workflow of type Job for downloading the data.""" parsed_message = json.loads(message) @@ -15,9 +15,18 @@ def create_download_job(message): with open(path.join(path.dirname(__file__), "downloader.yaml")) as f: dep = yaml.safe_load(f) uid = uuid.uuid4() - dep['metadata']['name'] = f'downloader-job-id-{uid}' + dep["metadata"]["name"] = f"downloader-job-id-{uid}" # d = target_path.rsplit('/')[-1] # dep['metadata']['name'] = f'a{d}a' - dep['spec']['template']['spec']['containers'][0]['command'] = ['python', 'downloader.py', config_name, dataset, selection, user_id, url, target_path] + dep["spec"]["template"]["spec"]["containers"][0]["command"] = [ + "python", + "downloader.py", + config_name, + dataset, + selection, + user_id, + url, + target_path, + ] batch_api = client.BatchV1Api() - batch_api.create_namespaced_job(body=dep, namespace='default') \ No newline at end of file + batch_api.create_namespaced_job(body=dep, namespace="default") diff --git a/weather_dl_v2/license_deployment/manifest.py 
b/weather_dl_v2/license_deployment/manifest.py index 93e965a4..873b2213 100644 --- a/weather_dl_v2/license_deployment/manifest.py +++ b/weather_dl_v2/license_deployment/manifest.py @@ -17,7 +17,7 @@ get_file_size, get_wait_interval, generate_md5_hash, - GLOBAL_COVERAGE_AREA + GLOBAL_COVERAGE_AREA, ) import firebase_admin @@ -31,11 +31,12 @@ logger = logging.getLogger(__name__) """An implementation-dependent Manifest URI.""" -Location = t.NewType('Location', str) +Location = t.NewType("Location", str) class ManifestException(Exception): """Errors that occur in Manifest Clients.""" + pass @@ -51,10 +52,11 @@ class Stage(enum.Enum): retrieve : In case of clients where there is no proper separation of fetch & download stages (eg. CDS client), request will be in the retrieve stage i.e. fetch + download. """ - RETRIEVE = 'retrieve' - FETCH = 'fetch' - DOWNLOAD = 'download' - UPLOAD = 'upload' + + RETRIEVE = "retrieve" + FETCH = "fetch" + DOWNLOAD = "download" + UPLOAD = "upload" class Status(enum.Enum): @@ -68,15 +70,16 @@ class Status(enum.Enum): success : This represents the request state execution completed successfully without any error. failure : This represents the request state execution failed. 
""" - PROCESSING = 'processing' - SCHEDULED = 'scheduled' - IN_PROGRESS = 'in-progress' - SUCCESS = 'success' - FAILURE = 'failure' + + PROCESSING = "processing" + SCHEDULED = "scheduled" + IN_PROGRESS = "in-progress" + SUCCESS = "success" + FAILURE = "failure" @dataclasses.dataclass -class DownloadStatus(): +class DownloadStatus: """Data recorded in `Manifest`s reflecting the status of a download.""" """The name of the config file associated with the request.""" @@ -137,13 +140,13 @@ class DownloadStatus(): upload_end_time: t.Optional[str] = "" @classmethod - def from_dict(cls, download_status: t.Dict) -> 'DownloadStatus': + def from_dict(cls, download_status: t.Dict) -> "DownloadStatus": """Instantiate DownloadStatus dataclass from dict.""" download_status_instance = cls() for key, value in download_status.items(): - if key == 'status': + if key == "status": setattr(download_status_instance, key, Status(value)) - elif key == 'stage' and value is not None: + elif key == "stage" and value is not None: setattr(download_status_instance, key, Stage(value)) else: setattr(download_status_instance, key, value) @@ -161,7 +164,7 @@ def to_dict(cls, instance) -> t.Dict: download_status_dict[key] = value.value elif isinstance(value, pd.Timestamp): download_status_dict[key] = value.isoformat() - elif key == 'selection' and value is not None: + elif key == "selection" and value is not None: download_status_dict[key] = json.dumps(value) else: download_status_dict[key] = value @@ -212,76 +215,107 @@ def __post_init__(self): """Initialize the manifest.""" pass - def schedule(self, config_name: str, dataset: str, selection: t.Dict, location: str, user: str) -> None: + def schedule( + self, + config_name: str, + dataset: str, + selection: t.Dict, + location: str, + user: str, + ) -> None: """Indicate that a job has been scheduled for download. 'scheduled' jobs occur before 'in-progress', 'success' or 'finished'. 
""" - scheduled_time = datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc).isoformat(timespec='seconds') + scheduled_time = ( + datetime.datetime.utcnow() + .replace(tzinfo=datetime.timezone.utc) + .isoformat(timespec="seconds") + ) self.status = DownloadStatus( - config_name=config_name, - dataset=dataset if dataset else None, - selection=selection, - location=location, - area=fetch_geo_polygon(selection.get('area', GLOBAL_COVERAGE_AREA)), - username=user, - stage=None, - status=Status.SCHEDULED, - error=None, - size=None, - scheduled_time=scheduled_time, - retrieve_start_time=None, - retrieve_end_time=None, - fetch_start_time=None, - fetch_end_time=None, - download_start_time=None, - download_end_time=None, - upload_start_time=None, - upload_end_time=None, - ) + config_name=config_name, + dataset=dataset if dataset else None, + selection=selection, + location=location, + area=fetch_geo_polygon(selection.get("area", GLOBAL_COVERAGE_AREA)), + username=user, + stage=None, + status=Status.SCHEDULED, + error=None, + size=None, + scheduled_time=scheduled_time, + retrieve_start_time=None, + retrieve_end_time=None, + fetch_start_time=None, + fetch_end_time=None, + download_start_time=None, + download_end_time=None, + upload_start_time=None, + upload_end_time=None, + ) self._update(self.status) - def skip(self, config_name: str, dataset: str, selection: t.Dict, location: str, user: str) -> None: + def skip( + self, + config_name: str, + dataset: str, + selection: t.Dict, + location: str, + user: str, + ) -> None: """Updates the manifest to mark the shards that were skipped in the current job as 'upload' stage and 'success' status, indicating that they have already been downloaded. """ old_status = self._read(location) # The manifest needs to be updated for a skipped shard if its entry is not present, or # if the stage is not 'upload', or if the stage is 'upload' but the status is not 'success'. 
- if old_status.location != location or old_status.stage != Stage.UPLOAD or old_status.status != Status.SUCCESS: + if ( + old_status.location != location + or old_status.stage != Stage.UPLOAD + or old_status.status != Status.SUCCESS + ): current_utc_time = ( datetime.datetime.utcnow() .replace(tzinfo=datetime.timezone.utc) - .isoformat(timespec='seconds') + .isoformat(timespec="seconds") ) size = get_file_size(location) status = DownloadStatus( - config_name=config_name, - dataset=dataset if dataset else None, - selection=selection, - location=location, - area=fetch_geo_polygon(selection.get('area', GLOBAL_COVERAGE_AREA)), - username=user, - stage=Stage.UPLOAD, - status=Status.SUCCESS, - error=None, - size=size, - scheduled_time=None, - retrieve_start_time=None, - retrieve_end_time=None, - fetch_start_time=None, - fetch_end_time=None, - download_start_time=None, - download_end_time=None, - upload_start_time=current_utc_time, - upload_end_time=current_utc_time, - ) + config_name=config_name, + dataset=dataset if dataset else None, + selection=selection, + location=location, + area=fetch_geo_polygon(selection.get("area", GLOBAL_COVERAGE_AREA)), + username=user, + stage=Stage.UPLOAD, + status=Status.SUCCESS, + error=None, + size=size, + scheduled_time=None, + retrieve_start_time=None, + retrieve_end_time=None, + fetch_start_time=None, + fetch_end_time=None, + download_start_time=None, + download_end_time=None, + upload_start_time=current_utc_time, + upload_end_time=current_utc_time, + ) self._update(status) - logger.info(f'Manifest updated for skipped shard: {location!r} -- {DownloadStatus.to_dict(status)!r}.') + logger.info( + f"Manifest updated for skipped shard: {location!r} -- {DownloadStatus.to_dict(status)!r}." 
+ ) - def _set_for_transaction(self, config_name: str, dataset: str, selection: t.Dict, location: str, user: str) -> None: + def _set_for_transaction( + self, + config_name: str, + dataset: str, + selection: t.Dict, + location: str, + user: str, + ) -> None: """Reset Manifest state in preparation for a new transaction.""" self.status = dataclasses.replace(self._read(location)) self.status.config_name = config_name @@ -301,7 +335,7 @@ def __exit__(self, exc_type, exc_inst, exc_tb) -> None: else: status = Status.FAILURE # For explanation, see https://docs.python.org/3/library/traceback.html#traceback.format_exception - error = '\n'.join(traceback.format_exception(exc_type, exc_inst, exc_tb)) + error = "\n".join(traceback.format_exception(exc_type, exc_inst, exc_tb)) new_status = dataclasses.replace(self.status) new_status.error = error @@ -309,7 +343,7 @@ def __exit__(self, exc_type, exc_inst, exc_tb) -> None: current_utc_time = ( datetime.datetime.utcnow() .replace(tzinfo=datetime.timezone.utc) - .isoformat(timespec='seconds') + .isoformat(timespec="seconds") ) # This is necessary for setting the precise start time of the previous stage @@ -333,7 +367,14 @@ def __exit__(self, exc_type, exc_inst, exc_tb) -> None: self._update(self.status) - def transact(self, config_name: str, dataset: str, selection: t.Dict, location: str, user: str) -> 'Manifest': + def transact( + self, + config_name: str, + dataset: str, + selection: t.Dict, + location: str, + user: str, + ) -> "Manifest": """Create a download transaction.""" self._set_for_transaction(config_name, dataset, selection, location, user) return self @@ -347,7 +388,7 @@ def set_stage(self, stage: Stage) -> None: current_utc_time = ( datetime.datetime.utcnow() .replace(tzinfo=datetime.timezone.utc) - .isoformat(timespec='seconds') + .isoformat(timespec="seconds") ) if stage == Stage.FETCH: @@ -407,10 +448,12 @@ def _get_db(self) -> firestore.firestore.Client: cred = credentials.ApplicationDefault() 
firebase_admin.initialize_app(cred) - logger.info('Initialized Firebase App.') + logger.info("Initialized Firebase App.") if attempts > 4: - raise ManifestException('Exceeded number of retries to get firestore client.') from e + raise ManifestException( + "Exceeded number of retries to get firestore client." + ) from e time.sleep(get_wait_interval(attempts)) @@ -424,9 +467,7 @@ def _read(self, location: str) -> DownloadStatus: doc_id = generate_md5_hash(location) # Update document with download status - download_doc_ref = ( - self.root_document_for_store(doc_id) - ) + download_doc_ref = self.root_document_for_store(doc_id) result = download_doc_ref.get() row = {} @@ -437,24 +478,24 @@ def _read(self, location: str) -> DownloadStatus: def _update(self, download_status: DownloadStatus) -> None: """Update or create a download status record.""" - logger.info('Updating Firestore Manifest.') + logger.info("Updating Firestore Manifest.") status = DownloadStatus.to_dict(download_status) - doc_id = generate_md5_hash(status['location']) + doc_id = generate_md5_hash(status["location"]) # Update document with download status - download_doc_ref = ( - self.root_document_for_store(doc_id) - ) + download_doc_ref = self.root_document_for_store(doc_id) result: WriteResult = download_doc_ref.set(status) - logger.info(f'Firestore manifest updated. ' - f'update_time={result.update_time}, ' - f'filename={download_status.location}.') + logger.info( + f"Firestore manifest updated. " + f"update_time={result.update_time}, " + f"filename={download_status.location}." + ) def root_document_for_store(self, store_scheme: str) -> DocumentReference: """Get the root manifest document given the user's config and current document's storage location.""" # TODO: Get user-defined collection for manifest. 
- root_collection = 'test_manifest' + root_collection = "test_manifest" return self._get_db().collection(root_collection).document(store_scheme) diff --git a/weather_dl_v2/license_deployment/util.py b/weather_dl_v2/license_deployment/util.py index 2afb29d3..7ea99ffe 100644 --- a/weather_dl_v2/license_deployment/util.py +++ b/weather_dl_v2/license_deployment/util.py @@ -24,15 +24,20 @@ LONGITUDE_RANGE = (-180, 180) GLOBAL_COVERAGE_AREA = [90, -180, -90, 180] + def exceptionit(func): def inner_function(*args, **kwargs): try: func(*args, **kwargs) except Exception as e: logger.error(f"exception in {func.__name__} {e.__class__.__name__} {e}") + return inner_function -def _retry_if_valid_input_but_server_or_socket_error_and_timeout_filter(exception) -> bool: + +def _retry_if_valid_input_but_server_or_socket_error_and_timeout_filter( + exception, +) -> bool: if isinstance(exception, socket.timeout): return True if isinstance(exception, TimeoutError): @@ -44,6 +49,7 @@ def _retry_if_valid_input_but_server_or_socket_error_and_timeout_filter(exceptio class _FakeClock: + def sleep(self, value): pass @@ -53,7 +59,7 @@ def retry_with_exponential_backoff(fun): clock = retry.Clock() # Use a fake clock only during test time... 
- if 'unittest' in sys.modules.keys(): + if "unittest" in sys.modules.keys(): clock = _FakeClock() return retry.with_exponential_backoff( @@ -80,9 +86,11 @@ def ichunked(iterable: t.Iterable, n: int) -> t.Iterator[t.Iterable]: def copy(src: str, dst: str) -> None: """Copy data via `gsutil cp`.""" try: - subprocess.run(['gsutil', 'cp', src, dst], check=True, capture_output=True) + subprocess.run(["gsutil", "cp", src, dst], check=True, capture_output=True) except subprocess.CalledProcessError as e: - logger.info(f'Failed to copy file {src!r} to {dst!r} due to {e.stderr.decode("utf-8")}') + logger.info( + f'Failed to copy file {src!r} to {dst!r} due to {e.stderr.decode("utf-8")}' + ) raise @@ -90,7 +98,7 @@ def copy(src: str, dst: str) -> None: def to_json_serializable_type(value: t.Any) -> t.Any: """Returns the value with a type serializable to JSON""" # Note: The order of processing is significant. - logger.info('Serializing to JSON') + logger.info("Serializing to JSON") if pd.isna(value) or value is None: return None @@ -99,7 +107,11 @@ def to_json_serializable_type(value: t.Any) -> t.Any: elif type(value) == np.ndarray: # Will return a scaler if array is of size 1, else will return a list. return value.tolist() - elif type(value) == datetime.datetime or type(value) == str or type(value) == np.datetime64: + elif ( + type(value) == datetime.datetime + or type(value) == str + or type(value) == np.datetime64 + ): # Assume strings are ISO format timestamps... try: value = datetime.datetime.fromisoformat(value) @@ -122,7 +134,7 @@ def to_json_serializable_type(value: t.Any) -> t.Any: return value.replace(tzinfo=datetime.timezone.utc).isoformat() elif type(value) == np.timedelta64: # Return time delta in seconds. - return float(value / np.timedelta64(1, 's')) + return float(value / np.timedelta64(1, "s")) # This check must happen after processing np.timedelta64 and np.datetime64. 
elif np.issubdtype(type(value), np.integer): return int(value) @@ -135,13 +147,13 @@ def fetch_geo_polygon(area: t.Union[list, str]) -> str: # Ref: https://confluence.ecmwf.int/pages/viewpage.action?pageId=151520973 if isinstance(area, str): # European area - if area == 'E': + if area == "E": area = [73.5, -27, 33, 45] # Global area - elif area == 'G': + elif area == "G": area = GLOBAL_COVERAGE_AREA else: - raise RuntimeError(f'Not a valid value for area in config: {area}.') + raise RuntimeError(f"Not a valid value for area in config: {area}.") n, w, s, e = [float(x) for x in area] if s < LATITUDE_RANGE[0]: @@ -163,22 +175,24 @@ def fetch_geo_polygon(area: t.Union[list, str]) -> str: def get_file_size(path: str) -> float: parsed_gcs_path = urlparse(path) - if parsed_gcs_path.scheme != 'gs' or parsed_gcs_path.netloc == '': - return os.stat(path).st_size / (1024 ** 3) if os.path.exists(path) else 0 + if parsed_gcs_path.scheme != "gs" or parsed_gcs_path.netloc == "": + return os.stat(path).st_size / (1024**3) if os.path.exists(path) else 0 else: - return gcsio.GcsIO().size(path) / (1024 ** 3) if gcsio.GcsIO().exists(path) else 0 + return ( + gcsio.GcsIO().size(path) / (1024**3) if gcsio.GcsIO().exists(path) else 0 + ) def get_wait_interval(num_retries: int = 0) -> float: """Returns next wait interval in seconds, using an exponential backoff algorithm.""" if 0 == num_retries: return 0 - return 2 ** num_retries + return 2**num_retries def generate_md5_hash(input: str) -> str: """Generates md5 hash for the input string.""" - return hashlib.md5(input.encode('utf-8')).hexdigest() + return hashlib.md5(input.encode("utf-8")).hexdigest() def download_with_aria2(url: str, path: str) -> None: @@ -187,9 +201,24 @@ def download_with_aria2(url: str, path: str) -> None: dir_path, file_name = os.path.split(path) try: subprocess.run( - ['aria2c', '-x', '16', '-s', '16', url, '-d', dir_path, '-o', file_name, '--allow-overwrite'], + [ + "aria2c", + "-x", + "16", + "-s", + "16", + url, 
+ "-d", + dir_path, + "-o", + file_name, + "--allow-overwrite", + ], check=True, - capture_output=True) + capture_output=True, + ) except subprocess.CalledProcessError as e: - logger.info(f'Failed download from server {url!r} to {path!r} due to {e.stderr.decode("utf-8")}') + logger.info( + f'Failed download from server {url!r} to {path!r} due to {e.stderr.decode("utf-8")}' + ) raise From f68f21aece746aa29ddb19e9f27c0f4c5bb723e0 Mon Sep 17 00:00:00 2001 From: aniketinfocusp <122869307+aniketinfocusp@users.noreply.github.com> Date: Wed, 26 Jul 2023 17:44:37 +0530 Subject: [PATCH 24/51] passing license handler to update license handler (#369) --- weather_dl_v2/fastapi-server/main.py | 2 +- weather_dl_v2/fastapi-server/routers/license.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/weather_dl_v2/fastapi-server/main.py b/weather_dl_v2/fastapi-server/main.py index d1bd29d3..ffd7a05c 100644 --- a/weather_dl_v2/fastapi-server/main.py +++ b/weather_dl_v2/fastapi-server/main.py @@ -23,7 +23,7 @@ def create_pending_license_deployments(): for license_id in license_list: try: logger.info(f"Creating license deployment for {license_id}") - create_deployment(license_id) + create_deployment(license_id, license_handler) except Exception as e: logger.error(f"License deployment failed for {license_id}. 
Exception: {e}") diff --git a/weather_dl_v2/fastapi-server/routers/license.py b/weather_dl_v2/fastapi-server/routers/license.py index b764786a..ae7bbd2f 100644 --- a/weather_dl_v2/fastapi-server/routers/license.py +++ b/weather_dl_v2/fastapi-server/routers/license.py @@ -26,15 +26,15 @@ class LicenseInternal(License): def get_create_deployment(): - def create_deployment(license_id: str): + def create_deployment(license_id: str, license_handler: LicenseHandler): k8s_deployment_id = create_license_deployment(license_id) - update_license_internal(license_id, k8s_deployment_id) + update_license_internal(license_id, k8s_deployment_id, license_handler) return create_deployment def get_create_deployment_mock(): - def create_deployment_mock(license_id: str): + def create_deployment_mock(license_id: str, license_handler: LicenseHandler): print("create deployment mocked") return create_deployment_mock @@ -91,7 +91,7 @@ async def update_license( license_handler._update_license(license_id, license_dict) terminate_license_deployment(license_id) - create_deployment(license_id) + create_deployment(license_id, license_handler) return {"license_id": license_id, "name": "License updated successfully."} @@ -99,7 +99,7 @@ async def update_license( def update_license_internal( license_id: str, k8s_deployment_id: str, - license_handler: LicenseHandler = Depends(get_license_handler), + license_handler: LicenseHandler, ): if not license_handler._check_license_exists(license_id): raise HTTPException(status_code=404, detail="No such license to update.") @@ -122,7 +122,7 @@ async def add_license( license_dict["k8s_deployment_id"] = "" license_id = license_handler._add_license(license_dict) queue_handler._create_license_queue(license_id, license_dict["client_name"]) - background_tasks.add_task(create_deployment, license_id) + background_tasks.add_task(create_deployment, license_id, license_handler) return {"license_id": license_id, "message": "License added successfully."} From 
430fba16f07c89ddbc7fd5d2b55f9ad829f89a71 Mon Sep 17 00:00:00 2001 From: aniketinfocusp <122869307+aniketinfocusp@users.noreply.github.com> Date: Thu, 27 Jul 2023 15:32:32 +0530 Subject: [PATCH 25/51] `dl-v2` server config (#370) * added server config * added global state for config and db session * separated server config json * minor fix * minor nit: updated var name --- .../database/download_handler.py | 9 +++-- .../database/license_handler.py | 3 +- .../fastapi-server/database/queue_handler.py | 5 ++- .../fastapi-server/database/session.py | 3 +- .../fastapi-server/server_config.json | 5 +++ weather_dl_v2/fastapi-server/server_config.py | 37 +++++++++++++++++++ 6 files changed, 54 insertions(+), 8 deletions(-) create mode 100644 weather_dl_v2/fastapi-server/server_config.json create mode 100644 weather_dl_v2/fastapi-server/server_config.py diff --git a/weather_dl_v2/fastapi-server/database/download_handler.py b/weather_dl_v2/fastapi-server/database/download_handler.py index 65d7eeb3..c2956bf1 100644 --- a/weather_dl_v2/fastapi-server/database/download_handler.py +++ b/weather_dl_v2/fastapi-server/database/download_handler.py @@ -4,6 +4,7 @@ from google.cloud.firestore_v1 import DocumentSnapshot from google.cloud.firestore_v1.types import WriteResult from database.session import get_db +from server_config import get_config logger = logging.getLogger(__name__) @@ -59,11 +60,11 @@ class DownloadHandlerFirestore(DownloadHandler): def __init__(self, db: firestore.firestore.Client): self.db = db - self.collection = "download" + self.collection = get_config().download_collection def _start_download(self, config_name: str, client_name: str) -> None: result: WriteResult = ( - self.db.collection("download") + self.db.collection(self.collection) .document(config_name) .set({"config_name": config_name, "client_name": client_name}) ) @@ -73,13 +74,13 @@ def _start_download(self, config_name: str, client_name: str) -> None: ) def _stop_download(self, config_name: str) -> 
None: - timestamp = self.db.collection("download").document(config_name).delete() + timestamp = self.db.collection(self.collection).document(config_name).delete() logger.info( f"Removed {config_name} in 'download' collection. Update_time: {timestamp}." ) def _check_download_exists(self, config_name: str) -> bool: result: DocumentSnapshot = ( - self.db.collection("download").document(config_name).get() + self.db.collection(self.collection).document(config_name).get() ) return result.exists diff --git a/weather_dl_v2/fastapi-server/database/license_handler.py b/weather_dl_v2/fastapi-server/database/license_handler.py index ad352b01..995a4813 100644 --- a/weather_dl_v2/fastapi-server/database/license_handler.py +++ b/weather_dl_v2/fastapi-server/database/license_handler.py @@ -4,6 +4,7 @@ from google.cloud.firestore_v1 import DocumentSnapshot from google.cloud.firestore_v1.types import WriteResult from database.session import get_db +from server_config import get_config logger = logging.getLogger(__name__) @@ -114,7 +115,7 @@ class LicenseHandlerFirestore(LicenseHandler): def __init__(self, db: firestore.firestore.Client): self.db = db - self.collection = "license" + self.collection = get_config().license_collection # TODO: find alternative way to create license_id def _add_license(self, license_dict: dict) -> str: diff --git a/weather_dl_v2/fastapi-server/database/queue_handler.py b/weather_dl_v2/fastapi-server/database/queue_handler.py index 8bda3fa9..bd7fb35d 100644 --- a/weather_dl_v2/fastapi-server/database/queue_handler.py +++ b/weather_dl_v2/fastapi-server/database/queue_handler.py @@ -4,6 +4,7 @@ from google.cloud.firestore_v1 import DocumentSnapshot from google.cloud.firestore_v1.types import WriteResult from database.session import get_db +from server_config import get_config logger = logging.getLogger(__name__) @@ -124,9 +125,9 @@ def _remove_license_queue(self, license_id: str) -> None: class QueueHandlerFirestore(QueueHandler): - def __init__(self, db: 
firestore.firestore.Client, collection: str = "queues"): + def __init__(self, db: firestore.firestore.Client): self.db = db - self.collection = collection + self.collection = get_config().queues_collection def _create_license_queue(self, license_id: str, client_name: str) -> None: result: WriteResult = ( diff --git a/weather_dl_v2/fastapi-server/database/session.py b/weather_dl_v2/fastapi-server/database/session.py index e40f8157..83f7c996 100644 --- a/weather_dl_v2/fastapi-server/database/session.py +++ b/weather_dl_v2/fastapi-server/database/session.py @@ -15,13 +15,14 @@ class Database(abc.ABC): def _get_db(self): pass +db = None def get_db() -> firestore.firestore.Client: """Acquire a firestore client, initializing the firebase app if necessary. Will attempt to get the db client five times. If it's still unsuccessful, a `ManifestException` will be raised. """ - db = None + global db attempts = 0 while db is None: diff --git a/weather_dl_v2/fastapi-server/server_config.json b/weather_dl_v2/fastapi-server/server_config.json new file mode 100644 index 00000000..8160c70c --- /dev/null +++ b/weather_dl_v2/fastapi-server/server_config.json @@ -0,0 +1,5 @@ +{ + "download_collection": "download", + "queues_collection": "queues", + "license_collection": "license" +} \ No newline at end of file diff --git a/weather_dl_v2/fastapi-server/server_config.py b/weather_dl_v2/fastapi-server/server_config.py new file mode 100644 index 00000000..3a10bfac --- /dev/null +++ b/weather_dl_v2/fastapi-server/server_config.py @@ -0,0 +1,37 @@ +import dataclasses +import typing as t +import json + +Values = t.Union[t.List['Values'], t.Dict[str, 'Values'], bool, int, float, str] # pytype: disable=not-supported-yet + +@dataclasses.dataclass +class ServerConfig: + download_collection: str = "" + queues_collection: str = "" + license_collection: str = "" + kwargs: t.Optional[t.Dict[str, Values]] = dataclasses.field(default_factory=dict) + + @classmethod + def from_dict(cls, config: t.Dict): + 
config_instance = cls() + + for key, value in config.items(): + if hasattr(config_instance, key): + setattr(config_instance, key, value) + else: + config_instance.kwargs[key] = value + + return config_instance + +server_config = None + +def get_config(): + global server_config + server_config_json = "server_config.json" + + if(server_config is None): + with open(server_config_json) as file: + firestore_dict = json.load(file) + server_config = ServerConfig.from_dict(firestore_dict) + + return server_config From 531a8a65025caa28cdea27f1a52866b714f52844 Mon Sep 17 00:00:00 2001 From: aniketinfocusp <122869307+aniketinfocusp@users.noreply.github.com> Date: Fri, 28 Jul 2023 16:24:25 +0530 Subject: [PATCH 26/51] Added config status to dl-v2-server (#371) * parallel query for aggregating config stats and manifest handler * get download routes * mock manifest_handler and updated download tests * updated manifest collection in config * lint fixes * updated config stat response * removed extra db call for get download by config * updated manifest collection name * lint fixes --- .../database/download_handler.py | 43 +++++ .../database/manifest_handler.py | 137 ++++++++++++++++ .../fastapi-server/database/session.py | 2 + .../fastapi-server/routers/download.py | 152 ++++++++++++------ .../fastapi-server/server_config.json | 3 +- weather_dl_v2/fastapi-server/server_config.py | 8 +- .../tests/integration/test_download.py | 49 ++---- 7 files changed, 308 insertions(+), 86 deletions(-) create mode 100644 weather_dl_v2/fastapi-server/database/manifest_handler.py diff --git a/weather_dl_v2/fastapi-server/database/download_handler.py b/weather_dl_v2/fastapi-server/database/download_handler.py index c2956bf1..349e739e 100644 --- a/weather_dl_v2/fastapi-server/database/download_handler.py +++ b/weather_dl_v2/fastapi-server/database/download_handler.py @@ -31,6 +31,14 @@ def _stop_download(self, config_name: str) -> None: def _check_download_exists(self, config_name: str) -> bool: 
pass + @abc.abstractmethod + def _get_downloads(self, client_name: str) -> list: + pass + + @abc.abstractmethod + def _get_download_by_config_name(self, config_name: str): + pass + class DownloadHandlerMock(DownloadHandler): @@ -55,6 +63,12 @@ def _check_download_exists(self, config_name: str) -> bool: else: return True + def _get_downloads(self, client_name: str) -> list: + return [{"config_name": "example.cfg", "client_name": "client"}] + + def _get_download_by_config_name(self, config_name: str): + return {"config_name": "example.cfg", "client_name": "client"} + class DownloadHandlerFirestore(DownloadHandler): @@ -84,3 +98,32 @@ def _check_download_exists(self, config_name: str) -> bool: self.db.collection(self.collection).document(config_name).get() ) return result.exists + + def _get_downloads(self, client_name: str) -> list: + snapshot_list = None + if client_name: + snapshot_list = ( + self.db.collection(self.collection) + .where("client_name", "==", client_name) + .get() + ) + else: + snapshot_list = self.db.collection(self.collection).get() + result = [] + for snapshot in snapshot_list: + result.append( + self.db.collection(self.collection) + .document(snapshot.id) + .get() + .to_dict() + ) + return result + + def _get_download_by_config_name(self, config_name: str): + result: DocumentSnapshot = ( + self.db.collection(self.collection).document(config_name).get() + ) + if result.exists: + return result.to_dict() + else: + return None diff --git a/weather_dl_v2/fastapi-server/database/manifest_handler.py b/weather_dl_v2/fastapi-server/database/manifest_handler.py new file mode 100644 index 00000000..6c6e3323 --- /dev/null +++ b/weather_dl_v2/fastapi-server/database/manifest_handler.py @@ -0,0 +1,137 @@ +import abc +import logging +from firebase_admin import firestore +from google.cloud.firestore_v1.base_query import FieldFilter, Or, And +from server_config import get_config +from database.session import get_db + +logger = logging.getLogger(__name__) + + +def 
get_manifest_handler(): + return ManifestHandlerFirestore(db=get_db()) + + +def get_mock_manifest_handler(): + return ManifestHandlerMock() + + +class ManifestHandler(abc.ABC): + + @abc.abstractmethod + def _get_download_success_count(self, config_name: str) -> int: + pass + + @abc.abstractmethod + def _get_download_failure_count(self, config_name: str) -> int: + pass + + @abc.abstractmethod + def _get_download_scheduled_count(self, config_name: str) -> int: + pass + + @abc.abstractmethod + def _get_download_inprogress_count(self, config_name: str) -> int: + pass + + @abc.abstractmethod + def _get_download_total_count(self, config_name: str) -> int: + pass + + +class ManifestHandlerMock(ManifestHandler): + + def _get_download_failure_count(self, config_name: str) -> int: + return 0 + + def _get_download_inprogress_count(self, config_name: str) -> int: + return 0 + + def _get_download_scheduled_count(self, config_name: str) -> int: + return 0 + + def _get_download_success_count(self, config_name: str) -> int: + return 0 + + def _get_download_total_count(self, config_name: str) -> int: + return 0 + + +class ManifestHandlerFirestore(ManifestHandler): + + def __init__(self, db: firestore.firestore.Client): + self.db = db + self.collection = get_config().manifest_collection + + def _get_download_success_count(self, config_name: str) -> int: + result = ( + self.db.collection(self.collection) + .where("config_name", "==", config_name) + .where("stage", "==", "upload") + .where("status", "==", "success") + .count() + .get() + ) + + count = result[0][0].value + + return count + + def _get_download_failure_count(self, config_name: str) -> int: + result = ( + self.db.collection(self.collection) + .where("config_name", "==", config_name) + .where("status", "==", "failure") + .count() + .get() + ) + + count = result[0][0].value + + return count + + def _get_download_scheduled_count(self, config_name: str) -> int: + result = ( + self.db.collection(self.collection) + 
.where("config_name", "==", config_name) + .where("status", "==", "scheduled") + .count() + .get() + ) + + count = result[0][0].value + + return count + + def _get_download_inprogress_count(self, config_name: str) -> int: + and_filter = And( + filters=[ + FieldFilter("status", "==", "success"), + FieldFilter("stage", "!=", "upload"), + ] + ) + or_filter = Or(filters=[FieldFilter("status", "==", "in-progress"), and_filter]) + + result = ( + self.db.collection(self.collection) + .where("config_name", "==", config_name) + .where(filter=or_filter) + .count() + .get() + ) + + count = result[0][0].value + + return count + + def _get_download_total_count(self, config_name: str) -> int: + result = ( + self.db.collection(self.collection) + .where("config_name", "==", config_name) + .count() + .get() + ) + + count = result[0][0].value + + return count diff --git a/weather_dl_v2/fastapi-server/database/session.py b/weather_dl_v2/fastapi-server/database/session.py index 83f7c996..37e2f250 100644 --- a/weather_dl_v2/fastapi-server/database/session.py +++ b/weather_dl_v2/fastapi-server/database/session.py @@ -15,8 +15,10 @@ class Database(abc.ABC): def _get_db(self): pass + db = None + def get_db() -> firestore.firestore.Client: """Acquire a firestore client, initializing the firebase app if necessary. Will attempt to get the db client five times. 
If it's still unsuccessful, a diff --git a/weather_dl_v2/fastapi-server/routers/download.py b/weather_dl_v2/fastapi-server/routers/download.py index 59099c22..181e8c0f 100644 --- a/weather_dl_v2/fastapi-server/routers/download.py +++ b/weather_dl_v2/fastapi-server/routers/download.py @@ -2,6 +2,8 @@ from config_processing.pipeline import start_processing_config from database.download_handler import DownloadHandler, get_download_handler from database.queue_handler import QueueHandler, get_queue_handler +from database.manifest_handler import ManifestHandler, get_manifest_handler +import concurrent.futures import shutil import os @@ -12,6 +14,68 @@ ) +def fetch_config_stats( + config_name: str, client_name: str, manifest_handler: ManifestHandler +): + """Get all the config stats parallely.""" + + with concurrent.futures.ThreadPoolExecutor() as executor: + success_count_future = executor.submit( + manifest_handler._get_download_success_count, config_name + ) + scheduled_count_future = executor.submit( + manifest_handler._get_download_scheduled_count, config_name + ) + failure_count_future = executor.submit( + manifest_handler._get_download_failure_count, config_name + ) + inprogress_count_future = executor.submit( + manifest_handler._get_download_inprogress_count, config_name + ) + total_count_future = executor.submit( + manifest_handler._get_download_total_count, config_name + ) + + concurrent.futures.wait([ + success_count_future, + scheduled_count_future, + failure_count_future, + inprogress_count_future, + total_count_future, + ]) + + return { + "config_name": config_name, + "client_name": client_name, + "downloaded_shards": success_count_future.result(), + "scheduled_shards": scheduled_count_future.result(), + "failed_shards": failure_count_future.result(), + "in-progress_shards": inprogress_count_future.result(), + "total_shards": total_count_future.result(), + } + + +def get_fetch_config_stats(): + return fetch_config_stats + + +def 
get_fetch_config_stats_mock(): + def fetch_config_stats( + config_name: str, client_name: str, manifest_handler: ManifestHandler + ): + return { + "config_name": config_name, + "client_name": client_name, + "downloaded_shards": 0, + "scheduled_shards": 0, + "failed_shards": 0, + "in-progress_shards": 0, + "total_shards": 0, + } + + return fetch_config_stats + + def get_upload(): def upload(file: UploadFile): dest = f"./config_files/{file.filename}" @@ -63,67 +127,53 @@ def submit_download( # Can check the current status of the submitted config. # List status for all the downloads + handle filters @router.get("/") -async def get_downloads(client_name: str | None = None): - # Get this kind of response by querying download collection + manifest collection. - if client_name: - result = { - "config_name": "config_3", - "client_name": client_name, - "total_shards": 10000, - "scheduled_shards": 4990, - "downloaded_shards": 5000, - "failed_shards": 0, - } - else: - result = [ - { - "config_name": "config_1", - "client_name": "MARS", - "total_shards": 10000, - "scheduled_shards": 4990, - "downloaded_shards": 5000, - "failed_shards": 0, - }, - { - "config_name": "config_2", - "client_name": "MARS", - "total_shards": 10000, - "scheduled_shards": 4990, - "downloaded_shards": 5000, - "failed_shards": 0, - }, - { - "config_name": "config_3", - "client_name": "CDS", - "total_shards": 10000, - "scheduled_shards": 4990, - "downloaded_shards": 5000, - "failed_shards": 0, - }, +async def get_downloads( + client_name: str | None = None, + download_handler: DownloadHandler = Depends(get_download_handler), + manifest_handler: ManifestHandler = Depends(get_manifest_handler), + fetch_config_stats=Depends(get_fetch_config_stats), +): + downloads = download_handler._get_downloads(client_name) + config_stats = None + + with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor: + futures = [] + + for download in downloads: + future = executor.submit( + fetch_config_stats, + 
download["config_name"], + download["client_name"], + manifest_handler, + ) + futures.append(future) + + concurrent.futures.wait(futures) + config_stats = [ + future.result() for future in concurrent.futures.as_completed(futures) ] - return result + + return config_stats # Get status of particular download @router.get("/{config_name}") -async def get_download( - config_name: str, download_handler: DownloadHandler = Depends(get_download_handler) +async def get_download_by_config_name( + config_name: str, + download_handler: DownloadHandler = Depends(get_download_handler), + manifest_handler: ManifestHandler = Depends(get_manifest_handler), + fetch_config_stats=Depends(get_fetch_config_stats), ): - if not download_handler._check_download_exists(config_name): + config = download_handler._get_download_by_config_name(config_name) + + if config is None: raise HTTPException( status_code=404, detail="Download config not found in weather-dl v2." ) - # Get this kind of response by querying fake_manifest_db. - result = { - "config_name": config_name, - "client_name": "MARS", - "total_shards": 10000, - "scheduled_shards": 4990, - "downloaded_shards": 5000, - "failed_shards": 0, - } - return result + return fetch_config_stats( + config["config_name"], config["client_name"], manifest_handler + ) # Stop & remove the execution of the config. 
diff --git a/weather_dl_v2/fastapi-server/server_config.json b/weather_dl_v2/fastapi-server/server_config.json index 8160c70c..aba11b10 100644 --- a/weather_dl_v2/fastapi-server/server_config.json +++ b/weather_dl_v2/fastapi-server/server_config.json @@ -1,5 +1,6 @@ { "download_collection": "download", "queues_collection": "queues", - "license_collection": "license" + "license_collection": "license", + "manifest_collection": "test_manifest" } \ No newline at end of file diff --git a/weather_dl_v2/fastapi-server/server_config.py b/weather_dl_v2/fastapi-server/server_config.py index 3a10bfac..792f81f6 100644 --- a/weather_dl_v2/fastapi-server/server_config.py +++ b/weather_dl_v2/fastapi-server/server_config.py @@ -2,13 +2,15 @@ import typing as t import json -Values = t.Union[t.List['Values'], t.Dict[str, 'Values'], bool, int, float, str] # pytype: disable=not-supported-yet +Values = t.Union[t.List["Values"], t.Dict[str, "Values"], bool, int, float, str] # pytype: disable=not-supported-yet + @dataclasses.dataclass class ServerConfig: download_collection: str = "" queues_collection: str = "" license_collection: str = "" + manifest_collection: str = "" kwargs: t.Optional[t.Dict[str, Values]] = dataclasses.field(default_factory=dict) @classmethod @@ -23,13 +25,15 @@ def from_dict(cls, config: t.Dict): return config_instance + server_config = None + def get_config(): global server_config server_config_json = "server_config.json" - if(server_config is None): + if server_config is None: with open(server_config_json) as file: firestore_dict = json.load(file) server_config = ServerConfig.from_dict(firestore_dict) diff --git a/weather_dl_v2/fastapi-server/tests/integration/test_download.py b/weather_dl_v2/fastapi-server/tests/integration/test_download.py index e4bc8b94..e28188db 100644 --- a/weather_dl_v2/fastapi-server/tests/integration/test_download.py +++ b/weather_dl_v2/fastapi-server/tests/integration/test_download.py @@ -5,7 +5,7 @@ from database.download_handler import 
get_download_handler, get_mock_download_handler from database.license_handler import get_license_handler, get_mock_license_handler from database.queue_handler import get_queue_handler, get_mock_queue_handler -from routers.download import get_upload, get_upload_mock +from routers.download import get_upload, get_upload_mock, get_fetch_config_stats, get_fetch_config_stats_mock client = TestClient(app) @@ -15,6 +15,7 @@ app.dependency_overrides[get_license_handler] = get_mock_license_handler app.dependency_overrides[get_queue_handler] = get_mock_queue_handler app.dependency_overrides[get_upload] = get_upload_mock +app.dependency_overrides[get_fetch_config_stats] = get_fetch_config_stats_mock def _get_download(headers, query, code, expected): @@ -28,32 +29,15 @@ def test_get_downloads_basic(): headers = {} query = {} code = 200 - expected = [ - { - "config_name": "config_1", - "client_name": "MARS", - "total_shards": 10000, - "scheduled_shards": 4990, - "downloaded_shards": 5000, - "failed_shards": 0, - }, - { - "config_name": "config_2", - "client_name": "MARS", - "total_shards": 10000, - "scheduled_shards": 4990, - "downloaded_shards": 5000, - "failed_shards": 0, - }, - { - "config_name": "config_3", - "client_name": "CDS", - "total_shards": 10000, - "scheduled_shards": 4990, - "downloaded_shards": 5000, - "failed_shards": 0, - }, - ] + expected = [{ + "config_name": "example.cfg", + "client_name": "client", + "downloaded_shards": 0, + "scheduled_shards": 0, + "failed_shards": 0, + "in-progress_shards": 0, + "total_shards": 0, + }] _get_download(headers, query, code, expected) @@ -125,15 +109,16 @@ def _get_download_by_config(headers, config_name, code, expected): def test_get_download_by_config_basic(): headers = {} - config_name = "dummy_config" + config_name = "example.cfg" code = 200 expected = { "config_name": config_name, - "client_name": "MARS", - "total_shards": 10000, - "scheduled_shards": 4990, - "downloaded_shards": 5000, + "client_name": "client", + 
"downloaded_shards": 0, + "scheduled_shards": 0, "failed_shards": 0, + "in-progress_shards": 0, + "total_shards": 0, } _get_download_by_config(headers, config_name, code, expected) From a532873dde7b99738eb9b0d30e191a20aa36f73b Mon Sep 17 00:00:00 2001 From: aniketinfocusp <122869307+aniketinfocusp@users.noreply.github.com> Date: Mon, 7 Aug 2023 17:56:23 +0530 Subject: [PATCH 27/51] `dl-v2` server firestore async client (#375) * added firestore async client * minor fixes * updated database handlers * Readme updates. * .gitkeep -> config_files folder. * fixed download add call. * queue handler fix * Fix _update_queues_on_stop_download db call. * Fix file upload on fastapi server. * Improved logging. * Fix download add functionality. * cli condition fix * adding additional configs from queue priority * Added a check in priority_queue list route. * lint fixes * print -> logger.info in queue_handler.py. * removed print statements --------- Co-authored-by: Rahul Mahrsee --- weather_dl_v2/cli/README.md | 3 +- weather_dl_v2/cli/app/main.py | 2 +- .../cli/app/services/download_service.py | 2 +- weather_dl_v2/cli/app/subcommands/queue.py | 6 +- weather_dl_v2/downloader_kubernetes/README.md | 10 +- .../downloader_kubernetes/manifest.py | 2 +- weather_dl_v2/fastapi-server/README.md | 44 +++++- .../config_processing/manifest.py | 2 +- .../config_processing/pipeline.py | 19 ++- .../database/download_handler.py | 67 ++++----- .../database/license_handler.py | 112 +++++++------- .../database/manifest_handler.py | 62 ++++---- .../fastapi-server/database/queue_handler.py | 140 ++++++------------ .../fastapi-server/database/session.py | 12 +- weather_dl_v2/fastapi-server/environment.yml | 2 +- .../license_dep/deployment_creator.py | 7 +- weather_dl_v2/fastapi-server/main.py | 10 +- .../fastapi-server/routers/download.py | 133 +++++++++-------- .../fastapi-server/routers/license.py | 78 ++++++---- .../fastapi-server/routers/queues.py | 69 ++++++--- 
.../tests/integration/test_download.py | 4 +- .../tests/integration/test_license.py | 2 +- .../tests/integration/test_queues.py | 2 +- weather_dl_v2/license_deployment/README.md | 4 + weather_dl_v2/license_deployment/manifest.py | 2 +- 25 files changed, 422 insertions(+), 374 deletions(-) diff --git a/weather_dl_v2/cli/README.md b/weather_dl_v2/cli/README.md index 12eecb9b..12ce31cc 100644 --- a/weather_dl_v2/cli/README.md +++ b/weather_dl_v2/cli/README.md @@ -21,7 +21,6 @@ export PROJECT_ID= export REPO= eg:weather-tools gcloud builds submit . --tag "gcr.io/$PROJECT_ID/$REPO:weather-dl-v2-cli" --timeout=79200 --machine-type=e2-highcpu-32 - ``` ## Create a VM using above created docker-image @@ -34,7 +33,7 @@ gcloud compute instances create-with-container weather-dl-v2-cli \ --project=$PROJECT_ID \ --zone=$ZONE \ --machine-type=e2-medium \ - --network-interface=network-tier=PREMIUM,stack-type=IPV4_ONLY,subnet=default \ + --network-interface=network-tier=PREMIUM,subnet=default \ --maintenance-policy=MIGRATE \ --provisioning-model=STANDARD \ --service-account=$SERVICE_ACCOUNT \ diff --git a/weather_dl_v2/cli/app/main.py b/weather_dl_v2/cli/app/main.py index 85066d72..59c84bf9 100644 --- a/weather_dl_v2/cli/app/main.py +++ b/weather_dl_v2/cli/app/main.py @@ -24,7 +24,7 @@ def ping(): except requests.exceptions.RequestException as e: raise SystemExit(e) - logger.info(x.text) + print(x.text) if __name__ == "__main__": diff --git a/weather_dl_v2/cli/app/services/download_service.py b/weather_dl_v2/cli/app/services/download_service.py index a021ee18..241e13a3 100644 --- a/weather_dl_v2/cli/app/services/download_service.py +++ b/weather_dl_v2/cli/app/services/download_service.py @@ -49,7 +49,7 @@ def _list_all_downloads_by_client_name(self, client_name: str): def _get_download_by_config(self, config_name: str): return network_service.get( - uri=f"{self.endpoint}/download{config_name}", + uri=f"{self.endpoint}/{config_name}", header={"accept": "application/json"}, ) diff 
--git a/weather_dl_v2/cli/app/subcommands/queue.py b/weather_dl_v2/cli/app/subcommands/queue.py index b20a4b58..61bf8fdd 100644 --- a/weather_dl_v2/cli/app/subcommands/queue.py +++ b/weather_dl_v2/cli/app/subcommands/queue.py @@ -71,11 +71,11 @@ def modify_license_queue( print("Priority file or config name with absolute priority must be passed.") return - if file and (config or priority): + if file is not None and (config is not None or priority is not None): print("--config & --priority can't be used along with --file argument.") return - if file: + if file is not None: validator = QueueValidator(valid_keys=["priority"]) try: @@ -86,7 +86,7 @@ def modify_license_queue( return print(queue_service._edit_license_queue(license, priority_list)) return - elif config and priority: + elif config is not None and priority is not None: if priority < 0: print("Priority can not be negative.") return diff --git a/weather_dl_v2/downloader_kubernetes/README.md b/weather_dl_v2/downloader_kubernetes/README.md index 49643b82..bb0e3567 100644 --- a/weather_dl_v2/downloader_kubernetes/README.md +++ b/weather_dl_v2/downloader_kubernetes/README.md @@ -9,15 +9,21 @@ We are not configuring any service account here hence make sure that compute eng * roles/bigquery.dataEditor * roles/bigquery.jobUser -### Write the manifest location path +### Add the manifest location path ``` Please write down the manifest path at Line 43 of downloader.py. Eg: "fs://test_manifest?projectId=XXX" ``` +### Add manifest collection name in manifest.py +``` +Please write down the manifest collection name at Line 482 of manifest.py. +``` + ### Create docker image for downloader: ``` +export PROJECT_ID= export REPO= eg:weather-tools -gcloud builds submit Dockerfile --tag "gcr.io/$PROJECT_ID/$REPO:weather-dl-v2-downloader" --timeout=79200 --machine-type=e2-highcpu-32 +gcloud builds submit . 
--tag "gcr.io/$PROJECT_ID/$REPO:weather-dl-v2-downloader" --timeout=79200 --machine-type=e2-highcpu-32 ``` diff --git a/weather_dl_v2/downloader_kubernetes/manifest.py b/weather_dl_v2/downloader_kubernetes/manifest.py index 185e3473..6d0ae0dd 100644 --- a/weather_dl_v2/downloader_kubernetes/manifest.py +++ b/weather_dl_v2/downloader_kubernetes/manifest.py @@ -479,5 +479,5 @@ def _update(self, download_status: DownloadStatus) -> None: def root_document_for_store(self, store_scheme: str) -> DocumentReference: """Get the root manifest document given the user's config and current document's storage location.""" # TODO: Get user-defined collection for manifest. - root_collection = "test_manifest" + root_collection = "XXXXXXXXXX" return self._get_db().collection(root_collection).document(store_scheme) diff --git a/weather_dl_v2/fastapi-server/README.md b/weather_dl_v2/fastapi-server/README.md index 8d6acb33..20392c36 100644 --- a/weather_dl_v2/fastapi-server/README.md +++ b/weather_dl_v2/fastapi-server/README.md @@ -1,5 +1,38 @@ # Deployment Instructions & General Notes +### User authorization required to set up the environment: +* roles/container.admin + +### Authorization needed for the tool to operate: +We are not configuring any service account here hence make sure that compute engine default service account have roles: +* roles/pubsub.subscriber +* roles/storage.admin +* roles/bigquery.dataEditor +* roles/bigquery.jobUser + +### Install kubectl: +``` +apt-get update + +apt-get install -y kubectl +``` + +### Create cluster: +``` +export PROJECT_ID= +export REGION= eg: us-west1 +export ZONE= eg: us-west1-a +export CLUSTER_NAME= eg: weather-dl-v2-cluster +export DOWNLOAD_NODE_POOL=downloader-pool + +gcloud beta container --project $PROJECT_ID clusters create $CLUSTER_NAME --zone $ZONE --no-enable-basic-auth --cluster-version "1.27.2-gke.1200" --release-channel "regular" --machine-type "e2-standard-8" --image-type "COS_CONTAINERD" --disk-type "pd-balanced" --disk-size 
"1100" --metadata disable-legacy-endpoints=true --scopes "https://www.googleapis.com/auth/cloud-platform" --max-pods-per-node "16" --num-nodes "4" --logging=SYSTEM,WORKLOAD --monitoring=SYSTEM --enable-ip-alias --network "projects/$PROJECT_ID/global/networks/default" --subnetwork "projects/$PROJECT_ID/regions/$REGION/subnetworks/default" --no-enable-intra-node-visibility --default-max-pods-per-node "16" --enable-autoscaling --min-nodes "4" --max-nodes "100" --location-policy "BALANCED" --no-enable-master-authorized-networks --addons HorizontalPodAutoscaling,HttpLoadBalancing,GcePersistentDiskCsiDriver --enable-autoupgrade --enable-autorepair --max-surge-upgrade 1 --max-unavailable-upgrade 0 --enable-managed-prometheus --enable-shielded-nodes --node-locations $ZONE --node-labels preemptible=false && gcloud beta container --project $PROJECT_ID node-pools create $DOWNLOAD_NODE_POOL --cluster $CLUSTER_NAME --zone $ZONE --machine-type "e2-standard-8" --image-type "COS_CONTAINERD" --disk-type "pd-balanced" --disk-size "1100" --metadata disable-legacy-endpoints=true --scopes "https://www.googleapis.com/auth/cloud-platform" --max-pods-per-node "16" --num-nodes "1" --enable-autoscaling --min-nodes "1" --max-nodes "100" --location-policy "BALANCED" --enable-autoupgrade --enable-autorepair --max-surge-upgrade 1 --max-unavailable-upgrade 0 --node-locations $ZONE --node-labels preemptible=false +``` + +### Connect to Cluster: +``` +gcloud container clusters get-credentials $CLUSTER_NAME --zone $ZONE --project $PROJECT_ID +``` + ### How to create environment: ``` conda env create --name weather-dl-v2-server --file=environment.yml @@ -7,6 +40,16 @@ conda env create --name weather-dl-v2-server --file=environment.yml conda activate weather-dl-v2-server ``` +### Make changes in server config, if required +``` +Please make appropriate changes in server config, if required. 
+``` + +### Add manifest collection name in config_processing/manifest.py +``` +Please write down the manifest collection name at Line 498 of manifest.py. +``` + ### To run fastapi server: ``` uvicorn main:app --reload @@ -20,7 +63,6 @@ uvicorn main:app --reload Please write down the license deployment's docker image path at Line 22 of license_deployment.yaml. ``` - ### Create docker image for server: ``` export PROJECT_ID= diff --git a/weather_dl_v2/fastapi-server/config_processing/manifest.py b/weather_dl_v2/fastapi-server/config_processing/manifest.py index 444dd933..062bb66f 100644 --- a/weather_dl_v2/fastapi-server/config_processing/manifest.py +++ b/weather_dl_v2/fastapi-server/config_processing/manifest.py @@ -495,5 +495,5 @@ def _update(self, download_status: DownloadStatus) -> None: def root_document_for_store(self, store_scheme: str) -> DocumentReference: """Get the root manifest document given the user's config and current document's storage location.""" # TODO: Get user-defined collection for manifest. 
- root_collection = "test_manifest" + root_collection = "XXXXXXXXXXXX" return self._get_db().collection(root_collection).document(store_scheme) diff --git a/weather_dl_v2/fastapi-server/config_processing/pipeline.py b/weather_dl_v2/fastapi-server/config_processing/pipeline.py index 836f24f1..1292eb9c 100644 --- a/weather_dl_v2/fastapi-server/config_processing/pipeline.py +++ b/weather_dl_v2/fastapi-server/config_processing/pipeline.py @@ -5,12 +5,20 @@ from .manifest import FirestoreManifest from database.download_handler import get_download_handler from database.queue_handler import get_queue_handler +from fastapi.concurrency import run_in_threadpool download_handler = get_download_handler() queue_handler = get_queue_handler() -def start_processing_config(config_file, licenses): +def _do_partitions(partition_obj: PartitionConfig): + for partition in partition_obj.prepare_partitions(): + # Skip existing downloads + if partition_obj.new_downloads_only(partition): + partition_obj.update_manifest_collection(partition) + + +async def start_processing_config(config_file, licenses): config = {} manifest = FirestoreManifest() @@ -25,11 +33,8 @@ def start_processing_config(config_file, licenses): partition_obj = PartitionConfig(config, None, manifest) # Prepare partitions - for partition in partition_obj.prepare_partitions(): - # Skip existing downloads - if partition_obj.new_downloads_only(partition): - partition_obj.update_manifest_collection(partition) + await run_in_threadpool(_do_partitions, partition_obj) # Make entry in 'download' & 'queues' collection. 
- download_handler._start_download(config_name, config.client) - queue_handler._update_queues_on_start_download(config_name, licenses) + await download_handler._start_download(config_name, config.client) + await queue_handler._update_queues_on_start_download(config_name, licenses) diff --git a/weather_dl_v2/fastapi-server/database/download_handler.py b/weather_dl_v2/fastapi-server/database/download_handler.py index 349e739e..e8fa79f8 100644 --- a/weather_dl_v2/fastapi-server/database/download_handler.py +++ b/weather_dl_v2/fastapi-server/database/download_handler.py @@ -1,16 +1,16 @@ import abc import logging from firebase_admin import firestore -from google.cloud.firestore_v1 import DocumentSnapshot +from google.cloud.firestore_v1 import DocumentSnapshot, FieldFilter from google.cloud.firestore_v1.types import WriteResult -from database.session import get_db +from database.session import get_async_client from server_config import get_config logger = logging.getLogger(__name__) def get_download_handler(): - return DownloadHandlerFirestore(db=get_db()) + return DownloadHandlerFirestore(db=get_async_client()) def get_mock_download_handler(): @@ -20,23 +20,23 @@ def get_mock_download_handler(): class DownloadHandler(abc.ABC): @abc.abstractmethod - def _start_download(self, config_name: str, client_name: str) -> None: + async def _start_download(self, config_name: str, client_name: str) -> None: pass @abc.abstractmethod - def _stop_download(self, config_name: str) -> None: + async def _stop_download(self, config_name: str) -> None: pass @abc.abstractmethod - def _check_download_exists(self, config_name: str) -> bool: + async def _check_download_exists(self, config_name: str) -> bool: pass @abc.abstractmethod - def _get_downloads(self, client_name: str) -> list: + async def _get_downloads(self, client_name: str) -> list: pass @abc.abstractmethod - def _get_download_by_config_name(self, config_name: str): + async def _get_download_by_config_name(self, config_name: str): 
pass @@ -45,17 +45,17 @@ class DownloadHandlerMock(DownloadHandler): def __init__(self): pass - def _start_download(self, config_name: str, client_name: str) -> None: + async def _start_download(self, config_name: str, client_name: str) -> None: logger.info( f"Added {config_name} in 'download' collection. Update_time: 000000." ) - def _stop_download(self, config_name: str) -> None: + async def _stop_download(self, config_name: str) -> None: logger.info( f"Removed {config_name} in 'download' collection. Update_time: 000000." ) - def _check_download_exists(self, config_name: str) -> bool: + async def _check_download_exists(self, config_name: str) -> bool: if config_name == "no_exist": return False elif config_name == "no_exist.cfg": @@ -63,10 +63,10 @@ def _check_download_exists(self, config_name: str) -> bool: else: return True - def _get_downloads(self, client_name: str) -> list: + async def _get_downloads(self, client_name: str) -> list: return [{"config_name": "example.cfg", "client_name": "client"}] - def _get_download_by_config_name(self, config_name: str): + async def _get_download_by_config_name(self, config_name: str): return {"config_name": "example.cfg", "client_name": "client"} @@ -76,9 +76,9 @@ def __init__(self, db: firestore.firestore.Client): self.db = db self.collection = get_config().download_collection - def _start_download(self, config_name: str, client_name: str) -> None: + async def _start_download(self, config_name: str, client_name: str) -> None: result: WriteResult = ( - self.db.collection(self.collection) + await self.db.collection(self.collection) .document(config_name) .set({"config_name": config_name, "client_name": client_name}) ) @@ -87,41 +87,36 @@ def _start_download(self, config_name: str, client_name: str) -> None: f"Added {config_name} in 'download' collection. Update_time: {result.update_time}." 
) - def _stop_download(self, config_name: str) -> None: - timestamp = self.db.collection(self.collection).document(config_name).delete() + async def _stop_download(self, config_name: str) -> None: + timestamp = ( + await self.db.collection(self.collection).document(config_name).delete() + ) logger.info( f"Removed {config_name} in 'download' collection. Update_time: {timestamp}." ) - def _check_download_exists(self, config_name: str) -> bool: + async def _check_download_exists(self, config_name: str) -> bool: result: DocumentSnapshot = ( - self.db.collection(self.collection).document(config_name).get() + await self.db.collection(self.collection).document(config_name).get() ) return result.exists - def _get_downloads(self, client_name: str) -> list: - snapshot_list = None + async def _get_downloads(self, client_name: str) -> list: + docs = [] if client_name: - snapshot_list = ( + docs = ( self.db.collection(self.collection) - .where("client_name", "==", client_name) - .get() + .where(filter=FieldFilter("client_name", "==", client_name)) + .stream() ) else: - snapshot_list = self.db.collection(self.collection).get() - result = [] - for snapshot in snapshot_list: - result.append( - self.db.collection(self.collection) - .document(snapshot.id) - .get() - .to_dict() - ) - return result + docs = self.db.collection(self.collection).stream() + + return [doc.to_dict() async for doc in docs] - def _get_download_by_config_name(self, config_name: str): + async def _get_download_by_config_name(self, config_name: str): result: DocumentSnapshot = ( - self.db.collection(self.collection).document(config_name).get() + await self.db.collection(self.collection).document(config_name).get() ) if result.exists: return result.to_dict() diff --git a/weather_dl_v2/fastapi-server/database/license_handler.py b/weather_dl_v2/fastapi-server/database/license_handler.py index 995a4813..a5c19e05 100644 --- a/weather_dl_v2/fastapi-server/database/license_handler.py +++ 
b/weather_dl_v2/fastapi-server/database/license_handler.py @@ -1,16 +1,17 @@ import abc import logging from firebase_admin import firestore -from google.cloud.firestore_v1 import DocumentSnapshot +from google.cloud.firestore_v1 import DocumentSnapshot, FieldFilter from google.cloud.firestore_v1.types import WriteResult -from database.session import get_db +from database.session import get_async_client from server_config import get_config + logger = logging.getLogger(__name__) def get_license_handler(): - return LicenseHandlerFirestore(db=get_db()) + return LicenseHandlerFirestore(db=get_async_client()) def get_mock_license_handler(): @@ -20,35 +21,35 @@ def get_mock_license_handler(): class LicenseHandler(abc.ABC): @abc.abstractmethod - def _add_license(self, license_dict: dict) -> str: + async def _add_license(self, license_dict: dict) -> str: pass @abc.abstractmethod - def _delete_license(self, license_id: str) -> None: + async def _delete_license(self, license_id: str) -> None: pass @abc.abstractmethod - def _check_license_exists(self, license_id: str) -> bool: + async def _check_license_exists(self, license_id: str) -> bool: pass @abc.abstractmethod - def _get_license_by_license_id(self, license_id: str) -> dict: + async def _get_license_by_license_id(self, license_id: str) -> dict: pass @abc.abstractmethod - def _get_license_by_client_name(self, client_name: str) -> list: + async def _get_license_by_client_name(self, client_name: str) -> list: pass @abc.abstractmethod - def _get_licenses(self) -> list: + async def _get_licenses(self) -> list: pass @abc.abstractmethod - def _update_license(self, license_id: str, license_dict: dict) -> None: + async def _update_license(self, license_id: str, license_dict: dict) -> None: pass @abc.abstractmethod - def _get_license_without_deployment(self) -> list: + async def _get_license_without_deployment(self) -> list: pass @@ -57,28 +58,28 @@ class LicenseHandlerMock(LicenseHandler): def __init__(self): pass - def 
_add_license(self, license_dict: dict) -> str: + async def _add_license(self, license_dict: dict) -> str: license_id = "L1" logger.info(f"Added {license_id} in 'license' collection. Update_time: 00000.") return license_id - def _delete_license(self, license_id: str) -> None: + async def _delete_license(self, license_id: str) -> None: logger.info( f"Removed {license_id} in 'license' collection. Update_time: 00000." ) - def _update_license(self, license_id: str, license_dict: dict) -> None: + async def _update_license(self, license_id: str, license_dict: dict) -> None: logger.info( f"Updated {license_id} in 'license' collection. Update_time: 00000." ) - def _check_license_exists(self, license_id: str) -> bool: + async def _check_license_exists(self, license_id: str) -> bool: if license_id == "no_exists": return False else: return True - def _get_license_by_license_id(self, license_id: str) -> dict: + async def _get_license_by_license_id(self, license_id: str) -> dict: if license_id == "no_exists": return None return { @@ -89,7 +90,7 @@ def _get_license_by_license_id(self, license_id: str) -> dict: "number_of_requets": 100, } - def _get_license_by_client_name(self, client_name: str) -> list: + async def _get_license_by_client_name(self, client_name: str) -> list: return [{ "license_id": "L1", "secret_id": "xxxx", @@ -98,7 +99,7 @@ def _get_license_by_client_name(self, client_name: str) -> list: "number_of_requets": 100, }] - def _get_licenses(self) -> list: + async def _get_licenses(self) -> list: return [{ "license_id": "L1", "secret_id": "xxxx", @@ -107,37 +108,42 @@ def _get_licenses(self) -> list: "number_of_requets": 100, }] - def _get_license_without_deployment(self) -> list: + async def _get_license_without_deployment(self) -> list: return [] class LicenseHandlerFirestore(LicenseHandler): - def __init__(self, db: firestore.firestore.Client): + def __init__(self, db: firestore.firestore.AsyncClient): self.db = db self.collection = get_config().license_collection 
# TODO: find alternative way to create license_id - def _add_license(self, license_dict: dict) -> str: - license_id = f"L{len(self.db.collection(self.collection).get()) + 1}" + async def _add_license(self, license_dict: dict) -> str: + license_count = await self.db.collection(self.collection).count().get() + license_id = f"L{license_count[0][0].value + 1}" license_dict["license_id"] = license_id result: WriteResult = ( - self.db.collection(self.collection).document(license_id).set(license_dict) + await self.db.collection(self.collection) + .document(license_id) + .set(license_dict) ) logger.info( f"Added {license_id} in 'license' collection. Update_time: {result.update_time}." ) return license_id - def _delete_license(self, license_id: str) -> None: - timestamp = self.db.collection(self.collection).document(license_id).delete() + async def _delete_license(self, license_id: str) -> None: + timestamp = ( + await self.db.collection(self.collection).document(license_id).delete() + ) logger.info( f"Removed {license_id} in 'license' collection. Update_time: {timestamp}." ) - def _update_license(self, license_id: str, license_dict: dict) -> None: + async def _update_license(self, license_id: str, license_dict: dict) -> None: result: WriteResult = ( - self.db.collection(self.collection) + await self.db.collection(self.collection) .document(license_id) .update(license_dict) ) @@ -145,48 +151,34 @@ def _update_license(self, license_id: str, license_dict: dict) -> None: f"Updated {license_id} in 'license' collection. Update_time: {result.update_time}." 
) - def _check_license_exists(self, license_id: str) -> bool: + async def _check_license_exists(self, license_id: str) -> bool: result: DocumentSnapshot = ( - self.db.collection(self.collection).document(license_id).get() + await self.db.collection(self.collection).document(license_id).get() ) return result.exists - def _get_license_by_license_id(self, license_id: str) -> dict: + async def _get_license_by_license_id(self, license_id: str) -> dict: result: DocumentSnapshot = ( - self.db.collection(self.collection).document(license_id).get() + await self.db.collection(self.collection).document(license_id).get() ) return result.to_dict() - def _get_license_by_client_name(self, client_name: str) -> list: - snapshot_list = ( + async def _get_license_by_client_name(self, client_name: str) -> list: + docs = ( self.db.collection(self.collection) - .where("client_name", "==", client_name) - .get() + .where(filter=FieldFilter("client_name", "==", client_name)) + .stream() ) - result = [] - for snapshot in snapshot_list: - result.append(snapshot.to_dict()) - return result - - def _get_licenses(self) -> list: - snapshot_list = self.db.collection(self.collection).get() - result = [] - for snapshot in snapshot_list: - result.append( - self.db.collection(self.collection) - .document(snapshot.id) - .get() - .to_dict() - ) - return result - - def _get_license_without_deployment(self) -> list: - snapshot_list = ( + return [doc.to_dict() async for doc in docs] + + async def _get_licenses(self) -> list: + docs = self.db.collection(self.collection).stream() + return [doc.to_dict() async for doc in docs] + + async def _get_license_without_deployment(self) -> list: + docs = ( self.db.collection(self.collection) - .where("k8s_deployment_id", "==", "") - .get() + .where(filter=FieldFilter("k8s_deployment_id", "==", "")) + .stream() ) - result = [] - for snapshot in snapshot_list: - result.append(snapshot.to_dict()["license_id"]) - return result + return [doc.to_dict() async for doc in 
docs] diff --git a/weather_dl_v2/fastapi-server/database/manifest_handler.py b/weather_dl_v2/fastapi-server/database/manifest_handler.py index 6c6e3323..371c3758 100644 --- a/weather_dl_v2/fastapi-server/database/manifest_handler.py +++ b/weather_dl_v2/fastapi-server/database/manifest_handler.py @@ -3,13 +3,13 @@ from firebase_admin import firestore from google.cloud.firestore_v1.base_query import FieldFilter, Or, And from server_config import get_config -from database.session import get_db +from database.session import get_async_client logger = logging.getLogger(__name__) def get_manifest_handler(): - return ManifestHandlerFirestore(db=get_db()) + return ManifestHandlerFirestore(db=get_async_client()) def get_mock_manifest_handler(): @@ -19,41 +19,41 @@ def get_mock_manifest_handler(): class ManifestHandler(abc.ABC): @abc.abstractmethod - def _get_download_success_count(self, config_name: str) -> int: + async def _get_download_success_count(self, config_name: str) -> int: pass @abc.abstractmethod - def _get_download_failure_count(self, config_name: str) -> int: + async def _get_download_failure_count(self, config_name: str) -> int: pass @abc.abstractmethod - def _get_download_scheduled_count(self, config_name: str) -> int: + async def _get_download_scheduled_count(self, config_name: str) -> int: pass @abc.abstractmethod - def _get_download_inprogress_count(self, config_name: str) -> int: + async def _get_download_inprogress_count(self, config_name: str) -> int: pass @abc.abstractmethod - def _get_download_total_count(self, config_name: str) -> int: + async def _get_download_total_count(self, config_name: str) -> int: pass class ManifestHandlerMock(ManifestHandler): - def _get_download_failure_count(self, config_name: str) -> int: + async def _get_download_failure_count(self, config_name: str) -> int: return 0 - def _get_download_inprogress_count(self, config_name: str) -> int: + async def _get_download_inprogress_count(self, config_name: str) -> int: return 0 - 
def _get_download_scheduled_count(self, config_name: str) -> int: + async def _get_download_scheduled_count(self, config_name: str) -> int: return 0 - def _get_download_success_count(self, config_name: str) -> int: + async def _get_download_success_count(self, config_name: str) -> int: return 0 - def _get_download_total_count(self, config_name: str) -> int: + async def _get_download_total_count(self, config_name: str) -> int: return 0 @@ -63,12 +63,12 @@ def __init__(self, db: firestore.firestore.Client): self.db = db self.collection = get_config().manifest_collection - def _get_download_success_count(self, config_name: str) -> int: + async def _get_download_success_count(self, config_name: str) -> int: result = ( - self.db.collection(self.collection) - .where("config_name", "==", config_name) - .where("stage", "==", "upload") - .where("status", "==", "success") + await self.db.collection(self.collection) + .where(filter=FieldFilter("config_name", "==", config_name)) + .where(filter=FieldFilter("stage", "==", "upload")) + .where(filter=FieldFilter("status", "==", "success")) .count() .get() ) @@ -77,11 +77,11 @@ def _get_download_success_count(self, config_name: str) -> int: return count - def _get_download_failure_count(self, config_name: str) -> int: + async def _get_download_failure_count(self, config_name: str) -> int: result = ( - self.db.collection(self.collection) - .where("config_name", "==", config_name) - .where("status", "==", "failure") + await self.db.collection(self.collection) + .where(filter=FieldFilter("config_name", "==", config_name)) + .where(filter=FieldFilter("status", "==", "failure")) .count() .get() ) @@ -90,11 +90,11 @@ def _get_download_failure_count(self, config_name: str) -> int: return count - def _get_download_scheduled_count(self, config_name: str) -> int: + async def _get_download_scheduled_count(self, config_name: str) -> int: result = ( - self.db.collection(self.collection) - .where("config_name", "==", config_name) - 
.where("status", "==", "scheduled") + await self.db.collection(self.collection) + .where(filter=FieldFilter("config_name", "==", config_name)) + .where(filter=FieldFilter("status", "==", "scheduled")) .count() .get() ) @@ -103,7 +103,7 @@ def _get_download_scheduled_count(self, config_name: str) -> int: return count - def _get_download_inprogress_count(self, config_name: str) -> int: + async def _get_download_inprogress_count(self, config_name: str) -> int: and_filter = And( filters=[ FieldFilter("status", "==", "success"), @@ -113,8 +113,8 @@ def _get_download_inprogress_count(self, config_name: str) -> int: or_filter = Or(filters=[FieldFilter("status", "==", "in-progress"), and_filter]) result = ( - self.db.collection(self.collection) - .where("config_name", "==", config_name) + await self.db.collection(self.collection) + .where(filter=FieldFilter("config_name", "==", config_name)) .where(filter=or_filter) .count() .get() @@ -124,10 +124,10 @@ def _get_download_inprogress_count(self, config_name: str) -> int: return count - def _get_download_total_count(self, config_name: str) -> int: + async def _get_download_total_count(self, config_name: str) -> int: result = ( - self.db.collection(self.collection) - .where("config_name", "==", config_name) + await self.db.collection(self.collection) + .where(filter=FieldFilter("config_name", "==", config_name)) .count() .get() ) diff --git a/weather_dl_v2/fastapi-server/database/queue_handler.py b/weather_dl_v2/fastapi-server/database/queue_handler.py index bd7fb35d..d703d3db 100644 --- a/weather_dl_v2/fastapi-server/database/queue_handler.py +++ b/weather_dl_v2/fastapi-server/database/queue_handler.py @@ -1,16 +1,16 @@ import abc import logging from firebase_admin import firestore -from google.cloud.firestore_v1 import DocumentSnapshot +from google.cloud.firestore_v1 import DocumentSnapshot, FieldFilter from google.cloud.firestore_v1.types import WriteResult -from database.session import get_db +from database.session import 
get_async_client from server_config import get_config logger = logging.getLogger(__name__) def get_queue_handler(): - return QueueHandlerFirestore(db=get_db()) + return QueueHandlerFirestore(db=get_async_client()) def get_mock_queue_handler(): @@ -20,107 +20,93 @@ def get_mock_queue_handler(): class QueueHandler(abc.ABC): @abc.abstractmethod - def _create_license_queue(self, license_id: str, client_name: str) -> None: + async def _create_license_queue(self, license_id: str, client_name: str) -> None: pass @abc.abstractmethod - def _remove_license_queue(self, license_id: str) -> None: + async def _remove_license_queue(self, license_id: str) -> None: pass @abc.abstractmethod - def _get_queues(self) -> list: + async def _get_queues(self) -> list: pass @abc.abstractmethod - def _get_queue_by_license_id(self, license_id: str) -> dict: + async def _get_queue_by_license_id(self, license_id: str) -> dict: pass @abc.abstractmethod - def _get_queue_by_client_name(self, client_name: str) -> list: + async def _get_queue_by_client_name(self, client_name: str) -> list: pass @abc.abstractmethod - def _update_license_queue(self, license_id: str, priority_list: list) -> None: + async def _update_license_queue(self, license_id: str, priority_list: list) -> None: pass @abc.abstractmethod - def _update_queues_on_start_download( + async def _update_queues_on_start_download( self, config_name: str, licenses: list ) -> None: pass @abc.abstractmethod - def _update_queues_on_stop_download(self, config_name: str) -> None: + async def _update_queues_on_stop_download(self, config_name: str) -> None: pass @abc.abstractmethod - def _update_config_priority_in_license( + async def _update_config_priority_in_license( self, license_id: str, config_name: str, priority: int ) -> None: pass - @abc.abstractmethod - def _create_license_queue(self, license_id: str, client_name: str) -> None: - pass - - @abc.abstractmethod - def _remove_license_queue(self, license_id: str) -> None: - pass - class 
QueueHandlerMock(QueueHandler): def __init__(self): pass - def _create_license_queue(self, license_id: str, client_name: str) -> None: + async def _create_license_queue(self, license_id: str, client_name: str) -> None: logger.info( f"Added {license_id} queue in 'queues' collection. Update_time: 000000." ) - def _remove_license_queue(self, license_id: str) -> None: + async def _remove_license_queue(self, license_id: str) -> None: logger.info( f"Removed {license_id} queue in 'queues' collection. Update_time: 000000." ) - def _get_queues(self) -> list: + async def _get_queues(self) -> list: return [{"client_name": "dummy_client", "license_id": "L1", "queue": []}] - def _get_queue_by_license_id(self, license_id: str) -> dict: + async def _get_queue_by_license_id(self, license_id: str) -> dict: if license_id == "no_exists": return None return {"client_name": "dummy_client", "license_id": license_id, "queue": []} - def _get_queue_by_client_name(self, client_name: str) -> list: + async def _get_queue_by_client_name(self, client_name: str) -> list: return [{"client_name": client_name, "license_id": "L1", "queue": []}] - def _update_license_queue(self, license_id: str, priority_list: list) -> None: + async def _update_license_queue(self, license_id: str, priority_list: list) -> None: logger.info( f"Updated {license_id} queue in 'queues' collection. Update_time: 00000." ) - def _update_queues_on_start_download( + async def _update_queues_on_start_download( self, config_name: str, licenses: list ) -> None: logger.info( f"Updated {license} queue in 'queues' collection. Update_time: 00000." ) - def _update_queues_on_stop_download(self, config_name: str) -> None: + async def _update_queues_on_stop_download(self, config_name: str) -> None: logger.info( "Updated snapshot.id queue in 'queues' collection. Update_time: 00000." 
) - def _update_config_priority_in_license( + async def _update_config_priority_in_license( self, license_id: str, config_name: str, priority: int ) -> None: - print("Updated snapshot.id queue in 'queues' collection. Update_time: 00000.") - - def _create_license_queue(self, license_id: str, client_name: str) -> None: - logger.info("Added L1 queue in 'queues' collection. Update_time: 00000.") - - def _remove_license_queue(self, license_id: str) -> None: - logger.info("Removed L1 queue in 'queues' collection. Update_time: 00000.") + logger.info("Updated snapshot.id queue in 'queues' collection. Update_time: 00000.") class QueueHandlerFirestore(QueueHandler): @@ -129,9 +115,9 @@ def __init__(self, db: firestore.firestore.Client): self.db = db self.collection = get_config().queues_collection - def _create_license_queue(self, license_id: str, client_name: str) -> None: + async def _create_license_queue(self, license_id: str, client_name: str) -> None: result: WriteResult = ( - self.db.collection(self.collection) + await self.db.collection(self.collection) .document(license_id) .set({"license_id": license_id, "client_name": client_name, "queue": []}) ) @@ -139,44 +125,35 @@ def _create_license_queue(self, license_id: str, client_name: str) -> None: f"Added {license_id} queue in 'queues' collection. Update_time: {result.update_time}." ) - def _remove_license_queue(self, license_id: str) -> None: - timestamp = self.db.collection(self.collection).document(license_id).delete() + async def _remove_license_queue(self, license_id: str) -> None: + timestamp = ( + await self.db.collection(self.collection).document(license_id).delete() + ) logger.info( f"Removed {license_id} queue in 'queues' collection. Update_time: {timestamp}." 
) - def _get_queues(self) -> list: - snapshot_list = self.db.collection(self.collection).get() - result = [] - for snapshot in snapshot_list: - result.append( - self.db.collection(self.collection) - .document(snapshot.id) - .get() - .to_dict() - ) - return result + async def _get_queues(self) -> list: + docs = self.db.collection(self.collection).stream() + return [doc.to_dict() async for doc in docs] - def _get_queue_by_license_id(self, license_id: str) -> dict: + async def _get_queue_by_license_id(self, license_id: str) -> dict: result: DocumentSnapshot = ( - self.db.collection(self.collection).document(license_id).get() + await self.db.collection(self.collection).document(license_id).get() ) return result.to_dict() - def _get_queue_by_client_name(self, client_name: str) -> list: - snapshot_list = ( + async def _get_queue_by_client_name(self, client_name: str) -> list: + docs = ( self.db.collection(self.collection) - .where("client_name", "==", client_name) - .get() + .where(filter=FieldFilter("client_name", "==", client_name)) + .stream() ) - result = [] - for snapshot in snapshot_list: - result.append(snapshot.to_dict()) - return result + return [doc.to_dict() async for doc in docs] - def _update_license_queue(self, license_id: str, priority_list: list) -> None: + async def _update_license_queue(self, license_id: str, priority_list: list) -> None: result: WriteResult = ( - self.db.collection(self.collection) + await self.db.collection(self.collection) .document(license_id) .update({"queue": priority_list}) ) @@ -184,12 +161,12 @@ def _update_license_queue(self, license_id: str, priority_list: list) -> None: f"Updated {license_id} queue in 'queues' collection. Update_time: {result.update_time}." 
) - def _update_queues_on_start_download( + async def _update_queues_on_start_download( self, config_name: str, licenses: list ) -> None: for license in licenses: result: WriteResult = ( - self.db.collection(self.collection) + await self.db.collection(self.collection) .document(license) .update({"queue": firestore.ArrayUnion([config_name])}) ) @@ -197,11 +174,11 @@ def _update_queues_on_start_download( f"Updated {license} queue in 'queues' collection. Update_time: {result.update_time}." ) - def _update_queues_on_stop_download(self, config_name: str) -> None: - snapshot_list = self.db.collection(self.collection).get() + async def _update_queues_on_stop_download(self, config_name: str) -> None: + snapshot_list = await self.db.collection(self.collection).get() for snapshot in snapshot_list: result: WriteResult = ( - self.db.collection(self.collection) + await self.db.collection(self.collection) .document(snapshot.id) .update({"queue": firestore.ArrayRemove([config_name])}) ) @@ -209,39 +186,20 @@ def _update_queues_on_stop_download(self, config_name: str) -> None: f"Updated {snapshot.id} queue in 'queues' collection. Update_time: {result.update_time}." ) - def _update_config_priority_in_license( + async def _update_config_priority_in_license( self, license_id: str, config_name: str, priority: int ) -> None: snapshot: DocumentSnapshot = ( - self.db.collection("queues").document(license_id).get() + await self.db.collection(self.collection).document(license_id).get() ) priority_list = snapshot.to_dict()["queue"] - if config_name not in priority_list: - print(f"'{config_name}' not in queue.") - raise new_priority_list = [c for c in priority_list if c != config_name] new_priority_list.insert(priority, config_name) result: WriteResult = ( - self.db.collection("queues") + await self.db.collection(self.collection) .document(license_id) .update({"queue": new_priority_list}) ) - print( - f"Updated {snapshot.id} queue in 'queues' collection. Update_time: {result.update_time}." 
- ) - - def _create_license_queue(self, license_id: str, client_name: str) -> None: - result: WriteResult = ( - self.db.collection("queues") - .document(license_id) - .set({"license_id": license_id, "client_name": client_name, "queue": []}) - ) logger.info( - f"Added {license_id} queue in 'queues' collection. Update_time: {result.update_time}." - ) - - def _remove_license_queue(self, license_id: str) -> None: - timestamp = self.db.collection("queues").document(license_id).delete() - logger.info( - f"Removed {license_id} queue in 'queues' collection. Update_time: {timestamp}." + f"Updated {snapshot.id} queue in 'queues' collection. Update_time: {result.update_time}." ) diff --git a/weather_dl_v2/fastapi-server/database/session.py b/weather_dl_v2/fastapi-server/database/session.py index 37e2f250..91579bb3 100644 --- a/weather_dl_v2/fastapi-server/database/session.py +++ b/weather_dl_v2/fastapi-server/database/session.py @@ -2,7 +2,7 @@ import abc import logging import firebase_admin -from firebase_admin import firestore +from google.cloud import firestore from firebase_admin import credentials from config_processing.util import get_wait_interval @@ -16,20 +16,16 @@ def _get_db(self): pass -db = None +db: firestore.AsyncClient = None -def get_db() -> firestore.firestore.Client: - """Acquire a firestore client, initializing the firebase app if necessary. - Will attempt to get the db client five times. If it's still unsuccessful, a - `ManifestException` will be raised. - """ +def get_async_client() -> firestore.AsyncClient: global db attempts = 0 while db is None: try: - db = firestore.client() + db = firestore.AsyncClient() except ValueError as e: # The above call will fail with a value error when the firebase app is not initialized. # Initialize the app here, and try again. 
diff --git a/weather_dl_v2/fastapi-server/environment.yml b/weather_dl_v2/fastapi-server/environment.yml index b7aee824..a6ce07fb 100644 --- a/weather_dl_v2/fastapi-server/environment.yml +++ b/weather_dl_v2/fastapi-server/environment.yml @@ -9,7 +9,7 @@ dependencies: - google-cloud-sdk=410.0.0 - pip: - kubernetes - - fastapi[all] + - fastapi[all]==0.97.0 - python-multipart - numpy - apache-beam[gcp] diff --git a/weather_dl_v2/fastapi-server/license_dep/deployment_creator.py b/weather_dl_v2/fastapi-server/license_dep/deployment_creator.py index 9f2dd554..481cdae5 100644 --- a/weather_dl_v2/fastapi-server/license_dep/deployment_creator.py +++ b/weather_dl_v2/fastapi-server/license_dep/deployment_creator.py @@ -1,7 +1,10 @@ +import logging from os import path import yaml from kubernetes import client, config +logger = logging.getLogger(__name__) + def create_license_deployment(license_id: str) -> str: """Creates a kubernetes workflow of type Job for downloading the data.""" @@ -25,7 +28,7 @@ def create_license_deployment(license_id: str) -> str: body=deployment_manifest, namespace="default" ) - print("Deployment created successfully:", response.metadata.name) + logger.info(f"Deployment created successfully: {response.metadata.name}") return deployment_name @@ -42,4 +45,4 @@ def terminate_license_deployment(license_id: str) -> None: # Delete the deployment api_instance.delete_namespaced_deployment(name=deployment_name, namespace="default") - print(f"Deployment '{deployment_name}' deleted successfully.") + logger.info(f"Deployment '{deployment_name}' deleted successfully.") diff --git a/weather_dl_v2/fastapi-server/main.py b/weather_dl_v2/fastapi-server/main.py index ffd7a05c..c058b87f 100644 --- a/weather_dl_v2/fastapi-server/main.py +++ b/weather_dl_v2/fastapi-server/main.py @@ -14,11 +14,11 @@ logger = logging.getLogger(__name__) -def create_pending_license_deployments(): +async def create_pending_license_deployments(): """Creates license deployments for Licenses 
whose deployments does not exist.""" license_handler = get_license_handler() create_deployment = get_create_deployment() - license_list = license_handler._get_license_without_deployment() + license_list = await license_handler._get_license_without_deployment() for license_id in license_list: try: @@ -32,10 +32,10 @@ def create_pending_license_deployments(): async def lifespan(app: FastAPI): logger.info("Started FastAPI server") # Boot up - # TODO: Replace hard-coded collection name by read a server config. - + # Make directory to store the uploaded config files. + os.makedirs(os.path.join(os.getcwd(), "config_files"), exist_ok=True) # Retrieve license information & create license deployment if needed. - create_pending_license_deployments() + await create_pending_license_deployments() yield # Clean up diff --git a/weather_dl_v2/fastapi-server/routers/download.py b/weather_dl_v2/fastapi-server/routers/download.py index 181e8c0f..b5f5236f 100644 --- a/weather_dl_v2/fastapi-server/routers/download.py +++ b/weather_dl_v2/fastapi-server/routers/download.py @@ -1,11 +1,15 @@ +import asyncio +import logging +import os +import shutil + from fastapi import APIRouter, HTTPException, BackgroundTasks, UploadFile, Depends from config_processing.pipeline import start_processing_config from database.download_handler import DownloadHandler, get_download_handler from database.queue_handler import QueueHandler, get_queue_handler from database.manifest_handler import ManifestHandler, get_manifest_handler -import concurrent.futures -import shutil -import os + +logger = logging.getLogger(__name__) router = APIRouter( prefix="/download", @@ -14,45 +18,40 @@ ) -def fetch_config_stats( +async def fetch_config_stats( config_name: str, client_name: str, manifest_handler: ManifestHandler ): """Get all the config stats parallely.""" - with concurrent.futures.ThreadPoolExecutor() as executor: - success_count_future = executor.submit( - manifest_handler._get_download_success_count, config_name - 
) - scheduled_count_future = executor.submit( - manifest_handler._get_download_scheduled_count, config_name - ) - failure_count_future = executor.submit( - manifest_handler._get_download_failure_count, config_name - ) - inprogress_count_future = executor.submit( - manifest_handler._get_download_inprogress_count, config_name - ) - total_count_future = executor.submit( - manifest_handler._get_download_total_count, config_name - ) - - concurrent.futures.wait([ - success_count_future, - scheduled_count_future, - failure_count_future, - inprogress_count_future, - total_count_future, - ]) + success_coroutine = manifest_handler._get_download_success_count(config_name) + scheduled_coroutine = manifest_handler._get_download_scheduled_count(config_name) + failure_coroutine = manifest_handler._get_download_failure_count(config_name) + inprogress_coroutine = manifest_handler._get_download_inprogress_count(config_name) + total_coroutine = manifest_handler._get_download_total_count(config_name) + + ( + success_count, + scheduled_count, + failure_count, + inprogress_count, + total_count, + ) = await asyncio.gather( + success_coroutine, + scheduled_coroutine, + failure_coroutine, + inprogress_coroutine, + total_coroutine, + ) - return { - "config_name": config_name, - "client_name": client_name, - "downloaded_shards": success_count_future.result(), - "scheduled_shards": scheduled_count_future.result(), - "failed_shards": failure_count_future.result(), - "in-progress_shards": inprogress_count_future.result(), - "total_shards": total_count_future.result(), - } + return { + "config_name": config_name, + "client_name": client_name, + "downloaded_shards": success_count, + "scheduled_shards": scheduled_count, + "failed_shards": failure_count, + "in-progress_shards": inprogress_count, + "total_shards": total_count, + } def get_fetch_config_stats(): @@ -78,7 +77,7 @@ def fetch_config_stats( def get_upload(): def upload(file: UploadFile): - dest = f"./config_files/{file.filename}" + dest = 
os.path.join(os.getcwd(), "config_files", file.filename) with open(dest, "wb+") as dest_: shutil.copyfileobj(file.file, dest_) return dest @@ -95,7 +94,7 @@ def upload(file: UploadFile): # Can submit a config to the server. @router.post("/") -def submit_download( +async def submit_download( file: UploadFile | None = None, licenses: list = [], background_tasks: BackgroundTasks = BackgroundTasks(), @@ -103,9 +102,14 @@ def submit_download( upload=Depends(get_upload), ): if not file: + logger.error("No upload file sent.") raise HTTPException(status_code=404, detail="No upload file sent.") else: - if download_handler._check_download_exists(file.filename): + if await download_handler._check_download_exists(file.filename): + logger.error( + f"Please stop the ongoing download of the config file '{file.filename}' " + "before attempting to start a new download." + ) raise HTTPException( status_code=400, detail=f"Please stop the ongoing download of the config file '{file.filename}' " @@ -118,7 +122,8 @@ def submit_download( return { "message": f"file '{file.filename}' saved at '{dest}' successfully." } - except Exception: + except Exception as e: + logger.error(f"Failed to save file '{file.filename} due to {e}.") raise HTTPException( status_code=500, detail=f"Failed to save file '{file.filename}'." 
) @@ -133,27 +138,17 @@ async def get_downloads( manifest_handler: ManifestHandler = Depends(get_manifest_handler), fetch_config_stats=Depends(get_fetch_config_stats), ): - downloads = download_handler._get_downloads(client_name) - config_stats = None - - with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor: - futures = [] - - for download in downloads: - future = executor.submit( - fetch_config_stats, - download["config_name"], - download["client_name"], - manifest_handler, - ) - futures.append(future) + downloads = await download_handler._get_downloads(client_name) + coroutines = [] - concurrent.futures.wait(futures) - config_stats = [ - future.result() for future in concurrent.futures.as_completed(futures) - ] + for download in downloads: + coroutines.append( + fetch_config_stats( + download["config_name"], download["client_name"], manifest_handler + ) + ) - return config_stats + return await asyncio.gather(*coroutines) # Get status of particular download @@ -164,14 +159,16 @@ async def get_download_by_config_name( manifest_handler: ManifestHandler = Depends(get_manifest_handler), fetch_config_stats=Depends(get_fetch_config_stats), ): - config = download_handler._get_download_by_config_name(config_name) + config = await download_handler._get_download_by_config_name(config_name) if config is None: + logger.error(f"Download config {config_name} not found in weather-dl v2.") raise HTTPException( - status_code=404, detail="Download config not found in weather-dl v2." 
+ status_code=404, + detail=f"Download config {config_name} not found in weather-dl v2.", ) - return fetch_config_stats( + return await fetch_config_stats( config["config_name"], config["client_name"], manifest_handler ) @@ -183,13 +180,15 @@ async def delete_download( download_handler: DownloadHandler = Depends(get_download_handler), queue_handler: QueueHandler = Depends(get_queue_handler), ): - if not download_handler._check_download_exists(config_name): + if not await download_handler._check_download_exists(config_name): + logger.error(f"No such download config {config_name} to stop & remove.") raise HTTPException( - status_code=404, detail="No such download config to stop & remove." + status_code=404, + detail=f"No such download config {config_name} to stop & remove.", ) - download_handler._stop_download(config_name) - queue_handler._update_queues_on_stop_download(config_name) + await download_handler._stop_download(config_name) + await queue_handler._update_queues_on_stop_download(config_name) return { "config_name": config_name, "message": "Download config stopped & removed successfully.", diff --git a/weather_dl_v2/fastapi-server/routers/license.py b/weather_dl_v2/fastapi-server/routers/license.py index ae7bbd2f..72c5ce08 100644 --- a/weather_dl_v2/fastapi-server/routers/license.py +++ b/weather_dl_v2/fastapi-server/routers/license.py @@ -1,9 +1,13 @@ +import logging + from fastapi import APIRouter, HTTPException, BackgroundTasks, Depends from pydantic import BaseModel from license_dep.deployment_creator import create_license_deployment, terminate_license_deployment from database.license_handler import LicenseHandler, get_license_handler from database.queue_handler import QueueHandler, get_queue_handler +logger = logging.getLogger(__name__) + # TODO: Make use of google secret manager. # REF: https://cloud.google.com/secret-manager. @@ -25,17 +29,34 @@ class LicenseInternal(License): ) +# Add/Update k8s deployment ID for existing license (intenally). 
+async def update_license_internal( + license_id: str, + k8s_deployment_id: str, + license_handler: LicenseHandler, +): + if not await license_handler._check_license_exists(license_id): + logger.info(f"No such license {license_id} to update.") + raise HTTPException( + status_code=404, detail=f"No such license {license_id} to update." + ) + license_dict = {"k8s_deployment_id": k8s_deployment_id} + + await license_handler._update_license(license_id, license_dict) + return {"license_id": license_id, "message": "License updated successfully."} + + def get_create_deployment(): - def create_deployment(license_id: str, license_handler: LicenseHandler): + async def create_deployment(license_id: str, license_handler: LicenseHandler): k8s_deployment_id = create_license_deployment(license_id) - update_license_internal(license_id, k8s_deployment_id, license_handler) + await update_license_internal(license_id, k8s_deployment_id, license_handler) return create_deployment def get_create_deployment_mock(): def create_deployment_mock(license_id: str, license_handler: LicenseHandler): - print("create deployment mocked") + logger.info("create deployment mocked") return create_deployment_mock @@ -46,7 +67,7 @@ def get_terminate_license_deployment(): def get_terminate_license_deployment_mock(): def get_terminate_license_deployment_mock(license_id): - print(f"terminating license deployment for {license_id}") + logger.info(f"terminating license deployment for {license_id}") return get_terminate_license_deployment_mock @@ -58,9 +79,9 @@ async def get_licenses( license_handler: LicenseHandler = Depends(get_license_handler), ): if client_name: - result = license_handler._get_license_by_client_name(client_name) + result = await license_handler._get_license_by_client_name(client_name) else: - result = license_handler._get_licenses() + result = await license_handler._get_licenses() return result @@ -69,9 +90,10 @@ async def get_licenses( async def get_license_by_license_id( license_id: str, 
license_handler: LicenseHandler = Depends(get_license_handler) ): - result = license_handler._get_license_by_license_id(license_id) + result = await license_handler._get_license_by_license_id(license_id) if not result: - raise HTTPException(status_code=404, detail="License not found.") + logger.info(f"License {license_id} not found.") + raise HTTPException(status_code=404, detail=f"License {license_id} not found.") return result @@ -84,31 +106,20 @@ async def update_license( create_deployment=Depends(get_create_deployment), terminate_license_deployment=Depends(get_terminate_license_deployment), ): - if not license_handler._check_license_exists(license_id): - raise HTTPException(status_code=404, detail="No such license to update.") + if not await license_handler._check_license_exists(license_id): + logger.error(f"No such license {license_id} to update.") + raise HTTPException( + status_code=404, detail=f"No such license {license_id} to update." + ) license_dict = license.dict() - license_handler._update_license(license_id, license_dict) + await license_handler._update_license(license_id, license_dict) terminate_license_deployment(license_id) - create_deployment(license_id, license_handler) + await create_deployment(license_id, license_handler) return {"license_id": license_id, "name": "License updated successfully."} -# Add/Update k8s deployment ID for existing license (intenally). 
-def update_license_internal( - license_id: str, - k8s_deployment_id: str, - license_handler: LicenseHandler, -): - if not license_handler._check_license_exists(license_id): - raise HTTPException(status_code=404, detail="No such license to update.") - license_dict = {"k8s_deployment_id": k8s_deployment_id} - - license_handler._update_license(license_id, license_dict) - return {"license_id": license_id, "message": "License updated successfully."} - - # Add new license @router.post("/") async def add_license( @@ -120,8 +131,8 @@ async def add_license( ): license_dict = license.dict() license_dict["k8s_deployment_id"] = "" - license_id = license_handler._add_license(license_dict) - queue_handler._create_license_queue(license_id, license_dict["client_name"]) + license_id = await license_handler._add_license(license_dict) + await queue_handler._create_license_queue(license_id, license_dict["client_name"]) background_tasks.add_task(create_deployment, license_id, license_handler) return {"license_id": license_id, "message": "License added successfully."} @@ -135,9 +146,12 @@ async def delete_license( queue_handler: QueueHandler = Depends(get_queue_handler), terminate_license_deployment=Depends(get_terminate_license_deployment), ): - if not license_handler._check_license_exists(license_id): - raise HTTPException(status_code=404, detail="No such license to delete.") - license_handler._delete_license(license_id) - queue_handler._remove_license_queue(license_id) + if not await license_handler._check_license_exists(license_id): + logger.error(f"No such license {license_id} to delete.") + raise HTTPException( + status_code=404, detail=f"No such license {license_id} to delete." 
+ ) + await license_handler._delete_license(license_id) + await queue_handler._remove_license_queue(license_id) background_tasks.add_task(terminate_license_deployment, license_id) return {"license_id": license_id, "message": "License removed successfully."} diff --git a/weather_dl_v2/fastapi-server/routers/queues.py b/weather_dl_v2/fastapi-server/routers/queues.py index f51d4834..75252e1f 100644 --- a/weather_dl_v2/fastapi-server/routers/queues.py +++ b/weather_dl_v2/fastapi-server/routers/queues.py @@ -1,6 +1,11 @@ +import logging + from fastapi import APIRouter, HTTPException, Depends from database.queue_handler import QueueHandler, get_queue_handler from database.license_handler import LicenseHandler, get_license_handler +from database.download_handler import DownloadHandler, get_download_handler + +logger = logging.getLogger(__name__) router = APIRouter( prefix="/queues", @@ -17,9 +22,9 @@ async def get_all_license_queue( queue_handler: QueueHandler = Depends(get_queue_handler), ): if client_name: - result = queue_handler._get_queue_by_client_name(client_name) + result = await queue_handler._get_queue_by_client_name(client_name) else: - result = queue_handler._get_queues() + result = await queue_handler._get_queues() return result @@ -28,46 +33,76 @@ async def get_all_license_queue( async def get_license_queue( license_id: str, queue_handler: QueueHandler = Depends(get_queue_handler) ): - result = queue_handler._get_queue_by_license_id(license_id) + result = await queue_handler._get_queue_by_license_id(license_id) if not result: - raise HTTPException(status_code=404, detail="License's priority not found.") + logger.error(f"License priority for {license_id} not found.") + raise HTTPException( + status_code=404, detail=f"License priority for {license_id} not found." 
+ ) return result # Change priority queue of particular license @router.post("/{license_id}") -def modify_license_queue( +async def modify_license_queue( license_id: str, priority_list: list | None = [], queue_handler: QueueHandler = Depends(get_queue_handler), license_handler: LicenseHandler = Depends(get_license_handler), + download_handler: DownloadHandler = Depends(get_download_handler) ): - if not license_handler._check_license_exists(license_id): - raise HTTPException(status_code=404, detail="License's priority not found.") + if not await license_handler._check_license_exists(license_id): + logger.error(f"License {license_id} not found.") + raise HTTPException(status_code=404, detail=f"License {license_id} not found.") + + for config_name in priority_list: + config = await download_handler._get_download_by_config_name(config_name) + if config is None: + logger.error(f"Download config {config_name} not found in weather-dl v2.") + raise HTTPException( + status_code=404, detail=f"Download config {config_name} not found in weather-dl v2." + ) try: - queue_handler._update_license_queue(license_id, priority_list) + await queue_handler._update_license_queue(license_id, priority_list) return {"message": f"'{license_id}' license priority updated successfully."} - except Exception: - return {"message": f"Failed to update '{license_id}' license priority."} + except Exception as e: + logger.error(f"Failed to update '{license_id}' license priority due to {e}.") + raise HTTPException( + status_code=404, detail=f"Failed to update '{license_id}' license priority." 
+ ) # Change config's priority in particular license @router.put("/priority/{license_id}") -def modify_config_priority_in_license( +async def modify_config_priority_in_license( license_id: str, config_name: str, priority: int, queue_handler: QueueHandler = Depends(get_queue_handler), license_handler: LicenseHandler = Depends(get_license_handler), + download_handler: DownloadHandler = Depends(get_download_handler), ): - if not license_handler._check_license_exists(license_id): - raise HTTPException(status_code=404, detail="License's priority not found.") + if not await license_handler._check_license_exists(license_id): + logger.error(f"License {license_id} not found.") + raise HTTPException(status_code=404, detail=f"License {license_id} not found.") + + config = await download_handler._get_download_by_config_name(config_name) + if config is None: + logger.error(f"Download config {config_name} not found in weather-dl v2.") + raise HTTPException( + status_code=404, + detail=f"Download config {config_name} not found in weather-dl v2.", + ) + try: - queue_handler._update_config_priority_in_license( + await queue_handler._update_config_priority_in_license( license_id, config_name, priority ) return { - "message": f"'{license_id}' license '{config_name}' priority updated successfully." + "message": f"'{license_id}' license -- '{config_name}' priority updated successfully." } - except Exception: - return {"message": f"Failed to update '{license_id}' license priority."} + except Exception as e: + logger.error(f"Failed to update '{license_id}' license priority due to {e}.") + raise HTTPException( + status_code=404, detail=f"Failed to update '{license_id}' license priority." 
+ ) diff --git a/weather_dl_v2/fastapi-server/tests/integration/test_download.py b/weather_dl_v2/fastapi-server/tests/integration/test_download.py index e28188db..08958737 100644 --- a/weather_dl_v2/fastapi-server/tests/integration/test_download.py +++ b/weather_dl_v2/fastapi-server/tests/integration/test_download.py @@ -47,13 +47,13 @@ def _submit_download(headers, file_path, licenses, code, expected): try: file = {"file": open(file_path, "rb")} except FileNotFoundError: - print("file not found.") + logger.info("file not found.") payload = {"licenses": licenses} response = client.post("/download", headers=headers, files=file, data=payload) - print(f"resp {response.json()}") + logger.info(f"resp {response.json()}") assert response.status_code == code assert response.json() == expected diff --git a/weather_dl_v2/fastapi-server/tests/integration/test_license.py b/weather_dl_v2/fastapi-server/tests/integration/test_license.py index e770310c..ef7a630a 100644 --- a/weather_dl_v2/fastapi-server/tests/integration/test_license.py +++ b/weather_dl_v2/fastapi-server/tests/integration/test_license.py @@ -94,7 +94,7 @@ def test_add_license_basic(): def _get_license_by_license_id(headers, license_id, code, expected): response = client.get(f"/license/{license_id}", headers=headers) - print(f"response {response.json()}") + logger.info(f"response {response.json()}") assert response.status_code == code assert response.json() == expected diff --git a/weather_dl_v2/fastapi-server/tests/integration/test_queues.py b/weather_dl_v2/fastapi-server/tests/integration/test_queues.py index 8539e89c..624c3890 100644 --- a/weather_dl_v2/fastapi-server/tests/integration/test_queues.py +++ b/weather_dl_v2/fastapi-server/tests/integration/test_queues.py @@ -95,7 +95,7 @@ def test_modify_license_queue_wrong_license_id(): def _modify_config_priority_in_license(headers, license_id, query, code, expected): response = client.put(f"/queues/priority/{license_id}", params=query) - print(f"response 
{response.json()}") + logger.info(f"response {response.json()}") assert response.status_code == code assert response.json() == expected diff --git a/weather_dl_v2/license_deployment/README.md b/weather_dl_v2/license_deployment/README.md index 1d300ea0..9afd2bd1 100644 --- a/weather_dl_v2/license_deployment/README.md +++ b/weather_dl_v2/license_deployment/README.md @@ -12,6 +12,10 @@ conda activate weather-dl-v2-license-dep Please write down the downloader's docker image path at Line 11 of downloader.yaml. ``` +### Add manifest collection name in manifest.py +``` +Please write down the manifest collection name at Line 500 of manifest.py. +``` ### Create docker image for license deployment ``` diff --git a/weather_dl_v2/license_deployment/manifest.py b/weather_dl_v2/license_deployment/manifest.py index 873b2213..fdb80ea8 100644 --- a/weather_dl_v2/license_deployment/manifest.py +++ b/weather_dl_v2/license_deployment/manifest.py @@ -497,5 +497,5 @@ def _update(self, download_status: DownloadStatus) -> None: def root_document_for_store(self, store_scheme: str) -> DocumentReference: """Get the root manifest document given the user's config and current document's storage location.""" # TODO: Get user-defined collection for manifest. - root_collection = "test_manifest" + root_collection = "XXXXXXXXXXXXXXX" return self._get_db().collection(root_collection).document(store_scheme) From 18ce1b725b68a1fcbfc9f4038d6814233af2038d Mon Sep 17 00:00:00 2001 From: Rahul Mahrsee Date: Tue, 8 Aug 2023 10:27:23 +0000 Subject: [PATCH 28/51] Implemented force-download. 
--- weather_dl_v2/cli/CLI-Documentation.md | 5 +++-- weather_dl_v2/cli/app/services/download_service.py | 7 +++++-- weather_dl_v2/cli/app/subcommands/download.py | 13 ++++++++----- weather_dl_v2/fastapi-server/API-Interactions.md | 2 +- .../fastapi-server/config_processing/pipeline.py | 4 ++-- weather_dl_v2/fastapi-server/routers/download.py | 3 ++- 6 files changed, 21 insertions(+), 13 deletions(-) diff --git a/weather_dl_v2/cli/CLI-Documentation.md b/weather_dl_v2/cli/CLI-Documentation.md index 0fe3093f..e4f86bfe 100644 --- a/weather_dl_v2/cli/CLI-Documentation.md +++ b/weather_dl_v2/cli/CLI-Documentation.md @@ -34,11 +34,12 @@ Manage download configs. > `FILE_PATH` : Path to config file. ##### Options -> `-l/--license` : License ID to which this download has to be added to. +> `-l/--license` (Required): License ID to which this download has to be added to. +> `-f/--force-download` : Force redownload of partitions that were previously downloaded. ##### Usage ``` -weather-dl-v2 download add /path/to/example.cfg –l L1 -l L2 +weather-dl-v2 download add /path/to/example.cfg –l L1 -l L2 [--force-download] ``` diff --git a/weather_dl_v2/cli/app/services/download_service.py b/weather_dl_v2/cli/app/services/download_service.py index 241e13a3..171f4300 100644 --- a/weather_dl_v2/cli/app/services/download_service.py +++ b/weather_dl_v2/cli/app/services/download_service.py @@ -22,7 +22,8 @@ def _get_download_by_config(self, config_name: str): pass @abc.abstractmethod - def _add_new_download(self, file_path: str, licenses: t.List[str]): + def _add_new_download(self, file_path: str, licenses: t.List[str], + force_download: bool): pass @abc.abstractmethod @@ -53,7 +54,8 @@ def _get_download_by_config(self, config_name: str): header={"accept": "application/json"}, ) - def _add_new_download(self, file_path: str, licenses: t.List[str]): + def _add_new_download(self, file_path: str, licenses: t.List[str], + force_download: bool): try: file = {"file": open(file_path, "rb")} except 
FileNotFoundError: @@ -64,6 +66,7 @@ def _add_new_download(self, file_path: str, licenses: t.List[str]): header={"accept": "application/json"}, file=file, payload={"licenses": licenses}, + query={"force_download": force_download}, ) def _remove_download(self, config_name: str): diff --git a/weather_dl_v2/cli/app/subcommands/download.py b/weather_dl_v2/cli/app/subcommands/download.py index 10b5fbb4..92f566c8 100644 --- a/weather_dl_v2/cli/app/subcommands/download.py +++ b/weather_dl_v2/cli/app/subcommands/download.py @@ -40,13 +40,16 @@ def submit_download( ], license: Annotated[ List[str], typer.Option("--license", "-l", help="License ID.") - ] = [], + ], + force_download: Annotated[ + bool, typer.Option( + "-f", "--force-download", + help="Force redownload of partitions that were previously downloaded." + ) + ] = False, ): - if len(license) == 0: - print("No licenses mentioned. Please specify licenese Id.") - return - print(download_service._add_new_download(file_path, license)) + print(download_service._add_new_download(file_path, license, force_download)) @app.command("get", help="Get a particular config.") diff --git a/weather_dl_v2/fastapi-server/API-Interactions.md b/weather_dl_v2/fastapi-server/API-Interactions.md index 55013377..f5a8f699 100644 --- a/weather_dl_v2/fastapi-server/API-Interactions.md +++ b/weather_dl_v2/fastapi-server/API-Interactions.md @@ -3,7 +3,7 @@ |---|---|---| | `weather-dl-v2 ping` | `get` | `/` | Download | | | -| `weather-dl-v2 download add –l ` | `post` | `/download/` | +| `weather-dl-v2 download add –l [--force-download]` | `post` | `/download?force_download={value}` | | `weather-dl-v2 download list` | `get` | `/download/` | | `weather-dl-v2 download list --filter client_name=` | `get` | `/download?client_name={name}` | | `weather-dl-v2 download get ` | `get` | `/download/{config_name}` | diff --git a/weather_dl_v2/fastapi-server/config_processing/pipeline.py b/weather_dl_v2/fastapi-server/config_processing/pipeline.py index 
1292eb9c..5d930531 100644 --- a/weather_dl_v2/fastapi-server/config_processing/pipeline.py +++ b/weather_dl_v2/fastapi-server/config_processing/pipeline.py @@ -18,7 +18,7 @@ def _do_partitions(partition_obj: PartitionConfig): partition_obj.update_manifest_collection(partition) -async def start_processing_config(config_file, licenses): +async def start_processing_config(config_file, licenses, force_download): config = {} manifest = FirestoreManifest() @@ -27,7 +27,7 @@ async def start_processing_config(config_file, licenses): config_name = os.path.split(config_file)[1] config = process_config(f, config_name) - config.force_download = True + config.force_download = force_download config.user_id = getpass.getuser() partition_obj = PartitionConfig(config, None, manifest) diff --git a/weather_dl_v2/fastapi-server/routers/download.py b/weather_dl_v2/fastapi-server/routers/download.py index b5f5236f..3d3b6f22 100644 --- a/weather_dl_v2/fastapi-server/routers/download.py +++ b/weather_dl_v2/fastapi-server/routers/download.py @@ -97,6 +97,7 @@ def upload(file: UploadFile): async def submit_download( file: UploadFile | None = None, licenses: list = [], + force_download: bool = False, background_tasks: BackgroundTasks = BackgroundTasks(), download_handler: DownloadHandler = Depends(get_download_handler), upload=Depends(get_upload), @@ -118,7 +119,7 @@ async def submit_download( try: dest = upload(file) # Start processing config. - background_tasks.add_task(start_processing_config, dest, licenses) + background_tasks.add_task(start_processing_config, dest, licenses, force_download) return { "message": f"file '{file.filename}' saved at '{dest}' successfully." } From facc90982dceefeb171a344b00734b3ff7e9b981 Mon Sep 17 00:00:00 2001 From: Rahul Mahrsee Date: Tue, 8 Aug 2023 10:30:50 +0000 Subject: [PATCH 29/51] await create_deployment call in main.py fast-api server. 
--- weather_dl_v2/fastapi-server/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/weather_dl_v2/fastapi-server/main.py b/weather_dl_v2/fastapi-server/main.py index c058b87f..f8b36350 100644 --- a/weather_dl_v2/fastapi-server/main.py +++ b/weather_dl_v2/fastapi-server/main.py @@ -23,7 +23,7 @@ async def create_pending_license_deployments(): for license_id in license_list: try: logger.info(f"Creating license deployment for {license_id}") - create_deployment(license_id, license_handler) + await create_deployment(license_id, license_handler) except Exception as e: logger.error(f"License deployment failed for {license_id}. Exception: {e}") From a07229ea2e9aeaabef2ddd7ff452cb044501395d Mon Sep 17 00:00:00 2001 From: Rahul Mahrsee Date: Tue, 8 Aug 2023 13:02:11 +0000 Subject: [PATCH 30/51] Improved config partitioning -- maintain status. --- .../config_processing/pipeline.py | 17 +++++++++++++---- .../database/download_handler.py | 19 +++++++++++++++++++ .../fastapi-server/routers/download.py | 11 ++++++----- 3 files changed, 38 insertions(+), 9 deletions(-) diff --git a/weather_dl_v2/fastapi-server/config_processing/pipeline.py b/weather_dl_v2/fastapi-server/config_processing/pipeline.py index 5d930531..f823eb17 100644 --- a/weather_dl_v2/fastapi-server/config_processing/pipeline.py +++ b/weather_dl_v2/fastapi-server/config_processing/pipeline.py @@ -1,4 +1,5 @@ import getpass +import logging import os from .parsers import process_config from .partition import PartitionConfig @@ -7,6 +8,8 @@ from database.queue_handler import get_queue_handler from fastapi.concurrency import run_in_threadpool +logger = logging.getLogger(__name__) + download_handler = get_download_handler() queue_handler = get_queue_handler() @@ -32,9 +35,15 @@ async def start_processing_config(config_file, licenses, force_download): partition_obj = PartitionConfig(config, None, manifest) - # Prepare partitions - await run_in_threadpool(_do_partitions, partition_obj) - # Make 
entry in 'download' & 'queues' collection. await download_handler._start_download(config_name, config.client) - await queue_handler._update_queues_on_start_download(config_name, licenses) + await download_handler._mark_partitioning_status(config_name, "Partitioning in-progress.") + try: + # Prepare partitions + await run_in_threadpool(_do_partitions, partition_obj) + await download_handler._mark_partitioning_status(config_name, "Partitioning completed.") + await queue_handler._update_queues_on_start_download(config_name, licenses) + except Exception as e: + error_str = f"Partitioning failed for {config_name} due to {e}." + logger.error(error_str) + await download_handler._mark_partitioning_status(config_name, error_str) diff --git a/weather_dl_v2/fastapi-server/database/download_handler.py b/weather_dl_v2/fastapi-server/database/download_handler.py index e8fa79f8..650c00d5 100644 --- a/weather_dl_v2/fastapi-server/database/download_handler.py +++ b/weather_dl_v2/fastapi-server/database/download_handler.py @@ -26,6 +26,10 @@ async def _start_download(self, config_name: str, client_name: str) -> None: @abc.abstractmethod async def _stop_download(self, config_name: str) -> None: pass + + @abc.abstractmethod + async def _mark_partitioning_status(self, config_name: str, status: str) -> None: + pass @abc.abstractmethod async def _check_download_exists(self, config_name: str) -> bool: @@ -55,6 +59,11 @@ async def _stop_download(self, config_name: str) -> None: f"Removed {config_name} in 'download' collection. Update_time: 000000." ) + async def _mark_partitioning_status(self, config_name: str, status: str) -> None: + logger.info( + f"Updated {config_name} in 'download' collection. Update_time: 000000." + ) + async def _check_download_exists(self, config_name: str) -> bool: if config_name == "no_exist": return False @@ -95,6 +104,16 @@ async def _stop_download(self, config_name: str) -> None: f"Removed {config_name} in 'download' collection. Update_time: {timestamp}." 
) + async def _mark_partitioning_status(self, config_name: str, status: str) -> None: + timestamp = ( + await self.db.collection(self.collection) + .document(config_name) + .update({"status": status}) + ) + logger.info( + f"Updated {config_name} in 'download' collection. Update_time: {timestamp}." + ) + async def _check_download_exists(self, config_name: str) -> bool: result: DocumentSnapshot = ( await self.db.collection(self.collection).document(config_name).get() diff --git a/weather_dl_v2/fastapi-server/routers/download.py b/weather_dl_v2/fastapi-server/routers/download.py index 3d3b6f22..397d043c 100644 --- a/weather_dl_v2/fastapi-server/routers/download.py +++ b/weather_dl_v2/fastapi-server/routers/download.py @@ -19,7 +19,7 @@ async def fetch_config_stats( - config_name: str, client_name: str, manifest_handler: ManifestHandler + config_name: str, client_name: str, status: str, manifest_handler: ManifestHandler ): """Get all the config stats parallely.""" @@ -46,6 +46,7 @@ async def fetch_config_stats( return { "config_name": config_name, "client_name": client_name, + "partitioning_status": status, "downloaded_shards": success_count, "scheduled_shards": scheduled_count, "failed_shards": failure_count, @@ -145,7 +146,7 @@ async def get_downloads( for download in downloads: coroutines.append( fetch_config_stats( - download["config_name"], download["client_name"], manifest_handler + download["config_name"], download["client_name"], download['status'], manifest_handler ) ) @@ -160,9 +161,9 @@ async def get_download_by_config_name( manifest_handler: ManifestHandler = Depends(get_manifest_handler), fetch_config_stats=Depends(get_fetch_config_stats), ): - config = await download_handler._get_download_by_config_name(config_name) + download = await download_handler._get_download_by_config_name(config_name) - if config is None: + if download is None: logger.error(f"Download config {config_name} not found in weather-dl v2.") raise HTTPException( status_code=404, @@ 
-170,7 +171,7 @@ async def get_download_by_config_name( ) return await fetch_config_stats( - config["config_name"], config["client_name"], manifest_handler + download["config_name"], download["client_name"], download['status'], manifest_handler ) From 2b76056ad03a30036c651c538a4f888bcacd064c Mon Sep 17 00:00:00 2001 From: aniketinfocusp <122869307+aniketinfocusp@users.noreply.github.com> Date: Wed, 9 Aug 2023 21:55:57 +0530 Subject: [PATCH 31/51] refetch config (#378) * added config refetching * added cli command for refetch * updated docs * added license check * loader for cli --- weather_dl_v2/cli/CLI-Documentation.md | 17 +++ .../cli/app/services/download_service.py | 12 ++ .../cli/app/services/network_service.py | 74 +++++------ weather_dl_v2/cli/app/subcommands/download.py | 9 ++ weather_dl_v2/cli/app/utils.py | 59 +++++++++ .../fastapi-server/API-Interactions.md | 1 + weather_dl_v2/fastapi-server/README.md | 5 - .../config_processing/manifest.py | 5 +- .../config_processing/pipeline.py | 8 +- .../database/download_handler.py | 2 +- .../database/manifest_handler.py | 29 +++++ .../fastapi-server/database/queue_handler.py | 4 +- .../fastapi-server/routers/download.py | 118 +++++++++++++++++- .../fastapi-server/routers/queues.py | 7 +- 14 files changed, 290 insertions(+), 60 deletions(-) diff --git a/weather_dl_v2/cli/CLI-Documentation.md b/weather_dl_v2/cli/CLI-Documentation.md index e4f86bfe..060dcd15 100644 --- a/weather_dl_v2/cli/CLI-Documentation.md +++ b/weather_dl_v2/cli/CLI-Documentation.md @@ -90,6 +90,23 @@ weather-dl-v2 download remove example.cfg ``` +
+ weather-dl-v2 download refetch
+ Refetch all non-successful partitions of a config. +
+ +##### Arguments +> `CONFIG_NAME` : Name of the download config. + +##### Options +> `-l/--license` (Required): License ID to which this download has to be added to. + +##### Usage +``` +weather-dl-v2 download refetch example.cfg -l L1 -l L2 +``` +
+
## License diff --git a/weather_dl_v2/cli/app/services/download_service.py b/weather_dl_v2/cli/app/services/download_service.py index 171f4300..970f39c1 100644 --- a/weather_dl_v2/cli/app/services/download_service.py +++ b/weather_dl_v2/cli/app/services/download_service.py @@ -1,5 +1,6 @@ import abc import logging +import json import typing as t from app.services.network_service import network_service from app.config import Config @@ -30,6 +31,10 @@ def _add_new_download(self, file_path: str, licenses: t.List[str], def _remove_download(self, config_name: str): pass + @abc.abstractmethod + def _refetch_config_partitions(self, config_name: str, licenses: t.List[str]): + pass + class DownloadServiceNetwork(DownloadService): @@ -73,6 +78,13 @@ def _remove_download(self, config_name: str): return network_service.delete( uri=f"{self.endpoint}/{config_name}", header={"accept": "application/json"} ) + + def _refetch_config_partitions(self, config_name: str, licenses: t.List[str]): + return network_service.post( + uri=f"{self.endpoint}/retry/{config_name}", + header={"accept": "application/json"}, + payload=json.dumps({"licenses": licenses}) + ) class DownloadServiceMock(DownloadService): diff --git a/weather_dl_v2/cli/app/services/network_service.py b/weather_dl_v2/cli/app/services/network_service.py index f614ca67..e95ab563 100644 --- a/weather_dl_v2/cli/app/services/network_service.py +++ b/weather_dl_v2/cli/app/services/network_service.py @@ -1,22 +1,10 @@ import requests import json import logging -from time import time - +from app.utils import Loader, timeit logger = logging.getLogger(__name__) -def timeit(func): - def wrap_func(*args, **kwargs): - t1 = time() - result = func(*args, **kwargs) - t2 = time() - print(f"[executed in {(t2-t1):.4f}s.]") - return result - - return wrap_func - - class NetworkService: def parse_response(self, response: requests.Response): @@ -32,44 +20,48 @@ def parse_response(self, response: requests.Response): @timeit def get(self, uri, 
header, query=None, payload=None): - try: - x = requests.get(uri, params=query, headers=header, data=payload) - return self.parse_response(x) - except requests.exceptions.RequestException as e: - logger.error(f"request error: {e}") - raise SystemExit(e) + with Loader("Sending request..."): + try: + x = requests.get(uri, params=query, headers=header, data=payload) + return self.parse_response(x) + except requests.exceptions.RequestException as e: + logger.error(f"request error: {e}") + raise SystemExit(e) @timeit def post(self, uri, header, query=None, payload=None, file=None): - try: - x = requests.post( - uri, params=query, headers=header, data=payload, files=file - ) - return self.parse_response(x) - except requests.exceptions.RequestException as e: - logger.error(f"request error: {e}") - raise SystemExit(e) + with Loader("Sending request..."): + try: + x = requests.post( + uri, params=query, headers=header, data=payload, files=file + ) + return self.parse_response(x) + except requests.exceptions.RequestException as e: + logger.error(f"request error: {e}") + raise SystemExit(e) @timeit def put(self, uri, header, query=None, payload=None, file=None): - try: - x = requests.put( - uri, params=query, headers=header, data=payload, files=file - ) + with Loader("Sending request..."): + try: + x = requests.put( + uri, params=query, headers=header, data=payload, files=file + ) - return self.parse_response(x) - except requests.exceptions.RequestException as e: - logger.error(f"request error: {e}") - raise SystemExit(e) + return self.parse_response(x) + except requests.exceptions.RequestException as e: + logger.error(f"request error: {e}") + raise SystemExit(e) @timeit def delete(self, uri, header, query=None): - try: - x = requests.delete(uri, params=query, headers=header) - return self.parse_response(x) - except requests.exceptions.RequestException as e: - logger.error(f"request error: {e}") - raise SystemExit(e) + with Loader("Sending request..."): + try: + x = 
requests.delete(uri, params=query, headers=header) + return self.parse_response(x) + except requests.exceptions.RequestException as e: + logger.error(f"request error: {e}") + raise SystemExit(e) network_service = NetworkService() diff --git a/weather_dl_v2/cli/app/subcommands/download.py b/weather_dl_v2/cli/app/subcommands/download.py index 92f566c8..c6cc8004 100644 --- a/weather_dl_v2/cli/app/subcommands/download.py +++ b/weather_dl_v2/cli/app/subcommands/download.py @@ -64,3 +64,12 @@ def remove_download( config_name: Annotated[str, typer.Argument(help="Config file name.")] ): print(download_service._remove_download(config_name)) + +@app.command("refetch", help="Reschedule all partitions of a config that are not successful.") +def refetch_config( + config_name: Annotated[str, typer.Argument(help="Config file name.")], + license: Annotated[ + List[str], typer.Option("--license", "-l", help="License ID.") + ], +): + print(download_service._refetch_config_partitions(config_name, license)) diff --git a/weather_dl_v2/cli/app/utils.py b/weather_dl_v2/cli/app/utils.py index 583b690d..e27ca7f2 100644 --- a/weather_dl_v2/cli/app/utils.py +++ b/weather_dl_v2/cli/app/utils.py @@ -3,10 +3,69 @@ import dataclasses import typing as t import json +from time import time +from itertools import cycle +from shutil import get_terminal_size +from threading import Thread +from time import sleep logger = logging.getLogger(__name__) + +def timeit(func): + def wrap_func(*args, **kwargs): + t1 = time() + result = func(*args, **kwargs) + t2 = time() + print(f"[executed in {(t2-t1):.4f}s.]") + return result + + return wrap_func + +class Loader: + def __init__(self, desc="Loading...", end="Done!", timeout=0.1): + """ + A loader-like context manager + + Args: + desc (str, optional): The loader's description. Defaults to "Loading...". + end (str, optional): Final print. Defaults to "Done!". + timeout (float, optional): Sleep time between prints. Defaults to 0.1. 
+ """ + self.desc = desc + self.end = end + self.timeout = timeout + + self._thread = Thread(target=self._animate, daemon=True) + self.steps = ["⢿", "⣻", "⣽", "⣾", "⣷", "⣯", "⣟", "⡿"] + self.done = False + + def start(self): + self._thread.start() + return self + + def _animate(self): + for c in cycle(self.steps): + if self.done: + break + print(f"\r{self.desc} {c}", flush=True, end="") + sleep(self.timeout) + + def __enter__(self): + self.start() + + def stop(self): + self.done = True + cols = get_terminal_size((80, 20)).columns + print("\r" + " " * cols, end="", flush=True) + print(f"\r{self.end}", flush=True) + + def __exit__(self, exc_type, exc_value, tb): + # handle exceptions with those variables ^ + self.stop() + + @dataclasses.dataclass class Validator(abc.ABC): valid_keys: t.List[str] diff --git a/weather_dl_v2/fastapi-server/API-Interactions.md b/weather_dl_v2/fastapi-server/API-Interactions.md index f5a8f699..45a9d768 100644 --- a/weather_dl_v2/fastapi-server/API-Interactions.md +++ b/weather_dl_v2/fastapi-server/API-Interactions.md @@ -8,6 +8,7 @@ | `weather-dl-v2 download list --filter client_name=` | `get` | `/download?client_name={name}` | | `weather-dl-v2 download get ` | `get` | `/download/{config_name}` | | `weather-dl-v2 download remove ` | `delete` | `/download/{config_name}` | +| `weather-dl-v2 download refetch -l ` | `post` | `/download/refetch/{config_name}` | | License | | | | `weather-dl-v2 license add ` | `post` | `/license/` | | `weather-dl-v2 license get ` | `get` | `/license/{license_id}` | diff --git a/weather_dl_v2/fastapi-server/README.md b/weather_dl_v2/fastapi-server/README.md index 20392c36..27800079 100644 --- a/weather_dl_v2/fastapi-server/README.md +++ b/weather_dl_v2/fastapi-server/README.md @@ -45,11 +45,6 @@ conda activate weather-dl-v2-server Please make approriate changes in server config, if required. 
``` -### Add manifest collection name in config_processing/manifest.py -``` -Please write down the manifest collection name at Line 498 of manifest.py. -``` - ### To run fastapi server: ``` uvicorn main:app --reload diff --git a/weather_dl_v2/fastapi-server/config_processing/manifest.py b/weather_dl_v2/fastapi-server/config_processing/manifest.py index 062bb66f..22aeb5e8 100644 --- a/weather_dl_v2/fastapi-server/config_processing/manifest.py +++ b/weather_dl_v2/fastapi-server/config_processing/manifest.py @@ -25,7 +25,7 @@ from firebase_admin import firestore from google.cloud.firestore_v1 import DocumentReference from google.cloud.firestore_v1.types import WriteResult - +from server_config import get_config from database.session import Database """An implementation-dependent Manifest URI.""" @@ -494,6 +494,5 @@ def _update(self, download_status: DownloadStatus) -> None: def root_document_for_store(self, store_scheme: str) -> DocumentReference: """Get the root manifest document given the user's config and current document's storage location.""" - # TODO: Get user-defined collection for manifest. - root_collection = "XXXXXXXXXXXX" + root_collection = get_config().manifest_collection return self._get_db().collection(root_collection).document(store_scheme) diff --git a/weather_dl_v2/fastapi-server/config_processing/pipeline.py b/weather_dl_v2/fastapi-server/config_processing/pipeline.py index f823eb17..53534aa4 100644 --- a/weather_dl_v2/fastapi-server/config_processing/pipeline.py +++ b/weather_dl_v2/fastapi-server/config_processing/pipeline.py @@ -37,11 +37,15 @@ async def start_processing_config(config_file, licenses, force_download): # Make entry in 'download' & 'queues' collection. await download_handler._start_download(config_name, config.client) - await download_handler._mark_partitioning_status(config_name, "Partitioning in-progress.") + await download_handler._mark_partitioning_status( + config_name, "Partitioning in-progress." 
+ ) try: # Prepare partitions await run_in_threadpool(_do_partitions, partition_obj) - await download_handler._mark_partitioning_status(config_name, "Partitioning completed.") + await download_handler._mark_partitioning_status( + config_name, "Partitioning completed." + ) await queue_handler._update_queues_on_start_download(config_name, licenses) except Exception as e: error_str = f"Partitioning failed for {config_name} due to {e}." diff --git a/weather_dl_v2/fastapi-server/database/download_handler.py b/weather_dl_v2/fastapi-server/database/download_handler.py index 650c00d5..1238a526 100644 --- a/weather_dl_v2/fastapi-server/database/download_handler.py +++ b/weather_dl_v2/fastapi-server/database/download_handler.py @@ -26,7 +26,7 @@ async def _start_download(self, config_name: str, client_name: str) -> None: @abc.abstractmethod async def _stop_download(self, config_name: str) -> None: pass - + @abc.abstractmethod async def _mark_partitioning_status(self, config_name: str, status: str) -> None: pass diff --git a/weather_dl_v2/fastapi-server/database/manifest_handler.py b/weather_dl_v2/fastapi-server/database/manifest_handler.py index 371c3758..1edafd2e 100644 --- a/weather_dl_v2/fastapi-server/database/manifest_handler.py +++ b/weather_dl_v2/fastapi-server/database/manifest_handler.py @@ -38,6 +38,10 @@ async def _get_download_inprogress_count(self, config_name: str) -> int: async def _get_download_total_count(self, config_name: str) -> int: pass + @abc.abstractmethod + async def _get_non_successfull_downloads(self, config_name: str) -> list: + pass + class ManifestHandlerMock(ManifestHandler): @@ -56,6 +60,9 @@ async def _get_download_success_count(self, config_name: str) -> int: async def _get_download_total_count(self, config_name: str) -> int: return 0 + async def _get_non_successfull_downloads(self, config_name: str) -> list: + return [] + class ManifestHandlerFirestore(ManifestHandler): @@ -135,3 +142,25 @@ async def _get_download_total_count(self, 
config_name: str) -> int: count = result[0][0].value return count + + async def _get_non_successfull_downloads(self, config_name: str) -> list: + or_filter = Or( + filters=[ + FieldFilter("stage", "==", "fetch"), + FieldFilter("stage", "==", "download"), + And( + filters=[ + FieldFilter("status", "!=", "success"), + FieldFilter("stage", "==", "upload"), + ] + ), + ] + ) + + docs = ( + self.db.collection(self.collection) + .where(filter=FieldFilter("config_name", "==", config_name)) + .where(filter=or_filter) + .stream() + ) + return [doc.to_dict() async for doc in docs] diff --git a/weather_dl_v2/fastapi-server/database/queue_handler.py b/weather_dl_v2/fastapi-server/database/queue_handler.py index d703d3db..62c8f2ff 100644 --- a/weather_dl_v2/fastapi-server/database/queue_handler.py +++ b/weather_dl_v2/fastapi-server/database/queue_handler.py @@ -106,7 +106,9 @@ async def _update_queues_on_stop_download(self, config_name: str) -> None: async def _update_config_priority_in_license( self, license_id: str, config_name: str, priority: int ) -> None: - logger.info("Updated snapshot.id queue in 'queues' collection. Update_time: 00000.") + logger.info( + "Updated snapshot.id queue in 'queues' collection. Update_time: 00000." 
+ ) class QueueHandlerFirestore(QueueHandler): diff --git a/weather_dl_v2/fastapi-server/routers/download.py b/weather_dl_v2/fastapi-server/routers/download.py index 397d043c..72cc74ee 100644 --- a/weather_dl_v2/fastapi-server/routers/download.py +++ b/weather_dl_v2/fastapi-server/routers/download.py @@ -2,12 +2,16 @@ import logging import os import shutil +import json -from fastapi import APIRouter, HTTPException, BackgroundTasks, UploadFile, Depends +from fastapi import APIRouter, HTTPException, BackgroundTasks, UploadFile, Depends, Body from config_processing.pipeline import start_processing_config from database.download_handler import DownloadHandler, get_download_handler from database.queue_handler import QueueHandler, get_queue_handler +from database.license_handler import LicenseHandler, get_license_handler from database.manifest_handler import ManifestHandler, get_manifest_handler +from config_processing.manifest import FirestoreManifest, Manifest +from fastapi.concurrency import run_in_threadpool logger = logging.getLogger(__name__) @@ -93,6 +97,59 @@ def upload(file: UploadFile): return upload +def get_reschedule_partitions(): + def invoke_manifest_schedule(partition_list: list, manifest: Manifest): + for partition in partition_list: + manifest.schedule( + partition["config_name"], + partition["dataset"], + json.loads(partition["selection"]), + partition["location"], + partition["username"], + ) + + async def reschedule_partitions( + config_name: str, + licenses: list, + manifest_handler: ManifestHandler, + download_handler: DownloadHandler, + queue_handler: QueueHandler, + ): + partition_list = await manifest_handler._get_non_successfull_downloads( + config_name + ) + manifest = FirestoreManifest() + await download_handler._mark_partitioning_status( + config_name, "Partitioning in-progress." 
+ ) + + try: + await run_in_threadpool(invoke_manifest_schedule, partition_list, manifest) + await download_handler._mark_partitioning_status( + config_name, "Partitioning completed." + ) + await queue_handler._update_queues_on_start_download(config_name, licenses) + except Exception as e: + error_str = f"Partitioning failed for {config_name} due to {e}." + logger.error(error_str) + await download_handler._mark_partitioning_status(config_name, error_str) + + return reschedule_partitions + + +def get_reschedule_partitions_mock(): + def reschedule_partitions( + config_name: str, + licenses: list, + manifest_handler: ManifestHandler, + download_handler: DownloadHandler, + queue_handler: QueueHandler, + ): + pass + + return reschedule_partitions + + # Can submit a config to the server. @router.post("/") async def submit_download( @@ -101,6 +158,7 @@ async def submit_download( force_download: bool = False, background_tasks: BackgroundTasks = BackgroundTasks(), download_handler: DownloadHandler = Depends(get_download_handler), + license_handler: LicenseHandler = Depends(get_license_handler), upload=Depends(get_upload), ): if not file: @@ -117,10 +175,19 @@ async def submit_download( detail=f"Please stop the ongoing download of the config file '{file.filename}' " "before attempting to start a new download.", ) + + for license_id in licenses: + if not await license_handler._check_license_exists(license_id): + logger.info(f"No such license {license_id}.") + raise HTTPException( + status_code=404, detail=f"No such license {license_id}." + ) try: dest = upload(file) # Start processing config. - background_tasks.add_task(start_processing_config, dest, licenses, force_download) + background_tasks.add_task( + start_processing_config, dest, licenses, force_download + ) return { "message": f"file '{file.filename}' saved at '{dest}' successfully." 
} @@ -146,7 +213,10 @@ async def get_downloads( for download in downloads: coroutines.append( fetch_config_stats( - download["config_name"], download["client_name"], download['status'], manifest_handler + download["config_name"], + download["client_name"], + download["status"], + manifest_handler, ) ) @@ -171,7 +241,10 @@ async def get_download_by_config_name( ) return await fetch_config_stats( - download["config_name"], download["client_name"], download['status'], manifest_handler + download["config_name"], + download["client_name"], + download["status"], + manifest_handler, ) @@ -195,3 +268,40 @@ async def delete_download( "config_name": config_name, "message": "Download config stopped & removed successfully.", } + + +@router.post("/retry/{config_name}") +async def retry_config( + config_name: str, + licenses: list = Body(embed=True), + background_tasks: BackgroundTasks = BackgroundTasks(), + download_handler: DownloadHandler = Depends(get_download_handler), + queue_handler: QueueHandler = Depends(get_queue_handler), + manifest_handler: ManifestHandler = Depends(get_manifest_handler), + license_handler: LicenseHandler = Depends(get_license_handler), + reschedule_partitions=Depends(get_reschedule_partitions), +): + if not await download_handler._check_download_exists(config_name): + logger.error(f"No such download config {config_name} to stop & remove.") + raise HTTPException( + status_code=404, + detail=f"No such download config {config_name} to stop & remove.", + ) + + for license_id in licenses: + if not await license_handler._check_license_exists(license_id): + logger.info(f"No such license {license_id}.") + raise HTTPException( + status_code=404, detail=f"No such license {license_id}." 
+ ) + + background_tasks.add_task( + reschedule_partitions, + config_name, + licenses, + manifest_handler, + download_handler, + queue_handler, + ) + + return {"msg": "Refetch initiated successfully."} diff --git a/weather_dl_v2/fastapi-server/routers/queues.py b/weather_dl_v2/fastapi-server/routers/queues.py index 75252e1f..5ab7386d 100644 --- a/weather_dl_v2/fastapi-server/routers/queues.py +++ b/weather_dl_v2/fastapi-server/routers/queues.py @@ -49,18 +49,19 @@ async def modify_license_queue( priority_list: list | None = [], queue_handler: QueueHandler = Depends(get_queue_handler), license_handler: LicenseHandler = Depends(get_license_handler), - download_handler: DownloadHandler = Depends(get_download_handler) + download_handler: DownloadHandler = Depends(get_download_handler), ): if not await license_handler._check_license_exists(license_id): logger.error(f"License {license_id} not found.") raise HTTPException(status_code=404, detail=f"License {license_id} not found.") - + for config_name in priority_list: config = await download_handler._get_download_by_config_name(config_name) if config is None: logger.error(f"Download config {config_name} not found in weather-dl v2.") raise HTTPException( - status_code=404, detail=f"Download config {config_name} not found in weather-dl v2." + status_code=404, + detail=f"Download config {config_name} not found in weather-dl v2.", ) try: await queue_handler._update_license_queue(license_id, priority_list) From 8659fd4b968395ba8c883cf001062cb6f965bafd Mon Sep 17 00:00:00 2001 From: Rahul Mahrsee Date: Thu, 10 Aug 2023 05:59:11 +0000 Subject: [PATCH 32/51] Assign download jobs to its dedicated node-pool. 
--- weather_dl_v2/license_deployment/downloader.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/weather_dl_v2/license_deployment/downloader.yaml b/weather_dl_v2/license_deployment/downloader.yaml index 9b7252f8..21d1f6dd 100644 --- a/weather_dl_v2/license_deployment/downloader.yaml +++ b/weather_dl_v2/license_deployment/downloader.yaml @@ -6,6 +6,8 @@ spec: ttlSecondsAfterFinished: 0 template: spec: + nodeSelector: + cloud.google.com/gke-nodepool: downloader-pool containers: - name: downloader image: XXXXXXX From f951db0a7edf14b01c82d966f6510a8400bf192c Mon Sep 17 00:00:00 2001 From: aniketinfocusp <122869307+aniketinfocusp@users.noreply.github.com> Date: Thu, 17 Aug 2023 14:19:36 +0530 Subject: [PATCH 33/51] `dl-v2` improvements (#380) * added deployment config * added cli config * command to see and update server ip address. * added basic tools to docker container * lint fixes * minor fix, removed pring stmt * minor fix, cli * minor fix, cli * minor fix, license dep * minor fix, server --- weather_dl_v2/cli/CLI-Documentation.md | 31 ++++++++++++- weather_dl_v2/cli/Dockerfile | 10 ++++- weather_dl_v2/cli/README.md | 4 +- weather_dl_v2/cli/app/cli_config.py | 45 +++++++++++++++++++ weather_dl_v2/cli/app/config.py | 10 ----- weather_dl_v2/cli/app/main.py | 18 ++++---- .../cli/app/services/download_service.py | 18 ++++---- .../cli/app/services/license_service.py | 4 +- .../cli/app/services/network_service.py | 1 + .../cli/app/services/queue_service.py | 4 +- weather_dl_v2/cli/app/subcommands/config.py | 45 +++++++++++++++++++ weather_dl_v2/cli/app/subcommands/download.py | 24 +++++----- weather_dl_v2/cli/app/utils.py | 6 +-- weather_dl_v2/cli/cli_config.json | 4 ++ weather_dl_v2/fastapi-server/README.md | 2 +- .../fastapi-server/server_config.json | 2 +- weather_dl_v2/license_deployment/README.md | 4 +- weather_dl_v2/license_deployment/database.py | 19 +++++--- .../license_deployment/deployment_config.json | 6 +++ .../license_deployment/deployment_config.py 
| 41 +++++++++++++++++ weather_dl_v2/license_deployment/manifest.py | 10 +++-- 21 files changed, 246 insertions(+), 62 deletions(-) create mode 100644 weather_dl_v2/cli/app/cli_config.py delete mode 100644 weather_dl_v2/cli/app/config.py create mode 100644 weather_dl_v2/cli/app/subcommands/config.py create mode 100644 weather_dl_v2/cli/cli_config.json create mode 100644 weather_dl_v2/license_deployment/deployment_config.json create mode 100644 weather_dl_v2/license_deployment/deployment_config.py diff --git a/weather_dl_v2/cli/CLI-Documentation.md b/weather_dl_v2/cli/CLI-Documentation.md index 060dcd15..0905d823 100644 --- a/weather_dl_v2/cli/CLI-Documentation.md +++ b/weather_dl_v2/cli/CLI-Documentation.md @@ -265,4 +265,33 @@ weather-dl-v2 queue edit L1 --config example.cfg --priority 0 ``` -
\ No newline at end of file +
+ +## Config +Configurations for cli. + +
+ weather-dl-v2 config show-ip
+See the current server IP address. +
+ +##### Usage +``` +weather-dl-v2 config show-ip +``` +
+ +
+ weather-dl-v2 config set-ip
+Update the server IP address. +
+ +##### Arguments +> `NEW_IP` : New IP address. (Do not add port or protocol). + +##### Usage +``` +weather-dl-v2 config set-ip 127.0.0.1 +``` +
+ diff --git a/weather_dl_v2/cli/Dockerfile b/weather_dl_v2/cli/Dockerfile index f8e0b336..6039ad2f 100644 --- a/weather_dl_v2/cli/Dockerfile +++ b/weather_dl_v2/cli/Dockerfile @@ -11,4 +11,12 @@ ARG CONDA_ENV_NAME=weather-dl-v2-cli RUN echo "source activate ${CONDA_ENV_NAME}" >> ~/.bashrc ENV PATH /opt/conda/envs/${CONDA_ENV_NAME}/bin:$PATH -ENV BASE_URI=http://:8080 +RUN apt-get update -y +RUN apt-get install nano -y +RUN apt-get install vim -y +RUN apt-get curl -y + +# TODO: Install gsutil +# RUN curl -O https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-cli-443.0.0-linux-arm.tar.gz +# RUN tar -xf google-cloud-cli-443.0.0-linux-arm.tar.gz +# RUN ./google-cloud-sdk/install.sh \ No newline at end of file diff --git a/weather_dl_v2/cli/README.md b/weather_dl_v2/cli/README.md index 12ce31cc..371142dd 100644 --- a/weather_dl_v2/cli/README.md +++ b/weather_dl_v2/cli/README.md @@ -5,9 +5,9 @@ This is a command line interface for talking to the weather-dl-v2 FastAPI server while deploying our FastAPI server. Hence we need to deploy the CLI on a VM to interact through our fastapi server. -Replace the FastAPI server pod's IP in Dockerfile (at line 8). +Replace the FastAPI server pod's IP in cli_config.json. ``` -ENV BASE_URI=http://:8080 +Please make approriate changes in cli_config.json, if required. ``` > Note: Command to get the Pod IP : `kubectl get pods -o wide`. 
> diff --git a/weather_dl_v2/cli/app/cli_config.py b/weather_dl_v2/cli/app/cli_config.py new file mode 100644 index 00000000..2ea340d1 --- /dev/null +++ b/weather_dl_v2/cli/app/cli_config.py @@ -0,0 +1,45 @@ +import dataclasses +import typing as t +import json +import os + +Values = t.Union[t.List["Values"], t.Dict[str, "Values"], bool, int, float, str] # pytype: disable=not-supported-yet + + +@dataclasses.dataclass +class CliConfig: + pod_ip: str = "" + port: str = "" + + @property + def BASE_URI(self) -> str: + return f"http://{self.pod_ip}:{self.port}" + + kwargs: t.Optional[t.Dict[str, Values]] = dataclasses.field(default_factory=dict) + + @classmethod + def from_dict(cls, config: t.Dict): + config_instance = cls() + + for key, value in config.items(): + if hasattr(config_instance, key): + setattr(config_instance, key, value) + else: + config_instance.kwargs[key] = value + + return config_instance + + +cli_config = None + + +def get_config(): + global cli_config + cli_config_json = os.path.join(os.getcwd(), "cli_config.json") + + if cli_config is None: + with open(cli_config_json) as file: + firestore_dict = json.load(file) + cli_config = CliConfig.from_dict(firestore_dict) + + return cli_config diff --git a/weather_dl_v2/cli/app/config.py b/weather_dl_v2/cli/app/config.py deleted file mode 100644 index 0ea12e4a..00000000 --- a/weather_dl_v2/cli/app/config.py +++ /dev/null @@ -1,10 +0,0 @@ -import os - - -class Config: - - def __init__(self): - if "BASE_URI" in os.environ: - self.BASE_URI = os.environ["BASE_URI"] - else: - raise KeyError("BASE_URI not in environment.") diff --git a/weather_dl_v2/cli/app/main.py b/weather_dl_v2/cli/app/main.py index 59c84bf9..64e059f9 100644 --- a/weather_dl_v2/cli/app/main.py +++ b/weather_dl_v2/cli/app/main.py @@ -1,8 +1,9 @@ import typer import logging -from app.config import Config +from app.cli_config import get_config import requests -from app.subcommands import download, queue, license +from app.subcommands import 
download, queue, license, config +from app.utils import Loader logger = logging.getLogger(__name__) @@ -13,17 +14,18 @@ app.add_typer(download.app, name="download", help="Manage downloads.") app.add_typer(queue.app, name="queue", help="Manage queues.") app.add_typer(license.app, name="license", help="Manage licenses.") +app.add_typer(config.app, name="config", help="Configurations for cli.") @app.command("ping", help="Check if FastAPI server is live and rechable.") def ping(): - uri = f"{Config().BASE_URI}/" - + uri = f"{get_config().BASE_URI}/" try: - x = requests.get(uri) - except requests.exceptions.RequestException as e: - raise SystemExit(e) - + with Loader("Sending request..."): + x = requests.get(uri) + except Exception as e: + print(f"error {e}") + return print(x.text) diff --git a/weather_dl_v2/cli/app/services/download_service.py b/weather_dl_v2/cli/app/services/download_service.py index 970f39c1..86557b4c 100644 --- a/weather_dl_v2/cli/app/services/download_service.py +++ b/weather_dl_v2/cli/app/services/download_service.py @@ -3,7 +3,7 @@ import json import typing as t from app.services.network_service import network_service -from app.config import Config +from app.cli_config import get_config logger = logging.getLogger(__name__) @@ -23,8 +23,9 @@ def _get_download_by_config(self, config_name: str): pass @abc.abstractmethod - def _add_new_download(self, file_path: str, licenses: t.List[str], - force_download: bool): + def _add_new_download( + self, file_path: str, licenses: t.List[str], force_download: bool + ): pass @abc.abstractmethod @@ -39,7 +40,7 @@ def _refetch_config_partitions(self, config_name: str, licenses: t.List[str]): class DownloadServiceNetwork(DownloadService): def __init__(self): - self.endpoint = f"{Config().BASE_URI}/download" + self.endpoint = f"{get_config().BASE_URI}/download" def _list_all_downloads(self): return network_service.get( @@ -59,8 +60,9 @@ def _get_download_by_config(self, config_name: str): header={"accept": 
"application/json"}, ) - def _add_new_download(self, file_path: str, licenses: t.List[str], - force_download: bool): + def _add_new_download( + self, file_path: str, licenses: t.List[str], force_download: bool + ): try: file = {"file": open(file_path, "rb")} except FileNotFoundError: @@ -78,12 +80,12 @@ def _remove_download(self, config_name: str): return network_service.delete( uri=f"{self.endpoint}/{config_name}", header={"accept": "application/json"} ) - + def _refetch_config_partitions(self, config_name: str, licenses: t.List[str]): return network_service.post( uri=f"{self.endpoint}/retry/{config_name}", header={"accept": "application/json"}, - payload=json.dumps({"licenses": licenses}) + payload=json.dumps({"licenses": licenses}), ) diff --git a/weather_dl_v2/cli/app/services/license_service.py b/weather_dl_v2/cli/app/services/license_service.py index 9ef0ab39..fa8a5ce1 100644 --- a/weather_dl_v2/cli/app/services/license_service.py +++ b/weather_dl_v2/cli/app/services/license_service.py @@ -2,7 +2,7 @@ import logging import json from app.services.network_service import network_service -from app.config import Config +from app.cli_config import get_config logger = logging.getLogger(__name__) @@ -37,7 +37,7 @@ def _update_license(self, license_id: str, license_dict: dict): class LicenseServiceNetwork(LicenseService): def __init__(self): - self.endpoint = f"{Config().BASE_URI}/license" + self.endpoint = f"{get_config().BASE_URI}/license" def _get_all_license(self): return network_service.get( diff --git a/weather_dl_v2/cli/app/services/network_service.py b/weather_dl_v2/cli/app/services/network_service.py index e95ab563..a65ee2e7 100644 --- a/weather_dl_v2/cli/app/services/network_service.py +++ b/weather_dl_v2/cli/app/services/network_service.py @@ -2,6 +2,7 @@ import json import logging from app.utils import Loader, timeit + logger = logging.getLogger(__name__) diff --git a/weather_dl_v2/cli/app/services/queue_service.py 
b/weather_dl_v2/cli/app/services/queue_service.py index 1191642c..5f7dab7d 100644 --- a/weather_dl_v2/cli/app/services/queue_service.py +++ b/weather_dl_v2/cli/app/services/queue_service.py @@ -3,7 +3,7 @@ import json import typing as t from app.services.network_service import network_service -from app.config import Config +from app.cli_config import get_config logger = logging.getLogger(__name__) @@ -36,7 +36,7 @@ def _edit_config_absolute_priority( class QueueServiceNetwork(QueueService): def __init__(self): - self.endpoint = f"{Config().BASE_URI}/queues" + self.endpoint = f"{get_config().BASE_URI}/queues" def _get_all_license_queues(self): return network_service.get( diff --git a/weather_dl_v2/cli/app/subcommands/config.py b/weather_dl_v2/cli/app/subcommands/config.py new file mode 100644 index 00000000..9dee6cb5 --- /dev/null +++ b/weather_dl_v2/cli/app/subcommands/config.py @@ -0,0 +1,45 @@ +import typer +import json +import os +from typing_extensions import Annotated +from app.cli_config import get_config +from app.utils import Validator + +app = typer.Typer() + + +class ConfigValidator(Validator): + pass + + +@app.command("show-ip", help="See the current server IP address.") +def show_server_ip(): + print(f"Current pod IP: {get_config().pod_ip}") + + +@app.command("set-ip", help="Update the server IP address.") +def update_server_ip( + new_ip: Annotated[ + str, typer.Argument(help="New IP address. 
(Do not add port or protocol).") + ], +): + file_path = os.path.join(os.getcwd(), "cli_config.json") + cli_config = {} + with open(file_path, "r") as file: + cli_config = json.load(file) + + old_ip = cli_config["pod_ip"] + cli_config["pod_ip"] = new_ip + + with open(file_path, "w") as file: + json.dump(cli_config, file) + + validator = ConfigValidator(valid_keys=["pod_ip", "port"]) + + try: + cli_config = validator.validate_json(file_path=file_path) + except Exception as e: + print(f"payload error: {e}") + return + + print(f"Pod IP Updated {old_ip} -> {new_ip} .") diff --git a/weather_dl_v2/cli/app/subcommands/download.py b/weather_dl_v2/cli/app/subcommands/download.py index c6cc8004..138fdf60 100644 --- a/weather_dl_v2/cli/app/subcommands/download.py +++ b/weather_dl_v2/cli/app/subcommands/download.py @@ -38,17 +38,16 @@ def submit_download( file_path: Annotated[ str, typer.Argument(help="File path of config to be uploaded.") ], - license: Annotated[ - List[str], typer.Option("--license", "-l", help="License ID.") - ], + license: Annotated[List[str], typer.Option("--license", "-l", help="License ID.")], force_download: Annotated[ - bool, typer.Option( - "-f", "--force-download", - help="Force redownload of partitions that were previously downloaded." - ) + bool, + typer.Option( + "-f", + "--force-download", + help="Force redownload of partitions that were previously downloaded.", + ), ] = False, ): - print(download_service._add_new_download(file_path, license, force_download)) @@ -65,11 +64,12 @@ def remove_download( ): print(download_service._remove_download(config_name)) -@app.command("refetch", help="Reschedule all partitions of a config that are not successful.") + +@app.command( + "refetch", help="Reschedule all partitions of a config that are not successful." 
+) def refetch_config( config_name: Annotated[str, typer.Argument(help="Config file name.")], - license: Annotated[ - List[str], typer.Option("--license", "-l", help="License ID.") - ], + license: Annotated[List[str], typer.Option("--license", "-l", help="License ID.")], ): print(download_service._refetch_config_partitions(config_name, license)) diff --git a/weather_dl_v2/cli/app/utils.py b/weather_dl_v2/cli/app/utils.py index e27ca7f2..a8d71d20 100644 --- a/weather_dl_v2/cli/app/utils.py +++ b/weather_dl_v2/cli/app/utils.py @@ -12,7 +12,6 @@ logger = logging.getLogger(__name__) - def timeit(func): def wrap_func(*args, **kwargs): t1 = time() @@ -23,8 +22,10 @@ def wrap_func(*args, **kwargs): return wrap_func + class Loader: - def __init__(self, desc="Loading...", end="Done!", timeout=0.1): + + def __init__(self, desc="Loading...", end="", timeout=0.1): """ A loader-like context manager @@ -59,7 +60,6 @@ def stop(self): self.done = True cols = get_terminal_size((80, 20)).columns print("\r" + " " * cols, end="", flush=True) - print(f"\r{self.end}", flush=True) def __exit__(self, exc_type, exc_value, tb): # handle exceptions with those variables ^ diff --git a/weather_dl_v2/cli/cli_config.json b/weather_dl_v2/cli/cli_config.json new file mode 100644 index 00000000..076ed641 --- /dev/null +++ b/weather_dl_v2/cli/cli_config.json @@ -0,0 +1,4 @@ +{ + "pod_ip": "", + "port": 8080 +} \ No newline at end of file diff --git a/weather_dl_v2/fastapi-server/README.md b/weather_dl_v2/fastapi-server/README.md index 27800079..ec237f3a 100644 --- a/weather_dl_v2/fastapi-server/README.md +++ b/weather_dl_v2/fastapi-server/README.md @@ -42,7 +42,7 @@ conda activate weather-dl-v2-server ### Make changes in server config, if required ``` -Please make approriate changes in server config, if required. +Please make approriate changes in server_config.json, if required. 
``` ### To run fastapi server: diff --git a/weather_dl_v2/fastapi-server/server_config.json b/weather_dl_v2/fastapi-server/server_config.json index aba11b10..28a9cde9 100644 --- a/weather_dl_v2/fastapi-server/server_config.json +++ b/weather_dl_v2/fastapi-server/server_config.json @@ -2,5 +2,5 @@ "download_collection": "download", "queues_collection": "queues", "license_collection": "license", - "manifest_collection": "test_manifest" + "manifest_collection": "manifest" } \ No newline at end of file diff --git a/weather_dl_v2/license_deployment/README.md b/weather_dl_v2/license_deployment/README.md index 9afd2bd1..a1624c97 100644 --- a/weather_dl_v2/license_deployment/README.md +++ b/weather_dl_v2/license_deployment/README.md @@ -12,9 +12,9 @@ conda activate weather-dl-v2-license-dep Please write down the downloader's docker image path at Line 11 of downloader.yaml. ``` -### Add manifest collection name in manifest.py +### Make changes in deployment config, if required ``` -Please write down the manifest collection name at Line 500 of manifest.py. +Please make approriate changes in deployment_config.json, if required. 
``` ### Create docker image for license deployment diff --git a/weather_dl_v2/license_deployment/database.py b/weather_dl_v2/license_deployment/database.py index 1449e283..bc1414a5 100644 --- a/weather_dl_v2/license_deployment/database.py +++ b/weather_dl_v2/license_deployment/database.py @@ -8,6 +8,7 @@ from google.cloud.firestore_v1.types import WriteResult from google.cloud.firestore_v1.base_query import FieldFilter, And from util import get_wait_interval +from deployment_config import get_config logger = logging.getLogger(__name__) @@ -75,13 +76,19 @@ def _get_db(self) -> firestore.firestore.Client: def _initialize_license_deployment(self, license_id: str) -> dict: result: DocumentSnapshot = ( - self._get_db().collection("license").document(license_id).get() + self._get_db() + .collection(get_config().license_collection) + .document(license_id) + .get() ) return result.to_dict() def _get_config_from_queue_by_license_id(self, license_id: str) -> str | None: result: DocumentSnapshot = ( - self._get_db().collection("queues").document(license_id).get(["queue"]) + self._get_db() + .collection(get_config().queues_collection) + .document(license_id) + .get(["queue"]) ) if result.exists: queue = result.to_dict()["queue"] @@ -98,7 +105,7 @@ def _remove_config_from_license_queue( ) -> None: result: WriteResult = ( self._get_db() - .collection("queues") + .collection(get_config().queues_collection) .document(license_id) .update({"queue": firestore.ArrayRemove([config_name])}) ) @@ -116,7 +123,7 @@ def get_partition_from_manifest(transaction, config_name: str) -> str | None: snapshot = ( db_client._get_db() - .collection("test_manifest") + .collection(get_config().manifest_collection) .where(filter=and_filter) .limit(1) .get(transaction=transaction) @@ -127,7 +134,9 @@ def get_partition_from_manifest(transaction, config_name: str) -> str | None: return None ref: DocumentReference = ( - db_client._get_db().collection("test_manifest").document(snapshot.id) + 
db_client._get_db() + .collection(get_config().manifest_collection) + .document(snapshot.id) ) transaction.update(ref, {"status": "processing"}) diff --git a/weather_dl_v2/license_deployment/deployment_config.json b/weather_dl_v2/license_deployment/deployment_config.json new file mode 100644 index 00000000..28a9cde9 --- /dev/null +++ b/weather_dl_v2/license_deployment/deployment_config.json @@ -0,0 +1,6 @@ +{ + "download_collection": "download", + "queues_collection": "queues", + "license_collection": "license", + "manifest_collection": "manifest" +} \ No newline at end of file diff --git a/weather_dl_v2/license_deployment/deployment_config.py b/weather_dl_v2/license_deployment/deployment_config.py new file mode 100644 index 00000000..9e598228 --- /dev/null +++ b/weather_dl_v2/license_deployment/deployment_config.py @@ -0,0 +1,41 @@ +import dataclasses +import typing as t +import json + +Values = t.Union[t.List["Values"], t.Dict[str, "Values"], bool, int, float, str] # pytype: disable=not-supported-yet + + +@dataclasses.dataclass +class DeploymentConfig: + download_collection: str = "" + queues_collection: str = "" + license_collection: str = "" + manifest_collection: str = "" + kwargs: t.Optional[t.Dict[str, Values]] = dataclasses.field(default_factory=dict) + + @classmethod + def from_dict(cls, config: t.Dict): + config_instance = cls() + + for key, value in config.items(): + if hasattr(config_instance, key): + setattr(config_instance, key, value) + else: + config_instance.kwargs[key] = value + + return config_instance + + +deployment_config = None + + +def get_config(): + global deployment_config + deployment_config_json = "deployment_config.json" + + if deployment_config is None: + with open(deployment_config_json) as file: + firestore_dict = json.load(file) + deployment_config = DeploymentConfig.from_dict(firestore_dict) + + return deployment_config diff --git a/weather_dl_v2/license_deployment/manifest.py b/weather_dl_v2/license_deployment/manifest.py index 
fdb80ea8..8779d3c6 100644 --- a/weather_dl_v2/license_deployment/manifest.py +++ b/weather_dl_v2/license_deployment/manifest.py @@ -25,7 +25,7 @@ from firebase_admin import firestore from google.cloud.firestore_v1 import DocumentReference from google.cloud.firestore_v1.types import WriteResult - +from deployment_config import get_config from database import Database logger = logging.getLogger(__name__) @@ -496,6 +496,8 @@ def _update(self, download_status: DownloadStatus) -> None: def root_document_for_store(self, store_scheme: str) -> DocumentReference: """Get the root manifest document given the user's config and current document's storage location.""" - # TODO: Get user-defined collection for manifest. - root_collection = "XXXXXXXXXXXXXXX" - return self._get_db().collection(root_collection).document(store_scheme) + return ( + self._get_db() + .collection(get_config().manifest_collection) + .document(store_scheme) + ) From d2dc65894810e40f5db48a65d9617907e07ce933 Mon Sep 17 00:00:00 2001 From: aniketinfocusp <122869307+aniketinfocusp@users.noreply.github.com> Date: Thu, 17 Aug 2023 16:02:57 +0530 Subject: [PATCH 34/51] `dl-v2` imporvements 2 (#381) * added gsutil to cli container * updated license id as user input * added license_id to cli license add command * minor fix * updated cli readme * minor fix --- weather_dl_v2/cli/CLI-Documentation.md | 2 ++ weather_dl_v2/cli/Dockerfile | 16 +++++++++++----- weather_dl_v2/cli/app/subcommands/license.py | 7 ++++--- .../fastapi-server/database/license_handler.py | 7 +++---- weather_dl_v2/fastapi-server/routers/license.py | 8 ++++++++ 5 files changed, 28 insertions(+), 12 deletions(-) diff --git a/weather_dl_v2/cli/CLI-Documentation.md b/weather_dl_v2/cli/CLI-Documentation.md index 0905d823..8ecb7684 100644 --- a/weather_dl_v2/cli/CLI-Documentation.md +++ b/weather_dl_v2/cli/CLI-Documentation.md @@ -120,11 +120,13 @@ Manage licenses. 
The json file should be in this format: ``` { + "license_id: , "client_name": , "number_of_requests": , "secret_id": } ``` +NOTE: `license_id` is case insensitive and has to be unique for each license. ##### Arguments diff --git a/weather_dl_v2/cli/Dockerfile b/weather_dl_v2/cli/Dockerfile index 6039ad2f..a2439002 100644 --- a/weather_dl_v2/cli/Dockerfile +++ b/weather_dl_v2/cli/Dockerfile @@ -2,6 +2,10 @@ FROM continuumio/miniconda3:latest COPY . . +# Add the mamba solver for faster builds +RUN conda install -n base conda-libmamba-solver +RUN conda config --set solver libmamba + # Create conda env using environment.yml RUN conda update conda -y RUN conda env create --name weather-dl-v2-cli --file=environment.yml @@ -14,9 +18,11 @@ ENV PATH /opt/conda/envs/${CONDA_ENV_NAME}/bin:$PATH RUN apt-get update -y RUN apt-get install nano -y RUN apt-get install vim -y -RUN apt-get curl -y +RUN apt-get install curl -y -# TODO: Install gsutil -# RUN curl -O https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-cli-443.0.0-linux-arm.tar.gz -# RUN tar -xf google-cloud-cli-443.0.0-linux-arm.tar.gz -# RUN ./google-cloud-sdk/install.sh \ No newline at end of file +# Install gsutil +RUN curl -O https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-cli-443.0.0-linux-arm.tar.gz +RUN tar -xf google-cloud-cli-443.0.0-linux-arm.tar.gz +RUN ./google-cloud-sdk/install.sh --quiet +RUN echo "if [ -f '/google-cloud-sdk/path.bash.inc' ]; then . '/google-cloud-sdk/path.bash.inc'; fi" >> /root/.bashrc +RUN echo "if [ -f '/google-cloud-sdk/completion.bash.inc' ]; then . '/google-cloud-sdk/completion.bash.inc'; fi" >> /root/.bashrc diff --git a/weather_dl_v2/cli/app/subcommands/license.py b/weather_dl_v2/cli/app/subcommands/license.py index 428534ac..1cf6c608 100644 --- a/weather_dl_v2/cli/app/subcommands/license.py +++ b/weather_dl_v2/cli/app/subcommands/license.py @@ -43,12 +43,13 @@ def add_license( str, typer.Argument( help="""Input json file. 
Example json for new license-""" - """{"client_name" : , "number_of_requests" : , "secret_id" : }""" + """{"license_id" : , "client_name" : , "number_of_requests" : , "secret_id" : }""" + """\nNOTE: license_id is case insensitive and has to be unique for each license.""" ), - ], # noqa + ], ): validator = LicenseValidator( - valid_keys=["client_name", "number_of_requests", "secret_id"] + valid_keys=["license_id", "client_name", "number_of_requests", "secret_id"] ) try: diff --git a/weather_dl_v2/fastapi-server/database/license_handler.py b/weather_dl_v2/fastapi-server/database/license_handler.py index a5c19e05..2611d3d0 100644 --- a/weather_dl_v2/fastapi-server/database/license_handler.py +++ b/weather_dl_v2/fastapi-server/database/license_handler.py @@ -118,11 +118,10 @@ def __init__(self, db: firestore.firestore.AsyncClient): self.db = db self.collection = get_config().license_collection - # TODO: find alternative way to create license_id async def _add_license(self, license_dict: dict) -> str: - license_count = await self.db.collection(self.collection).count().get() - license_id = f"L{license_count[0][0].value + 1}" - license_dict["license_id"] = license_id + license_dict['license_id'] = license_dict['license_id'].lower() + license_id = license_dict['license_id'] + result: WriteResult = ( await self.db.collection(self.collection) .document(license_id) diff --git a/weather_dl_v2/fastapi-server/routers/license.py b/weather_dl_v2/fastapi-server/routers/license.py index 72c5ce08..2a62f04b 100644 --- a/weather_dl_v2/fastapi-server/routers/license.py +++ b/weather_dl_v2/fastapi-server/routers/license.py @@ -12,6 +12,7 @@ # TODO: Make use of google secret manager. # REF: https://cloud.google.com/secret-manager. 
class License(BaseModel): + license_id: str client_name: str number_of_requests: int secret_id: str @@ -129,6 +130,13 @@ async def add_license( queue_handler: QueueHandler = Depends(get_queue_handler), create_deployment=Depends(get_create_deployment), ): + license_id = license.license_id.lower() + if await license_handler._check_license_exists(license_id): + logger.error(f"License with license_id {license_id} already exist.") + raise HTTPException( + status_code=409, detail=f"License with license_id {license_id} already exist." + ) + license_dict = license.dict() license_dict["k8s_deployment_id"] = "" license_id = await license_handler._add_license(license_dict) From e65b5ad0432cba18ef9281da2fc32c58416e1c5e Mon Sep 17 00:00:00 2001 From: aniketinfocusp <122869307+aniketinfocusp@users.noreply.github.com> Date: Fri, 18 Aug 2023 17:32:01 +0530 Subject: [PATCH 35/51] added validation for license id (#382) --- .../database/license_handler.py | 6 +++--- .../fastapi-server/routers/download.py | 4 ++-- .../fastapi-server/routers/license.py | 21 +++++++++++++++++-- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/weather_dl_v2/fastapi-server/database/license_handler.py b/weather_dl_v2/fastapi-server/database/license_handler.py index 2611d3d0..42477820 100644 --- a/weather_dl_v2/fastapi-server/database/license_handler.py +++ b/weather_dl_v2/fastapi-server/database/license_handler.py @@ -119,9 +119,9 @@ def __init__(self, db: firestore.firestore.AsyncClient): self.collection = get_config().license_collection async def _add_license(self, license_dict: dict) -> str: - license_dict['license_id'] = license_dict['license_id'].lower() - license_id = license_dict['license_id'] - + license_dict["license_id"] = license_dict["license_id"].lower() + license_id = license_dict["license_id"] + result: WriteResult = ( await self.db.collection(self.collection) .document(license_id) diff --git a/weather_dl_v2/fastapi-server/routers/download.py 
b/weather_dl_v2/fastapi-server/routers/download.py index 72cc74ee..ff81af5c 100644 --- a/weather_dl_v2/fastapi-server/routers/download.py +++ b/weather_dl_v2/fastapi-server/routers/download.py @@ -175,7 +175,7 @@ async def submit_download( detail=f"Please stop the ongoing download of the config file '{file.filename}' " "before attempting to start a new download.", ) - + for license_id in licenses: if not await license_handler._check_license_exists(license_id): logger.info(f"No such license {license_id}.") @@ -287,7 +287,7 @@ async def retry_config( status_code=404, detail=f"No such download config {config_name} to stop & remove.", ) - + for license_id in licenses: if not await license_handler._check_license_exists(license_id): logger.info(f"No such license {license_id}.") diff --git a/weather_dl_v2/fastapi-server/routers/license.py b/weather_dl_v2/fastapi-server/routers/license.py index 2a62f04b..071d6deb 100644 --- a/weather_dl_v2/fastapi-server/routers/license.py +++ b/weather_dl_v2/fastapi-server/routers/license.py @@ -1,5 +1,5 @@ import logging - +import re from fastapi import APIRouter, HTTPException, BackgroundTasks, Depends from pydantic import BaseModel from license_dep.deployment_creator import create_license_deployment, terminate_license_deployment @@ -131,10 +131,27 @@ async def add_license( create_deployment=Depends(get_create_deployment), ): license_id = license.license_id.lower() + + # Check if license id is in correct format. + LICENSE_REGEX = re.compile( + r"[a-z0-9]([-a-z0-9]*[a-z0-9])?(\\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*" + ) + if not bool(LICENSE_REGEX.fullmatch(license_id)): + logger.error( + """Invalid format for license_id. License id must consist of lower case alphanumeric""" + """ characters, '-' or '.', and must start and end with an alphanumeric character""" + ) + raise HTTPException( + status_code=400, + detail="""Invalid format for license_id. 
License id must consist of lower case alphanumeric""" + """ characters, '-' or '.', and must start and end with an alphanumeric character""" + ) + if await license_handler._check_license_exists(license_id): logger.error(f"License with license_id {license_id} already exist.") raise HTTPException( - status_code=409, detail=f"License with license_id {license_id} already exist." + status_code=409, + detail=f"License with license_id {license_id} already exist.", ) license_dict = license.dict() From bd9e4d17e0f3f25609022716f8c5f97db6e37925 Mon Sep 17 00:00:00 2001 From: aniketinfocusp <122869307+aniketinfocusp@users.noreply.github.com> Date: Tue, 22 Aug 2023 16:54:59 +0530 Subject: [PATCH 36/51] `dl-v2` additional download filters (#383) * added additonal filter params to download * added additional filter to cli validation * minor fixes * minor nit and updated cli docs * minor doc fix * allow multiple filters fix * updated inprogress filter * updated help string cli * Update cli docs --- weather_dl_v2/cli/CLI-Documentation.md | 9 +++++- .../cli/app/services/download_service.py | 6 ++-- weather_dl_v2/cli/app/subcommands/download.py | 20 +++++++----- weather_dl_v2/cli/app/utils.py | 16 ++++++---- .../fastapi-server/routers/download.py | 32 ++++++++++++++++++- .../fastapi-server/routers/license.py | 2 +- 6 files changed, 64 insertions(+), 21 deletions(-) diff --git a/weather_dl_v2/cli/CLI-Documentation.md b/weather_dl_v2/cli/CLI-Documentation.md index 8ecb7684..bebeebbd 100644 --- a/weather_dl_v2/cli/CLI-Documentation.md +++ b/weather_dl_v2/cli/CLI-Documentation.md @@ -49,7 +49,10 @@ weather-dl-v2 download add /path/to/example.cfg –l L1 -l L2 [--force-download] List all the active downloads. -The list can also be filtered out by client_names. +The list can also be filtered out by client_names. 
+Available filters: +[key: client_name, values: cds, mars, ecpublic] +[key: status, values: completed, failed, in-progress] ##### Options > `--filter` : Filter the list by some key and value. Format of filter filter_key=filter_value @@ -58,6 +61,10 @@ The list can also be filtered out by client_names. ``` weather-dl-v2 download list weather-dl-v2 download list --filter client_name=cds +weather-dl-v2 download list --filter status=completed +weather-dl-v2 download list --filter status=failed +weather-dl-v2 download list --filter status=in-progress +weather-dl-v2 download list --filter client_name=cds --filter status=completed ``` diff --git a/weather_dl_v2/cli/app/services/download_service.py b/weather_dl_v2/cli/app/services/download_service.py index 86557b4c..5ad6644e 100644 --- a/weather_dl_v2/cli/app/services/download_service.py +++ b/weather_dl_v2/cli/app/services/download_service.py @@ -15,7 +15,7 @@ def _list_all_downloads(self): pass @abc.abstractmethod - def _list_all_downloads_by_client_name(self, client_name: str): + def _list_all_downloads_by_filter(self, filter_dict: dict): pass @abc.abstractmethod @@ -47,11 +47,11 @@ def _list_all_downloads(self): uri=self.endpoint, header={"accept": "application/json"} ) - def _list_all_downloads_by_client_name(self, client_name: str): + def _list_all_downloads_by_filter(self, filter_dict: dict): return network_service.get( uri=self.endpoint, header={"accept": "application/json"}, - query={"client_name": client_name}, + query=filter_dict, ) def _get_download_by_config(self, config_name: str): diff --git a/weather_dl_v2/cli/app/subcommands/download.py b/weather_dl_v2/cli/app/subcommands/download.py index 138fdf60..4aff8cc8 100644 --- a/weather_dl_v2/cli/app/subcommands/download.py +++ b/weather_dl_v2/cli/app/subcommands/download.py @@ -4,7 +4,7 @@ from app.utils import Validator from typing import List -app = typer.Typer() +app = typer.Typer(rich_markup_mode="markdown") class DowloadFilterValidator(Validator): @@ -14,20 
+14,24 @@ class DowloadFilterValidator(Validator): @app.command("list", help="List out all the configs.") def get_downloads( filter: Annotated[ - str, typer.Option(help="Filter by some value. Format: filter_key=filter_value.") - ] = None + List[str], + typer.Option( + help="""Filter by some value. Format: filter_key=filter_value. Available filters """ + """[key: client_name, values: cds, mars, ecpublic] """ + """[key: status, values: completed, failed, in-progress]""" + ), + ] = [] ): - if filter: - validator = DowloadFilterValidator(valid_keys=["client_name"]) + if len(filter) > 0: + validator = DowloadFilterValidator(valid_keys=["client_name", "status"]) try: - data = validator.validate(filters=[filter]) - client_name = data["client_name"] + filter_dict = validator.validate(filters=filter, allow_missing=True) except Exception as e: print(f"filter error: {e}") return - print(download_service._list_all_downloads_by_client_name(client_name)) + print(download_service._list_all_downloads_by_filter(filter_dict)) return print(download_service._list_all_downloads()) diff --git a/weather_dl_v2/cli/app/utils.py b/weather_dl_v2/cli/app/utils.py index a8d71d20..87003b6c 100644 --- a/weather_dl_v2/cli/app/utils.py +++ b/weather_dl_v2/cli/app/utils.py @@ -70,7 +70,9 @@ def __exit__(self, exc_type, exc_value, tb): class Validator(abc.ABC): valid_keys: t.List[str] - def validate(self, filters: t.List[str], show_valid_filters=True): + def validate( + self, filters: t.List[str], show_valid_filters=True, allow_missing: bool = False + ): filter_dict = {} for filter in filters: @@ -87,7 +89,7 @@ def validate(self, filters: t.List[str], show_valid_filters=True): data_set = set(filter_dict.keys()) valid_set = set(self.valid_keys) - if self._validate_keys(data_set, valid_set): + if self._validate_keys(data_set, valid_set, allow_missing): return filter_dict def validate_json(self, file_path): @@ -106,17 +108,17 @@ def validate_json(self, file_path): logger.info("file not found.") raise 
FileNotFoundError - def _validate_keys(self, data_set: set, valid_set: set): - if data_set == valid_set: - return True - + def _validate_keys(self, data_set: set, valid_set: set, allow_missing: bool): missing_keys = valid_set.difference(data_set) invalid_keys = data_set.difference(valid_set) - if len(missing_keys) > 0: + if not allow_missing and len(missing_keys) > 0: raise ValueError(f"keys {missing_keys} are missing in file.") if len(invalid_keys) > 0: raise ValueError(f"keys {invalid_keys} are invalid keys.") + if allow_missing or data_set == valid_set: + return True + return False diff --git a/weather_dl_v2/fastapi-server/routers/download.py b/weather_dl_v2/fastapi-server/routers/download.py index ff81af5c..4fca2de9 100644 --- a/weather_dl_v2/fastapi-server/routers/download.py +++ b/weather_dl_v2/fastapi-server/routers/download.py @@ -4,6 +4,7 @@ import shutil import json +from enum import Enum from fastapi import APIRouter, HTTPException, BackgroundTasks, UploadFile, Depends, Body from config_processing.pipeline import start_processing_config from database.download_handler import DownloadHandler, get_download_handler @@ -198,11 +199,18 @@ async def submit_download( ) +class DownloadStatus(str, Enum): + COMPLETED = "completed" + FAILED = "failed" + IN_PROGRESS = "in-progress" + + # Can check the current status of the submitted config. 
# List status for all the downloads + handle filters @router.get("/") async def get_downloads( client_name: str | None = None, + status: DownloadStatus | None = None, download_handler: DownloadHandler = Depends(get_download_handler), manifest_handler: ManifestHandler = Depends(get_manifest_handler), fetch_config_stats=Depends(get_fetch_config_stats), @@ -220,7 +228,29 @@ async def get_downloads( ) ) - return await asyncio.gather(*coroutines) + config_details = await asyncio.gather(*coroutines) + + if status is None: + return config_details + + if status.value == DownloadStatus.COMPLETED: + return list( + filter( + lambda detail: detail["downloaded_shards"] == detail["total_shards"], + config_details, + ) + ) + elif status.value == DownloadStatus.FAILED: + return list(filter(lambda detail: detail["failed_shards"] > 0, config_details)) + elif status.value == DownloadStatus.IN_PROGRESS: + return list( + filter( + lambda detail: detail["downloaded_shards"] != detail["total_shards"], + config_details, + ) + ) + else: + return config_details # Get status of particular download diff --git a/weather_dl_v2/fastapi-server/routers/license.py b/weather_dl_v2/fastapi-server/routers/license.py index 071d6deb..ebb826bb 100644 --- a/weather_dl_v2/fastapi-server/routers/license.py +++ b/weather_dl_v2/fastapi-server/routers/license.py @@ -144,7 +144,7 @@ async def add_license( raise HTTPException( status_code=400, detail="""Invalid format for license_id. 
License id must consist of lower case alphanumeric""" - """ characters, '-' or '.', and must start and end with an alphanumeric character""" + """ characters, '-' or '.', and must start and end with an alphanumeric character""", ) if await license_handler._check_license_exists(license_id): From 26aaedb2f5e9a64a1296c5ef80fad00272aad873 Mon Sep 17 00:00:00 2001 From: aniketinfocusp <122869307+aniketinfocusp@users.noreply.github.com> Date: Wed, 23 Aug 2023 16:23:35 +0530 Subject: [PATCH 37/51] `dl-v2` improvements 3 (#384) * added downloader config * added downloader k8 image to deployment_config * added license deployment image to server_config * added license id to error msg for failed manifest * lint fixes * minor nit * minor nit --- weather_dl_v2/downloader_kubernetes/README.md | 10 +---- .../downloader_kubernetes/downloader.py | 7 ++-- .../downloader_config.json | 3 ++ .../downloader_config.py | 38 +++++++++++++++++++ .../downloader_kubernetes/manifest.py | 13 +++++-- weather_dl_v2/fastapi-server/README.md | 5 --- .../license_dep/deployment_creator.py | 4 ++ .../fastapi-server/server_config.json | 3 +- weather_dl_v2/fastapi-server/server_config.py | 5 ++- weather_dl_v2/license_deployment/README.md | 5 --- .../license_deployment/deployment_config.json | 3 +- .../license_deployment/deployment_config.py | 5 ++- weather_dl_v2/license_deployment/fetch.py | 3 +- .../license_deployment/job_creator.py | 15 +++++++- weather_dl_v2/license_deployment/manifest.py | 4 +- 15 files changed, 89 insertions(+), 34 deletions(-) create mode 100644 weather_dl_v2/downloader_kubernetes/downloader_config.json create mode 100644 weather_dl_v2/downloader_kubernetes/downloader_config.py diff --git a/weather_dl_v2/downloader_kubernetes/README.md b/weather_dl_v2/downloader_kubernetes/README.md index bb0e3567..e3f36f47 100644 --- a/weather_dl_v2/downloader_kubernetes/README.md +++ b/weather_dl_v2/downloader_kubernetes/README.md @@ -9,15 +9,9 @@ We are not configuring any service account here 
hence make sure that compute eng * roles/bigquery.dataEditor * roles/bigquery.jobUser -### Add the manifest location path +### Make changes in downloader config, if required ``` -Please write down the manifest path at Line 43 of downloader.py. -Eg: "fs://test_manifest?projectId=XXX" -``` - -### Add manifest collection name in manifest.py -``` -Please write down the manifest collection name at Line 482 of manifest.py. +Please make appropriate changes in downloader_config.json, if required. ``` ### Create docker image for downloader: diff --git a/weather_dl_v2/downloader_kubernetes/downloader.py b/weather_dl_v2/downloader_kubernetes/downloader.py index e0acbb53..4eab7e0d 100644 --- a/weather_dl_v2/downloader_kubernetes/downloader.py +++ b/weather_dl_v2/downloader_kubernetes/downloader.py @@ -20,11 +20,12 @@ def download(url: str, path: str) -> None: download_with_aria2(url, path) -def main(config_name, dataset, selection, user_id, url, target_path) -> None: +def main( + config_name, dataset, selection, user_id, url, target_path, license_id +) -> None: """Download data from a client to a temp file.""" - manifest_location = "XXXXXXXXXX" - manifest = FirestoreManifest(manifest_location) + manifest = FirestoreManifest(license_id=license_id) temp_name = "" with manifest.transact(config_name, dataset, selection, target_path, user_id): with tempfile.NamedTemporaryFile(delete=False) as temp: diff --git a/weather_dl_v2/downloader_kubernetes/downloader_config.json b/weather_dl_v2/downloader_kubernetes/downloader_config.json new file mode 100644 index 00000000..b7e53cfc --- /dev/null +++ b/weather_dl_v2/downloader_kubernetes/downloader_config.json @@ -0,0 +1,3 @@ +{ + "manifest_collection": "manifest" +} \ No newline at end of file diff --git a/weather_dl_v2/downloader_kubernetes/downloader_config.py b/weather_dl_v2/downloader_kubernetes/downloader_config.py new file mode 100644 index 00000000..b66e90fa --- /dev/null +++ b/weather_dl_v2/downloader_kubernetes/downloader_config.py 
@@ -0,0 +1,38 @@ +import dataclasses +import typing as t +import json + +Values = t.Union[t.List["Values"], t.Dict[str, "Values"], bool, int, float, str] # pytype: disable=not-supported-yet + + +@dataclasses.dataclass +class DownloaderConfig: + manifest_collection: str = "" + kwargs: t.Optional[t.Dict[str, Values]] = dataclasses.field(default_factory=dict) + + @classmethod + def from_dict(cls, config: t.Dict): + config_instance = cls() + + for key, value in config.items(): + if hasattr(config_instance, key): + setattr(config_instance, key, value) + else: + config_instance.kwargs[key] = value + + return config_instance + + +downloader_config = None + + +def get_config(): + global downloader_config + downloader_config_json = "downloader_config.json" + + if downloader_config is None: + with open(downloader_config_json) as file: + config_dict = json.load(file) + downloader_config = DownloaderConfig.from_dict(config_dict) + + return downloader_config diff --git a/weather_dl_v2/downloader_kubernetes/manifest.py b/weather_dl_v2/downloader_kubernetes/manifest.py index 6d0ae0dd..bd7c3d73 100644 --- a/weather_dl_v2/downloader_kubernetes/manifest.py +++ b/weather_dl_v2/downloader_kubernetes/manifest.py @@ -24,6 +24,7 @@ from firebase_admin import firestore from google.cloud.firestore_v1 import DocumentReference from google.cloud.firestore_v1.types import WriteResult +from downloader_config import get_config """An implementation-dependent Manifest URI.""" Location = t.NewType("Location", str) @@ -200,6 +201,7 @@ class Manifest(abc.ABC): # To reduce the impact of _read() and _update() calls # on the start time of the stage. 
+ license_id: str = "" prev_stage_precise_start_time: t.Optional[str] = None status: t.Optional[DownloadStatus] = None @@ -328,7 +330,8 @@ def __exit__(self, exc_type, exc_inst, exc_tb) -> None: else: status = Status.FAILURE # For explanation, see https://docs.python.org/3/library/traceback.html#traceback.format_exception - error = "\n".join(traceback.format_exception(exc_type, exc_inst, exc_tb)) + error = f"license_id: {self.license_id} " + error += "\n".join(traceback.format_exception(exc_type, exc_inst, exc_tb)) new_status = dataclasses.replace(self.status) new_status.error = error @@ -478,6 +481,8 @@ def _update(self, download_status: DownloadStatus) -> None: def root_document_for_store(self, store_scheme: str) -> DocumentReference: """Get the root manifest document given the user's config and current document's storage location.""" - # TODO: Get user-defined collection for manifest. - root_collection = "XXXXXXXXXX" - return self._get_db().collection(root_collection).document(store_scheme) + return ( + self._get_db() + .collection(get_config().manifest_collection) + .document(store_scheme) + ) diff --git a/weather_dl_v2/fastapi-server/README.md b/weather_dl_v2/fastapi-server/README.md index ec237f3a..26d2198e 100644 --- a/weather_dl_v2/fastapi-server/README.md +++ b/weather_dl_v2/fastapi-server/README.md @@ -53,11 +53,6 @@ uvicorn main:app --reload * Open your browser at http://127.0.0.1:8000. -### Add path of created license deployment image in license_dep/license_deployment.yaml: -``` -Please write down the license deployment's docker image path at Line 22 of license_deployment.yaml. 
-``` - ### Create docker image for server: ``` export PROJECT_ID= diff --git a/weather_dl_v2/fastapi-server/license_dep/deployment_creator.py b/weather_dl_v2/fastapi-server/license_dep/deployment_creator.py index 481cdae5..77927b0f 100644 --- a/weather_dl_v2/fastapi-server/license_dep/deployment_creator.py +++ b/weather_dl_v2/fastapi-server/license_dep/deployment_creator.py @@ -2,6 +2,7 @@ from os import path import yaml from kubernetes import client, config +from server_config import get_config logger = logging.getLogger(__name__) @@ -20,6 +21,9 @@ def create_license_deployment(license_id: str) -> str: "--license", license_id, ] + deployment_manifest["spec"]["template"]["spec"]["containers"][0][ + "image" + ] = get_config().license_deployment_image # Create an instance of the Kubernetes API client api_instance = client.AppsV1Api() diff --git a/weather_dl_v2/fastapi-server/server_config.json b/weather_dl_v2/fastapi-server/server_config.json index 28a9cde9..7b36be7c 100644 --- a/weather_dl_v2/fastapi-server/server_config.json +++ b/weather_dl_v2/fastapi-server/server_config.json @@ -2,5 +2,6 @@ "download_collection": "download", "queues_collection": "queues", "license_collection": "license", - "manifest_collection": "manifest" + "manifest_collection": "manifest", + "license_deployment_image": "XXXXXXXX" } \ No newline at end of file diff --git a/weather_dl_v2/fastapi-server/server_config.py b/weather_dl_v2/fastapi-server/server_config.py index 792f81f6..e177567e 100644 --- a/weather_dl_v2/fastapi-server/server_config.py +++ b/weather_dl_v2/fastapi-server/server_config.py @@ -11,6 +11,7 @@ class ServerConfig: queues_collection: str = "" license_collection: str = "" manifest_collection: str = "" + license_deployment_image: str = "" kwargs: t.Optional[t.Dict[str, Values]] = dataclasses.field(default_factory=dict) @classmethod @@ -35,7 +36,7 @@ def get_config(): if server_config is None: with open(server_config_json) as file: - firestore_dict = json.load(file) - 
server_config = ServerConfig.from_dict(firestore_dict) + config_dict = json.load(file) + server_config = ServerConfig.from_dict(config_dict) return server_config diff --git a/weather_dl_v2/license_deployment/README.md b/weather_dl_v2/license_deployment/README.md index a1624c97..348394e6 100644 --- a/weather_dl_v2/license_deployment/README.md +++ b/weather_dl_v2/license_deployment/README.md @@ -7,11 +7,6 @@ conda env create --name weather-dl-v2-license-dep --file=environment.yml conda activate weather-dl-v2-license-dep ``` -### Add path of created downloader image in downloader.yaml -``` -Please write down the downloader's docker image path at Line 11 of downloader.yaml. -``` - ### Make changes in deployment config, if required ``` Please make approriate changes in deployment_config.json, if required. diff --git a/weather_dl_v2/license_deployment/deployment_config.json b/weather_dl_v2/license_deployment/deployment_config.json index 28a9cde9..9058be9d 100644 --- a/weather_dl_v2/license_deployment/deployment_config.json +++ b/weather_dl_v2/license_deployment/deployment_config.json @@ -2,5 +2,6 @@ "download_collection": "download", "queues_collection": "queues", "license_collection": "license", - "manifest_collection": "manifest" + "manifest_collection": "manifest", + "downloader_k8_image": "XXXXX" } \ No newline at end of file diff --git a/weather_dl_v2/license_deployment/deployment_config.py b/weather_dl_v2/license_deployment/deployment_config.py index 9e598228..2c9e0329 100644 --- a/weather_dl_v2/license_deployment/deployment_config.py +++ b/weather_dl_v2/license_deployment/deployment_config.py @@ -11,6 +11,7 @@ class DeploymentConfig: queues_collection: str = "" license_collection: str = "" manifest_collection: str = "" + downloader_k8_image: str = "" kwargs: t.Optional[t.Dict[str, Values]] = dataclasses.field(default_factory=dict) @classmethod @@ -35,7 +36,7 @@ def get_config(): if deployment_config is None: with open(deployment_config_json) as file: - 
firestore_dict = json.load(file) - deployment_config = DeploymentConfig.from_dict(firestore_dict) + config_dict = json.load(file) + deployment_config = DeploymentConfig.from_dict(config_dict) return deployment_config diff --git a/weather_dl_v2/license_deployment/fetch.py b/weather_dl_v2/license_deployment/fetch.py index ccdef96a..678b5025 100644 --- a/weather_dl_v2/license_deployment/fetch.py +++ b/weather_dl_v2/license_deployment/fetch.py @@ -26,6 +26,7 @@ def create_job(request, result): "user_id": request["username"], "url": result["href"], "target_path": request["location"], + "license_id": license_id, } data_str = json.dumps(res) @@ -36,7 +37,7 @@ def create_job(request, result): @exceptionit def make_fetch_request(request): client = CLIENTS[client_name](request["dataset"]) - manifest = FirestoreManifest() + manifest = FirestoreManifest(license_id=license_id) logger.info( f"By using {client_name} datasets, " f"users agree to the terms and conditions specified in {client.license_url!r}" diff --git a/weather_dl_v2/license_deployment/job_creator.py b/weather_dl_v2/license_deployment/job_creator.py index 75781001..6c335acc 100644 --- a/weather_dl_v2/license_deployment/job_creator.py +++ b/weather_dl_v2/license_deployment/job_creator.py @@ -3,12 +3,21 @@ import json import uuid from kubernetes import client, config +from deployment_config import get_config def create_download_job(message): """Creates a kubernetes workflow of type Job for downloading the data.""" parsed_message = json.loads(message) - config_name, dataset, selection, user_id, url, target_path = parsed_message.values() + ( + config_name, + dataset, + selection, + user_id, + url, + target_path, + license_id, + ) = parsed_message.values() selection = str(selection).replace(" ", "") config.load_config() @@ -27,6 +36,10 @@ def create_download_job(message): user_id, url, target_path, + license_id, ] + dep["spec"]["template"]["spec"]["containers"][0][ + "image" + ] = get_config().downloader_k8_image 
batch_api = client.BatchV1Api() batch_api.create_namespaced_job(body=dep, namespace="default") diff --git a/weather_dl_v2/license_deployment/manifest.py b/weather_dl_v2/license_deployment/manifest.py index 8779d3c6..39d6ad6d 100644 --- a/weather_dl_v2/license_deployment/manifest.py +++ b/weather_dl_v2/license_deployment/manifest.py @@ -207,6 +207,7 @@ class Manifest(abc.ABC): # To reduce the impact of _read() and _update() calls # on the start time of the stage. + license_id: str = "" prev_stage_precise_start_time: t.Optional[str] = None status: t.Optional[DownloadStatus] = None @@ -335,7 +336,8 @@ def __exit__(self, exc_type, exc_inst, exc_tb) -> None: else: status = Status.FAILURE # For explanation, see https://docs.python.org/3/library/traceback.html#traceback.format_exception - error = "\n".join(traceback.format_exception(exc_type, exc_inst, exc_tb)) + error = f"license_id: {self.license_id} " + error += "\n".join(traceback.format_exception(exc_type, exc_inst, exc_tb)) new_status = dataclasses.replace(self.status) new_status.error = error From 5fcf8a225e405dfdf753fa5e509d5ac1c92df6b4 Mon Sep 17 00:00:00 2001 From: aniketinfocusp <122869307+aniketinfocusp@users.noreply.github.com> Date: Tue, 29 Aug 2023 17:21:25 +0530 Subject: [PATCH 38/51] `dl-v2` improvements 4 (#386) * minor fixes and upgrades * updated network service in cli --- .../cli/app/services/network_service.py | 52 ++++++++++--------- weather_dl_v2/cli/app/subcommands/license.py | 4 +- weather_dl_v2/fastapi-server/main.py | 3 +- .../fastapi-server/routers/download.py | 1 + weather_dl_v2/license_deployment/fetch.py | 9 ++-- 5 files changed, 38 insertions(+), 31 deletions(-) diff --git a/weather_dl_v2/cli/app/services/network_service.py b/weather_dl_v2/cli/app/services/network_service.py index a65ee2e7..c846af3b 100644 --- a/weather_dl_v2/cli/app/services/network_service.py +++ b/weather_dl_v2/cli/app/services/network_service.py @@ -17,52 +17,54 @@ def parse_response(self, response: requests.Response): 
logger.info(f"Response {response.text}") return + if isinstance(parsed, list): + print(f"[Total {len(parsed)} items.]") + return json.dumps(parsed, indent=3) @timeit def get(self, uri, header, query=None, payload=None): - with Loader("Sending request..."): - try: + try: + with Loader("Sending request..."): x = requests.get(uri, params=query, headers=header, data=payload) - return self.parse_response(x) - except requests.exceptions.RequestException as e: - logger.error(f"request error: {e}") - raise SystemExit(e) + return self.parse_response(x) + except requests.exceptions.RequestException as e: + logger.error(f"request error: {e}") + raise SystemExit(e) @timeit def post(self, uri, header, query=None, payload=None, file=None): - with Loader("Sending request..."): - try: + try: + with Loader("Sending request..."): x = requests.post( uri, params=query, headers=header, data=payload, files=file ) - return self.parse_response(x) - except requests.exceptions.RequestException as e: - logger.error(f"request error: {e}") - raise SystemExit(e) + return self.parse_response(x) + except requests.exceptions.RequestException as e: + logger.error(f"request error: {e}") + raise SystemExit(e) @timeit def put(self, uri, header, query=None, payload=None, file=None): - with Loader("Sending request..."): - try: + try: + with Loader("Sending request..."): x = requests.put( uri, params=query, headers=header, data=payload, files=file ) - - return self.parse_response(x) - except requests.exceptions.RequestException as e: - logger.error(f"request error: {e}") - raise SystemExit(e) + return self.parse_response(x) + except requests.exceptions.RequestException as e: + logger.error(f"request error: {e}") + raise SystemExit(e) @timeit def delete(self, uri, header, query=None): - with Loader("Sending request..."): - try: + try: + with Loader("Sending request..."): x = requests.delete(uri, params=query, headers=header) - return self.parse_response(x) - except requests.exceptions.RequestException as 
e: - logger.error(f"request error: {e}") - raise SystemExit(e) + return self.parse_response(x) + except requests.exceptions.RequestException as e: + logger.error(f"request error: {e}") + raise SystemExit(e) network_service = NetworkService() diff --git a/weather_dl_v2/cli/app/subcommands/license.py b/weather_dl_v2/cli/app/subcommands/license.py index 1cf6c608..5599555b 100644 --- a/weather_dl_v2/cli/app/subcommands/license.py +++ b/weather_dl_v2/cli/app/subcommands/license.py @@ -73,12 +73,12 @@ def update_license( str, typer.Argument( help="""Input json file. Example json for updated license- """ - """{"client_name" : , "number_of_requests" : , "secret_id" : }""" + """{"client_id": , "client_name" : , "number_of_requests" : , "secret_id" : }""" ), ], # noqa ): validator = LicenseValidator( - valid_keys=["client_name", "number_of_requests", "secret_id"] + valid_keys=["client_id", "client_name", "number_of_requests", "secret_id"] ) try: license_dict = validator.validate_json(file_path=file_path) diff --git a/weather_dl_v2/fastapi-server/main.py b/weather_dl_v2/fastapi-server/main.py index f8b36350..06de8669 100644 --- a/weather_dl_v2/fastapi-server/main.py +++ b/weather_dl_v2/fastapi-server/main.py @@ -20,7 +20,8 @@ async def create_pending_license_deployments(): create_deployment = get_create_deployment() license_list = await license_handler._get_license_without_deployment() - for license_id in license_list: + for license in license_list: + license_id = license["license_id"] try: logger.info(f"Creating license deployment for {license_id}") await create_deployment(license_id, license_handler) diff --git a/weather_dl_v2/fastapi-server/routers/download.py b/weather_dl_v2/fastapi-server/routers/download.py index 4fca2de9..b0e9288c 100644 --- a/weather_dl_v2/fastapi-server/routers/download.py +++ b/weather_dl_v2/fastapi-server/routers/download.py @@ -101,6 +101,7 @@ def upload(file: UploadFile): def get_reschedule_partitions(): def 
invoke_manifest_schedule(partition_list: list, manifest: Manifest): for partition in partition_list: + logger.info(f"Rescheduling partition {partition}.") manifest.schedule( partition["config_name"], partition["dataset"], diff --git a/weather_dl_v2/license_deployment/fetch.py b/weather_dl_v2/license_deployment/fetch.py index 678b5025..02727f90 100644 --- a/weather_dl_v2/license_deployment/fetch.py +++ b/weather_dl_v2/license_deployment/fetch.py @@ -63,9 +63,12 @@ def fetch_request_from_db(): request = None config_name = db_client._get_config_from_queue_by_license_id(license_id) if config_name: - request = db_client._get_partition_from_manifest(config_name) - if not request: - db_client._remove_config_from_license_queue(license_id, config_name) + try: + request = db_client._get_partition_from_manifest(config_name) + if not request: + db_client._remove_config_from_license_queue(license_id, config_name) + except Exception as e: + logger.error(f"Error in fetch_request_from_db. error: {e}.") return request From f8bbf7f0a102fe6f6552b7d45e1d4a2f8af711df Mon Sep 17 00:00:00 2001 From: aniketinfocusp <122869307+aniketinfocusp@users.noreply.github.com> Date: Thu, 31 Aug 2023 16:38:19 +0530 Subject: [PATCH 39/51] `weather-dl-v2` cli show table (#388) * added option to view data as table * updated dependencies * minor fix * set table default response --- weather_dl_v2/cli/app/subcommands/download.py | 8 +++---- weather_dl_v2/cli/app/subcommands/license.py | 8 +++---- weather_dl_v2/cli/app/subcommands/queue.py | 12 ++++------ weather_dl_v2/cli/app/utils.py | 24 +++++++++++++++++-- weather_dl_v2/cli/environment.yml | 1 + weather_dl_v2/cli/setup.py | 2 +- 6 files changed, 37 insertions(+), 18 deletions(-) diff --git a/weather_dl_v2/cli/app/subcommands/download.py b/weather_dl_v2/cli/app/subcommands/download.py index 4aff8cc8..671caa7a 100644 --- a/weather_dl_v2/cli/app/subcommands/download.py +++ b/weather_dl_v2/cli/app/subcommands/download.py @@ -1,7 +1,7 @@ import typer from 
typing_extensions import Annotated from app.services.download_service import download_service -from app.utils import Validator +from app.utils import Validator, as_table from typing import List app = typer.Typer(rich_markup_mode="markdown") @@ -31,10 +31,10 @@ def get_downloads( print(f"filter error: {e}") return - print(download_service._list_all_downloads_by_filter(filter_dict)) + print(as_table(download_service._list_all_downloads_by_filter(filter_dict))) return - print(download_service._list_all_downloads()) + print(as_table(download_service._list_all_downloads())) @app.command("add", help="Submit new config to download.") @@ -59,7 +59,7 @@ def submit_download( def get_download_by_config( config_name: Annotated[str, typer.Argument(help="Config file name.")] ): - print(download_service._get_download_by_config(config_name)) + print(as_table(download_service._get_download_by_config(config_name))) @app.command("remove", help="Remove existing config.") diff --git a/weather_dl_v2/cli/app/subcommands/license.py b/weather_dl_v2/cli/app/subcommands/license.py index 5599555b..30c53113 100644 --- a/weather_dl_v2/cli/app/subcommands/license.py +++ b/weather_dl_v2/cli/app/subcommands/license.py @@ -1,7 +1,7 @@ import typer from typing_extensions import Annotated from app.services.license_service import license_service -from app.utils import Validator +from app.utils import Validator, as_table app = typer.Typer() @@ -26,15 +26,15 @@ def get_all_license( print(f"filter error: {e}") return - print(license_service._get_all_license_by_client_name(client_name)) + print(as_table(license_service._get_all_license_by_client_name(client_name))) return - print(license_service._get_all_license()) + print(as_table(license_service._get_all_license())) @app.command("get", help="Get a particular license by ID.") def get_license(license: Annotated[str, typer.Argument(help="License ID.")]): - print(license_service._get_license_by_license_id(license)) + 
print(as_table(license_service._get_license_by_license_id(license))) @app.command("add", help="Add new license.") diff --git a/weather_dl_v2/cli/app/subcommands/queue.py b/weather_dl_v2/cli/app/subcommands/queue.py index 61bf8fdd..f26baa7b 100644 --- a/weather_dl_v2/cli/app/subcommands/queue.py +++ b/weather_dl_v2/cli/app/subcommands/queue.py @@ -1,7 +1,7 @@ import typer from typing_extensions import Annotated from app.services.queue_service import queue_service -from app.utils import Validator +from app.utils import Validator, as_table app = typer.Typer() @@ -26,17 +26,15 @@ def get_all_license_queue( print(f"filter error: {e}") return - print(queue_service._get_license_queue_by_client_name(client_name)) + print(as_table(queue_service._get_license_queue_by_client_name(client_name))) return - print(queue_service._get_all_license_queues()) + print(as_table(queue_service._get_all_license_queues())) @app.command("get", help="Get queue of particular license.") -def get_license_queue( - license: Annotated[str, typer.Argument(help="License ID")], -): - print(queue_service._get_queue_by_license(license)) +def get_license_queue(license: Annotated[str, typer.Argument(help="License ID")]): + print(as_table(queue_service._get_queue_by_license(license))) @app.command( diff --git a/weather_dl_v2/cli/app/utils.py b/weather_dl_v2/cli/app/utils.py index 87003b6c..469278d1 100644 --- a/weather_dl_v2/cli/app/utils.py +++ b/weather_dl_v2/cli/app/utils.py @@ -8,6 +8,7 @@ from shutil import get_terminal_size from threading import Thread from time import sleep +from tabulate import tabulate logger = logging.getLogger(__name__) @@ -23,6 +24,25 @@ def wrap_func(*args, **kwargs): return wrap_func +def as_table(response: str): + data = json.loads(response) + + if not isinstance(data, list): + # convert response to list if not a list. + data = [data] + + header = data[0].keys() + # if any column has lists, convert that to a string. 
+ rows = [ + [",\n".join(val) if isinstance(val, list) else val for val in x.values()] + for x in data + ] + rows.insert(0, list(header)) + return tabulate( + rows, showindex=True, tablefmt="grid", maxcolwidths=[16] * len(header) + ) + + class Loader: def __init__(self, desc="Loading...", end="", timeout=0.1): @@ -92,7 +112,7 @@ def validate( if self._validate_keys(data_set, valid_set, allow_missing): return filter_dict - def validate_json(self, file_path): + def validate_json(self, file_path, allow_missing: bool = False): try: with open(file_path) as f: data: dict = json.load(f) @@ -101,7 +121,7 @@ def validate_json(self, file_path): data_set = set(data_keys) valid_set = set(self.valid_keys) - if self._validate_keys(data_set, valid_set): + if self._validate_keys(data_set, valid_set, allow_missing): return data except FileNotFoundError: diff --git a/weather_dl_v2/cli/environment.yml b/weather_dl_v2/cli/environment.yml index 74cc375c..f2ffec62 100644 --- a/weather_dl_v2/cli/environment.yml +++ b/weather_dl_v2/cli/environment.yml @@ -5,6 +5,7 @@ dependencies: - python=3.10 - pip=23.0.1 - typer=0.9.0 + - tabulate=0.9.0 - pip: - requests - ruff diff --git a/weather_dl_v2/cli/setup.py b/weather_dl_v2/cli/setup.py index 66cba004..75cad67c 100644 --- a/weather_dl_v2/cli/setup.py +++ b/weather_dl_v2/cli/setup.py @@ -1,6 +1,6 @@ from setuptools import setup -requirements = ["typer", "requests"] +requirements = ["typer", "requests", "tabulate"] setup( name="weather-dl-v2", From d5c5272126a36a799b4adfc721f45db93f6dfd28 Mon Sep 17 00:00:00 2001 From: aniketinfocusp <122869307+aniketinfocusp@users.noreply.github.com> Date: Tue, 5 Sep 2023 17:19:55 +0530 Subject: [PATCH 40/51] `dl-v2` server queue client name bug (#389) * Updating queue client_name when license is updated. 
* nit: updated method name --- .../fastapi-server/database/queue_handler.py | 25 +++++++++++++++++++ .../fastapi-server/routers/license.py | 4 +++ 2 files changed, 29 insertions(+) diff --git a/weather_dl_v2/fastapi-server/database/queue_handler.py b/weather_dl_v2/fastapi-server/database/queue_handler.py index 62c8f2ff..d9973920 100644 --- a/weather_dl_v2/fastapi-server/database/queue_handler.py +++ b/weather_dl_v2/fastapi-server/database/queue_handler.py @@ -59,6 +59,12 @@ async def _update_config_priority_in_license( ) -> None: pass + @abc.abstractmethod + async def _update_client_name_in_license_queue( + self, license_id: str, client_name: str + ) -> None: + pass + class QueueHandlerMock(QueueHandler): @@ -110,6 +116,13 @@ async def _update_config_priority_in_license( "Updated snapshot.id queue in 'queues' collection. Update_time: 00000." ) + async def _update_client_name_in_license_queue( + self, license_id: str, client_name: str + ) -> None: + logger.info( + "Updated snapshot.id queue in 'queues' collection. Update_time: 00000." + ) + class QueueHandlerFirestore(QueueHandler): @@ -205,3 +218,15 @@ async def _update_config_priority_in_license( logger.info( f"Updated {snapshot.id} queue in 'queues' collection. Update_time: {result.update_time}." ) + + async def _update_client_name_in_license_queue( + self, license_id: str, client_name: str + ) -> None: + result: WriteResult = ( + await self.db.collection(self.collection) + .document(license_id) + .update({"client_name": client_name}) + ) + logger.info( + f"Updated {license_id} queue in 'queues' collection. Update_time: {result.update_time}." 
+ ) diff --git a/weather_dl_v2/fastapi-server/routers/license.py b/weather_dl_v2/fastapi-server/routers/license.py index ebb826bb..20c81bab 100644 --- a/weather_dl_v2/fastapi-server/routers/license.py +++ b/weather_dl_v2/fastapi-server/routers/license.py @@ -104,6 +104,7 @@ async def update_license( license_id: str, license: License, license_handler: LicenseHandler = Depends(get_license_handler), + queue_handler: QueueHandler = Depends(get_queue_handler), create_deployment=Depends(get_create_deployment), terminate_license_deployment=Depends(get_terminate_license_deployment), ): @@ -115,6 +116,9 @@ async def update_license( license_dict = license.dict() await license_handler._update_license(license_id, license_dict) + await queue_handler._update_client_name_in_license_queue( + license_id, license_dict["client_name"] + ) terminate_license_deployment(license_id) await create_deployment(license_id, license_handler) From c24cbae7c9560179a43d0d40cfdc3659d73cb521 Mon Sep 17 00:00:00 2001 From: aniketinfocusp <122869307+aniketinfocusp@users.noreply.github.com> Date: Wed, 6 Sep 2023 17:53:01 +0530 Subject: [PATCH 41/51] `dl-v2` license dep logger (#390) * updated logger * fixed logger --- weather_dl_v2/license_deployment/fetch.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/weather_dl_v2/license_deployment/fetch.py b/weather_dl_v2/license_deployment/fetch.py index 02727f90..5229bcac 100644 --- a/weather_dl_v2/license_deployment/fetch.py +++ b/weather_dl_v2/license_deployment/fetch.py @@ -15,9 +15,6 @@ db_client = FirestoreClient() secretmanager_client = secretmanager.SecretManagerServiceClient() -logger = logging.getLogger(__name__) - - def create_job(request, result): res = { "config_name": request["config_name"], @@ -64,11 +61,12 @@ def fetch_request_from_db(): config_name = db_client._get_config_from_queue_by_license_id(license_id) if config_name: try: + logger.info(f"Fetching partition for {config_name}") request = 
db_client._get_partition_from_manifest(config_name) if not request: db_client._remove_config_from_license_queue(license_id, config_name) except Exception as e: - logger.error(f"Error in fetch_request_from_db. error: {e}.") + logger.error(f"Error in fetch_request_from_db for {config_name}. error: {e}.") return request @@ -112,6 +110,10 @@ def boot_up(license: str) -> None: if __name__ == "__main__": license = sys.argv[2] + global logger + logging.basicConfig(level=logging.INFO, format=f'[{license}] %(levelname)s - %(message)s') + logger = logging.getLogger(__name__) + logger.info(f"Deployment for license: {license}.") boot_up(license) main() From bd3081f153c23dc007fc5ae39c818aefe7c90fa7 Mon Sep 17 00:00:00 2001 From: aniketinfocusp <122869307+aniketinfocusp@users.noreply.github.com> Date: Wed, 13 Sep 2023 17:51:50 +0530 Subject: [PATCH 42/51] `dl-v2` cli minor updates (#393) * minor changes * lint fix --- weather_dl_v2/cli/app/utils.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/weather_dl_v2/cli/app/utils.py b/weather_dl_v2/cli/app/utils.py index 469278d1..866f884e 100644 --- a/weather_dl_v2/cli/app/utils.py +++ b/weather_dl_v2/cli/app/utils.py @@ -31,10 +31,18 @@ def as_table(response: str): # convert response to list if not a list. data = [data] + if len(data) == 0: + return "" + header = data[0].keys() # if any column has lists, convert that to a string. 
rows = [ - [",\n".join(val) if isinstance(val, list) else val for val in x.values()] + [ + ",\n".join([f"{i} {ele}" for i, ele in enumerate(val)]) + if isinstance(val, list) + else val + for val in x.values() + ] for x in data ] rows.insert(0, list(header)) From fc3c8a2f055d20adbcf67ae5a1875ea647bab96a Mon Sep 17 00:00:00 2001 From: aniketinfocusp <122869307+aniketinfocusp@users.noreply.github.com> Date: Thu, 14 Sep 2023 15:46:06 +0530 Subject: [PATCH 43/51] `dl-v2` added gcs storage and updated retry partition (#394) * added GCS storage capabltiy to server * updated refetch partition to use config from gcs storage * lint fixes * added cli command to show config and updated docs * nit fixes * updated comments for dl-v2 --- weather_dl_v2/cli/CLI-Documentation.md | 14 +++ .../cli/app/services/download_service.py | 10 ++ weather_dl_v2/cli/app/subcommands/download.py | 6 ++ .../fastapi-server/API-Interactions.md | 1 + .../fastapi-server/config_processing/util.py | 6 +- .../fastapi-server/database/session.py | 17 ++++ .../database/storage_handler.py | 62 ++++++++++++ .../license_dep/deployment_creator.py | 2 +- weather_dl_v2/fastapi-server/main.py | 6 +- .../fastapi-server/routers/download.py | 94 +++++++++++++------ .../fastapi-server/routers/license.py | 4 +- .../fastapi-server/server_config.json | 2 + weather_dl_v2/fastapi-server/server_config.py | 2 + weather_dl_v2/license_deployment/fetch.py | 2 +- weather_dl_v2/license_deployment/util.py | 8 +- 15 files changed, 192 insertions(+), 44 deletions(-) create mode 100644 weather_dl_v2/fastapi-server/database/storage_handler.py diff --git a/weather_dl_v2/cli/CLI-Documentation.md b/weather_dl_v2/cli/CLI-Documentation.md index bebeebbd..9ae26d57 100644 --- a/weather_dl_v2/cli/CLI-Documentation.md +++ b/weather_dl_v2/cli/CLI-Documentation.md @@ -83,6 +83,20 @@ weather-dl-v2 download get example.cfg ``` +
+ weather-dl-v2 download show
+ Get contents of a particular config by config name. +
+ +##### Arguments +> `CONFIG_NAME` : Name of the download config. + +##### Usage +``` +weather-dl-v2 download show example.cfg +``` +
+
weather-dl-v2 download remove
Remove a download by config name. diff --git a/weather_dl_v2/cli/app/services/download_service.py b/weather_dl_v2/cli/app/services/download_service.py index 5ad6644e..654941c8 100644 --- a/weather_dl_v2/cli/app/services/download_service.py +++ b/weather_dl_v2/cli/app/services/download_service.py @@ -21,6 +21,10 @@ def _list_all_downloads_by_filter(self, filter_dict: dict): @abc.abstractmethod def _get_download_by_config(self, config_name: str): pass + + @abc.abstractmethod + def _show_config_content(self, config_name: str): + pass @abc.abstractmethod def _add_new_download( @@ -59,6 +63,12 @@ def _get_download_by_config(self, config_name: str): uri=f"{self.endpoint}/{config_name}", header={"accept": "application/json"}, ) + + def _show_config_content(self, config_name: str): + return network_service.get( + uri=f"{self.endpoint}/show/{config_name}", + header={"accept": "application/json"}, + ) def _add_new_download( self, file_path: str, licenses: t.List[str], force_download: bool diff --git a/weather_dl_v2/cli/app/subcommands/download.py b/weather_dl_v2/cli/app/subcommands/download.py index 671caa7a..24f5ea8b 100644 --- a/weather_dl_v2/cli/app/subcommands/download.py +++ b/weather_dl_v2/cli/app/subcommands/download.py @@ -61,6 +61,12 @@ def get_download_by_config( ): print(as_table(download_service._get_download_by_config(config_name))) +@app.command("show", help="Show contents of a particular config.") +def get_download_by_config( + config_name: Annotated[str, typer.Argument(help="Config file name.")] +): + print(download_service._show_config_content(config_name)) + @app.command("remove", help="Remove existing config.") def remove_download( diff --git a/weather_dl_v2/fastapi-server/API-Interactions.md b/weather_dl_v2/fastapi-server/API-Interactions.md index 45a9d768..3ea4eece 100644 --- a/weather_dl_v2/fastapi-server/API-Interactions.md +++ b/weather_dl_v2/fastapi-server/API-Interactions.md @@ -7,6 +7,7 @@ | `weather-dl-v2 download list` | `get` | `/download/` | | 
`weather-dl-v2 download list --filter client_name=` | `get` | `/download?client_name={name}` | | `weather-dl-v2 download get ` | `get` | `/download/{config_name}` | +| `weather-dl-v2 download show ` | `get` | `/download/show/{config_name}` | | `weather-dl-v2 download remove ` | `delete` | `/download/{config_name}` | | `weather-dl-v2 download refetch -l ` | `post` | `/download/refetch/{config_name}` | | License | | | diff --git a/weather_dl_v2/fastapi-server/config_processing/util.py b/weather_dl_v2/fastapi-server/config_processing/util.py index cb6544bf..12217211 100644 --- a/weather_dl_v2/fastapi-server/config_processing/util.py +++ b/weather_dl_v2/fastapi-server/config_processing/util.py @@ -79,7 +79,7 @@ def copy(src: str, dst: str) -> None: subprocess.run(["gsutil", "cp", src, dst], check=True, capture_output=True) except subprocess.CalledProcessError as e: logger.info( - f'Failed to copy file {src!r} to {dst!r} due to {e.stderr.decode("utf-8")}' + f'Failed to copy file {src!r} to {dst!r} due to {e.stderr.decode("utf-8")}.' ) raise @@ -88,7 +88,7 @@ def copy(src: str, dst: str) -> None: def to_json_serializable_type(value: t.Any) -> t.Any: """Returns the value with a type serializable to JSON""" # Note: The order of processing is significant. - logger.info("Serializing to JSON") + logger.info("Serializing to JSON.") if pd.isna(value) or value is None: return None @@ -209,6 +209,6 @@ def download_with_aria2(url: str, path: str) -> None: ) except subprocess.CalledProcessError as e: logger.info( - f'Failed download from server {url!r} to {path!r} due to {e.stderr.decode("utf-8")}' + f'Failed download from server {url!r} to {path!r} due to {e.stderr.decode("utf-8")}.' 
) raise diff --git a/weather_dl_v2/fastapi-server/database/session.py b/weather_dl_v2/fastapi-server/database/session.py index 91579bb3..afac904d 100644 --- a/weather_dl_v2/fastapi-server/database/session.py +++ b/weather_dl_v2/fastapi-server/database/session.py @@ -5,6 +5,8 @@ from google.cloud import firestore from firebase_admin import credentials from config_processing.util import get_wait_interval +from server_config import get_config +from gcloud import storage logger = logging.getLogger(__name__) @@ -17,6 +19,7 @@ def _get_db(self): db: firestore.AsyncClient = None +gcs: storage.Client = None def get_async_client() -> firestore.AsyncClient: @@ -45,3 +48,17 @@ def get_async_client() -> firestore.AsyncClient: attempts += 1 return db + + +def get_gcs_client() -> storage.Client: + global gcs + + if gcs: + return gcs + + try: + gcs = storage.Client(project=get_config().gcs_project) + except ValueError as e: + logger.error(f"Error initializing GCS client: {e}.") + + return gcs diff --git a/weather_dl_v2/fastapi-server/database/storage_handler.py b/weather_dl_v2/fastapi-server/database/storage_handler.py new file mode 100644 index 00000000..32176ec3 --- /dev/null +++ b/weather_dl_v2/fastapi-server/database/storage_handler.py @@ -0,0 +1,62 @@ +import abc +import os +import logging +import tempfile +import contextlib +import typing as t +from google.cloud import storage +from database.session import get_gcs_client +from server_config import get_config + + +logger = logging.getLogger(__name__) + + +def get_storage_handler(): + return StorageHandlerGCS(client=get_gcs_client()) + + +class StorageHandler(abc.ABC): + + @abc.abstractmethod + def _upload_file(self, file_path) -> str: + pass + + @abc.abstractmethod + def _open_local(self, file_name) -> t.Iterator[str]: + pass + + +class StorageHandlerMock(StorageHandler): + + def __init__(self) -> None: + pass + + def _upload_file(self, file_path) -> None: + pass + + def _open_local(self, file_name) -> t.Iterator[str]: + 
pass + + +class StorageHandlerGCS(StorageHandler): + + def __init__(self, client: storage.Client) -> None: + self.client = client + self.bucket = self.client.get_bucket(get_config().storage_bucket) + + def _upload_file(self, file_path) -> str: + filename = os.path.basename(file_path).split("/")[-1] + + blob = self.bucket.blob(filename) + blob.upload_from_filename(file_path) + + logger.info(f"Uploaded {filename} to {self.bucket}.") + return blob.public_url + + @contextlib.contextmanager + def _open_local(self, file_name) -> t.Iterator[str]: + blob = self.bucket.blob(file_name) + with tempfile.NamedTemporaryFile() as dest_file: + blob.download_to_filename(dest_file.name) + yield dest_file.name diff --git a/weather_dl_v2/fastapi-server/license_dep/deployment_creator.py b/weather_dl_v2/fastapi-server/license_dep/deployment_creator.py index 77927b0f..68dd9527 100644 --- a/weather_dl_v2/fastapi-server/license_dep/deployment_creator.py +++ b/weather_dl_v2/fastapi-server/license_dep/deployment_creator.py @@ -32,7 +32,7 @@ def create_license_deployment(license_id: str) -> str: body=deployment_manifest, namespace="default" ) - logger.info(f"Deployment created successfully: {response.metadata.name}") + logger.info(f"Deployment created successfully: {response.metadata.name}.") return deployment_name diff --git a/weather_dl_v2/fastapi-server/main.py b/weather_dl_v2/fastapi-server/main.py index 06de8669..b75c7112 100644 --- a/weather_dl_v2/fastapi-server/main.py +++ b/weather_dl_v2/fastapi-server/main.py @@ -23,15 +23,15 @@ async def create_pending_license_deployments(): for license in license_list: license_id = license["license_id"] try: - logger.info(f"Creating license deployment for {license_id}") + logger.info(f"Creating license deployment for {license_id}.") await create_deployment(license_id, license_handler) except Exception as e: - logger.error(f"License deployment failed for {license_id}. Exception: {e}") + logger.error(f"License deployment failed for {license_id}. 
Exception: {e}.") @asynccontextmanager async def lifespan(app: FastAPI): - logger.info("Started FastAPI server") + logger.info("Started FastAPI server.") # Boot up # Make directory to store the uploaded config files. os.makedirs(os.path.join(os.getcwd(), "config_files"), exist_ok=True) diff --git a/weather_dl_v2/fastapi-server/routers/download.py b/weather_dl_v2/fastapi-server/routers/download.py index b0e9288c..96db4c74 100644 --- a/weather_dl_v2/fastapi-server/routers/download.py +++ b/weather_dl_v2/fastapi-server/routers/download.py @@ -5,12 +5,16 @@ import json from enum import Enum +from config_processing.parsers import parse_config, process_config +from config_processing.config import Config +from server_config import get_config from fastapi import APIRouter, HTTPException, BackgroundTasks, UploadFile, Depends, Body from config_processing.pipeline import start_processing_config from database.download_handler import DownloadHandler, get_download_handler from database.queue_handler import QueueHandler, get_queue_handler from database.license_handler import LicenseHandler, get_license_handler from database.manifest_handler import ManifestHandler, get_manifest_handler +from database.storage_handler import StorageHandler, get_storage_handler from config_processing.manifest import FirestoreManifest, Manifest from fastapi.concurrency import run_in_threadpool @@ -86,6 +90,10 @@ def upload(file: UploadFile): dest = os.path.join(os.getcwd(), "config_files", file.filename) with open(dest, "wb+") as dest_: shutil.copyfileobj(file.file, dest_) + + logger.info(f"Uploading {file.filename} to gcs bucket.") + storage_handler: StorageHandler = get_storage_handler() + storage_handler._upload_file(dest) return dest return upload @@ -99,34 +107,52 @@ def upload(file: UploadFile): def get_reschedule_partitions(): - def invoke_manifest_schedule(partition_list: list, manifest: Manifest): + def invoke_manifest_schedule( + partition_list: list, config: Config, manifest: Manifest + ): 
for partition in partition_list: logger.info(f"Rescheduling partition {partition}.") manifest.schedule( - partition["config_name"], - partition["dataset"], + config.config_name, + config.dataset, json.loads(partition["selection"]), partition["location"], partition["username"], ) - async def reschedule_partitions( - config_name: str, - licenses: list, - manifest_handler: ManifestHandler, - download_handler: DownloadHandler, - queue_handler: QueueHandler, - ): + async def reschedule_partitions(config_name: str, licenses: list): + manifest_handler: ManifestHandler = get_manifest_handler() + download_handler: DownloadHandler = get_download_handler() + queue_handler: QueueHandler = get_queue_handler() + storage_handler: StorageHandler = get_storage_handler() + partition_list = await manifest_handler._get_non_successfull_downloads( config_name ) + + config = None manifest = FirestoreManifest() + + with storage_handler._open_local(config_name) as local_path: + with open(local_path, "r", encoding="utf-8") as f: + config = process_config(f, config_name) + await download_handler._mark_partitioning_status( config_name, "Partitioning in-progress." ) try: - await run_in_threadpool(invoke_manifest_schedule, partition_list, manifest) + if config is None: + logger.error( + f"Failed reschedule_partitions. Could not open {config_name}." + ) + raise FileNotFoundError( + f"Failed reschedule_partitions. Could not open {config_name}." + ) + + await run_in_threadpool( + invoke_manifest_schedule, partition_list, config, manifest + ) await download_handler._mark_partitioning_status( config_name, "Partitioning completed." 
) @@ -140,13 +166,7 @@ async def reschedule_partitions( def get_reschedule_partitions_mock(): - def reschedule_partitions( - config_name: str, - licenses: list, - manifest_handler: ManifestHandler, - download_handler: DownloadHandler, - queue_handler: QueueHandler, - ): + def reschedule_partitions(config_name: str, licenses: list): pass return reschedule_partitions @@ -206,6 +226,29 @@ class DownloadStatus(str, Enum): IN_PROGRESS = "in-progress" +@router.get("/show/{config_name}") +async def show_download_config( + config_name: str, + download_handler: DownloadHandler = Depends(get_download_handler), + storage_handler: StorageHandler = Depends(get_storage_handler), +): + if not await download_handler._check_download_exists(config_name): + logger.error(f"No such download config {config_name} to show.") + raise HTTPException( + status_code=404, + detail=f"No such download config {config_name} to show.", + ) + + contents = None + + with storage_handler._open_local(config_name) as local_path: + with open(local_path, "r", encoding="utf-8") as f: + contents = parse_config(f) + logger.info(f"Contents of {config_name}: {contents}.") + + return {"config_name": config_name, "contents": contents} + + # Can check the current status of the submitted config. 
# List status for all the downloads + handle filters @router.get("/") @@ -307,16 +350,14 @@ async def retry_config( licenses: list = Body(embed=True), background_tasks: BackgroundTasks = BackgroundTasks(), download_handler: DownloadHandler = Depends(get_download_handler), - queue_handler: QueueHandler = Depends(get_queue_handler), - manifest_handler: ManifestHandler = Depends(get_manifest_handler), license_handler: LicenseHandler = Depends(get_license_handler), reschedule_partitions=Depends(get_reschedule_partitions), ): if not await download_handler._check_download_exists(config_name): - logger.error(f"No such download config {config_name} to stop & remove.") + logger.error(f"No such download config {config_name} to retry.") raise HTTPException( status_code=404, - detail=f"No such download config {config_name} to stop & remove.", + detail=f"No such download config {config_name} to retry.", ) for license_id in licenses: @@ -326,13 +367,6 @@ async def retry_config( status_code=404, detail=f"No such license {license_id}." 
) - background_tasks.add_task( - reschedule_partitions, - config_name, - licenses, - manifest_handler, - download_handler, - queue_handler, - ) + background_tasks.add_task(reschedule_partitions, config_name, licenses) return {"msg": "Refetch initiated successfully."} diff --git a/weather_dl_v2/fastapi-server/routers/license.py b/weather_dl_v2/fastapi-server/routers/license.py index 20c81bab..7202168c 100644 --- a/weather_dl_v2/fastapi-server/routers/license.py +++ b/weather_dl_v2/fastapi-server/routers/license.py @@ -57,7 +57,7 @@ async def create_deployment(license_id: str, license_handler: LicenseHandler): def get_create_deployment_mock(): def create_deployment_mock(license_id: str, license_handler: LicenseHandler): - logger.info("create deployment mocked") + logger.info("create deployment mock.") return create_deployment_mock @@ -68,7 +68,7 @@ def get_terminate_license_deployment(): def get_terminate_license_deployment_mock(): def get_terminate_license_deployment_mock(license_id): - logger.info(f"terminating license deployment for {license_id}") + logger.info(f"terminating license deployment for {license_id}.") return get_terminate_license_deployment_mock diff --git a/weather_dl_v2/fastapi-server/server_config.json b/weather_dl_v2/fastapi-server/server_config.json index 7b36be7c..65cb7c80 100644 --- a/weather_dl_v2/fastapi-server/server_config.json +++ b/weather_dl_v2/fastapi-server/server_config.json @@ -3,5 +3,7 @@ "queues_collection": "queues", "license_collection": "license", "manifest_collection": "manifest", + "storage_bucket": "XXXXXXXX", + "gcs_project": "XXXXXXXX", "license_deployment_image": "XXXXXXXX" } \ No newline at end of file diff --git a/weather_dl_v2/fastapi-server/server_config.py b/weather_dl_v2/fastapi-server/server_config.py index e177567e..bf76a291 100644 --- a/weather_dl_v2/fastapi-server/server_config.py +++ b/weather_dl_v2/fastapi-server/server_config.py @@ -11,6 +11,8 @@ class ServerConfig: queues_collection: str = "" 
license_collection: str = "" manifest_collection: str = "" + storage_bucket: str = "" + gcs_project: str = "" license_deployment_image: str = "" kwargs: t.Optional[t.Dict[str, Values]] = dataclasses.field(default_factory=dict) diff --git a/weather_dl_v2/license_deployment/fetch.py b/weather_dl_v2/license_deployment/fetch.py index 5229bcac..5e7c4fe7 100644 --- a/weather_dl_v2/license_deployment/fetch.py +++ b/weather_dl_v2/license_deployment/fetch.py @@ -61,7 +61,7 @@ def fetch_request_from_db(): config_name = db_client._get_config_from_queue_by_license_id(license_id) if config_name: try: - logger.info(f"Fetching partition for {config_name}") + logger.info(f"Fetching partition for {config_name}.") request = db_client._get_partition_from_manifest(config_name) if not request: db_client._remove_config_from_license_queue(license_id, config_name) diff --git a/weather_dl_v2/license_deployment/util.py b/weather_dl_v2/license_deployment/util.py index 7ea99ffe..0cd5ecbc 100644 --- a/weather_dl_v2/license_deployment/util.py +++ b/weather_dl_v2/license_deployment/util.py @@ -30,7 +30,7 @@ def inner_function(*args, **kwargs): try: func(*args, **kwargs) except Exception as e: - logger.error(f"exception in {func.__name__} {e.__class__.__name__} {e}") + logger.error(f"exception in {func.__name__} {e.__class__.__name__} {e}.") return inner_function @@ -89,7 +89,7 @@ def copy(src: str, dst: str) -> None: subprocess.run(["gsutil", "cp", src, dst], check=True, capture_output=True) except subprocess.CalledProcessError as e: logger.info( - f'Failed to copy file {src!r} to {dst!r} due to {e.stderr.decode("utf-8")}' + f'Failed to copy file {src!r} to {dst!r} due to {e.stderr.decode("utf-8")}.' ) raise @@ -98,7 +98,7 @@ def copy(src: str, dst: str) -> None: def to_json_serializable_type(value: t.Any) -> t.Any: """Returns the value with a type serializable to JSON""" # Note: The order of processing is significant. 
- logger.info("Serializing to JSON") + logger.info("Serializing to JSON.") if pd.isna(value) or value is None: return None @@ -219,6 +219,6 @@ def download_with_aria2(url: str, path: str) -> None: ) except subprocess.CalledProcessError as e: logger.info( - f'Failed download from server {url!r} to {path!r} due to {e.stderr.decode("utf-8")}' + f'Failed download from server {url!r} to {path!r} due to {e.stderr.decode("utf-8")}.' ) raise From bbfe5547b6131ea14d83c2b5f485e154525633a1 Mon Sep 17 00:00:00 2001 From: aniketinfocusp <122869307+aniketinfocusp@users.noreply.github.com> Date: Fri, 15 Sep 2023 15:28:55 +0530 Subject: [PATCH 44/51] `dl-v2` cli doc update (#395) * updated cli docs * updated cli doc --- weather_dl_v2/cli/CLI-Documentation.md | 68 ++++++++++---------------- weather_dl_v2/cli/README.md | 2 +- 2 files changed, 28 insertions(+), 42 deletions(-) diff --git a/weather_dl_v2/cli/CLI-Documentation.md b/weather_dl_v2/cli/CLI-Documentation.md index 9ae26d57..ea16bb6b 100644 --- a/weather_dl_v2/cli/CLI-Documentation.md +++ b/weather_dl_v2/cli/CLI-Documentation.md @@ -9,7 +9,6 @@ weather-dl-v2 ## Ping Ping the FastAPI server and check if it’s live and reachable. -
weather-dl-v2 ping ##### Usage @@ -17,14 +16,14 @@ Ping the FastAPI server and check if it’s live and reachable. weather-dl-v2 ping ``` -

## Download Manage download configs. -
+ +### Add Downloads weather-dl-v2 download add
Adds a new download config to specific licenses.
@@ -34,25 +33,28 @@ Manage download configs. > `FILE_PATH` : Path to config file. ##### Options -> `-l/--license` (Required): License ID to which this download has to be added to. +> `-l/--license` (Required): License ID to which this download has to be added to. > `-f/--force-download` : Force redownload of partitions that were previously downloaded. ##### Usage ``` weather-dl-v2 download add /path/to/example.cfg -l L1 -l L2 [--force-download] ``` -
- -
+### List Downloads weather-dl-v2 download list
List all the active downloads.
The list can also be filtered by client name or status. -Available filters: -[key: client_name, values: cds, mars, ecpublic] -[key: status, values: completed, failed, in-progress] +Available filters: +``` +Filter Key: client_name +Values: cds, mars, ecpublic + +Filter Key: status +Values: completed, failed, in-progress +``` ##### Options > `--filter` : Filter the list by some key and value. Format of filter filter_key=filter_value @@ -66,10 +68,8 @@ weather-dl-v2 download list --filter status=failed weather-dl-v2 download list --filter status=in-progress weather-dl-v2 download list --filter client_name=cds --filter status=success ``` -
- -
+### Download Get weather-dl-v2 download get
Get a particular download by config name.
@@ -81,9 +81,8 @@ weather-dl-v2 download list --filter client_name=cds --filter status=success ``` weather-dl-v2 download get example.cfg ``` -
-
+### Download Show weather-dl-v2 download show
Get contents of a particular config by config name.
@@ -95,9 +94,8 @@ weather-dl-v2 download get example.cfg ``` weather-dl-v2 download show example.cfg ``` -
-
+### Download Remove weather-dl-v2 download remove
Remove a download by config name.
@@ -109,9 +107,8 @@ weather-dl-v2 download show example.cfg ``` weather-dl-v2 download remove example.cfg ``` -
-
+### Download Refetch weather-dl-v2 download refetch
Refetch all non-successful partitions of a config.
@@ -126,14 +123,13 @@ weather-dl-v2 download remove example.cfg ``` weather-dl-v2 download refetch example.cfg -l L1 -l L2 ``` -

## License Manage licenses. -
+### License Add weather-dl-v2 license add
Add a new license. New licenses are added using a json file.
@@ -157,9 +153,8 @@ NOTE: `license_id` is case insensitive and has to be unique for each license. ``` weather-dl-v2 license add /path/to/new-license.json ``` -
-
+### License Get weather-dl-v2 license get
Get a particular license by license ID.
@@ -171,9 +166,8 @@ weather-dl-v2 license add /path/to/new-license.json ``` weather-dl-v2 license get L1 ``` -
-
+### License Remove weather-dl-v2 license remove
Remove a particular license by license ID.
@@ -185,9 +179,8 @@ weather-dl-v2 license get L1 ``` weather-dl-v2 license remove L1 ``` -
-
+### License List weather-dl-v2 license list
List all the licenses available.
@@ -202,9 +195,8 @@ weather-dl-v2 license remove L1 weather-dl-v2 license list weather-dl-v2 license list --filter client_name=cds ``` -
-
+### License Update weather-dl-v2 license update
Update an existing license using License ID and a license json.
@@ -217,16 +209,15 @@ weather-dl-v2 license list --filter client_name=cds ##### Usage ``` -weather-dl-v2 license edit L1 /path/to/license.json +weather-dl-v2 license update L1 /path/to/license.json ``` -

## Queue Manage all the license queues. -
+### Queue List weather-dl-v2 queue list
List all the queues.
@@ -241,9 +232,8 @@ Manage all the license queue. weather-dl-v2 queue list weather-dl-v2 queue list --filter client_name=cds ``` -
-
+### Queue Get weather-dl-v2 queue get
Get a queue by license ID.
@@ -257,9 +247,8 @@ weather-dl-v2 queue list --filter client_name=cds ``` weather-dl-v2 queue get L1 ``` -
-
+### Queue Edit weather-dl-v2 queue edit
Edit the priority of configs inside queues using edit.
@@ -286,14 +275,13 @@ Priority can be edited in two ways: weather-dl-v2 queue edit L1 --file /path/to/priority.json weather-dl-v2 queue edit L1 --config example.cfg --priority 0 ``` -

## Config Configurations for cli. -
+### Config Show IP weather-dl-v2 config show-ip
See the current server IP address.
@@ -302,9 +290,8 @@ See the current server IP address. ``` weather-dl-v2 config show-ip ``` -
-
+### Config Set IP weather-dl-v2 config set-ip
See the current server IP address.
Set a new server IP address.
diff --git a/weather_dl_v2/cli/README.md b/weather_dl_v2/cli/README.md index 371142dd..a4f4932f 100644 --- a/weather_dl_v2/cli/README.md +++ b/weather_dl_v2/cli/README.md @@ -25,7 +25,7 @@ gcloud builds submit . --tag "gcr.io/$PROJECT_ID/$REPO:weather-dl-v2-cli" --time ## Create a VM using above created docker-image ``` -export ZONE= eg: us-cental1-a +export ZONE= eg: us-west1-a export SERVICE_ACCOUNT= # Let's keep this as Compute Engine Default Service Account export IMAGE_PATH= # The above created image-path From bd762e6fb5931c4cd7f153d1ea1b6128a0da7890 Mon Sep 17 00:00:00 2001 From: aniketinfocusp <122869307+aniketinfocusp@users.noreply.github.com> Date: Thu, 21 Sep 2023 12:42:10 +0530 Subject: [PATCH 45/51] `dl v2` config map (#397) * updated config files for all services * added config maps to all services * added config.json * updated readmes * fixed downloader yml * updated config.json(s) * removed config folder * updated get_config * updated readmes --- weather_dl_v2/README.md | 1 + weather_dl_v2/config.json | 11 ++++++++ weather_dl_v2/downloader_kubernetes/README.md | 4 +-- .../downloader_config.json | 3 --- .../downloader_config.py | 22 ++++++++++++---- weather_dl_v2/fastapi-server/README.md | 13 ++++++++-- .../license_dep/license_deployment.yaml | 7 ++++++ weather_dl_v2/fastapi-server/main.py | 3 ++- weather_dl_v2/fastapi-server/server.yaml | 7 ++++++ .../fastapi-server/server_config.json | 9 ------- weather_dl_v2/fastapi-server/server_config.py | 25 ++++++++++++++----- weather_dl_v2/license_deployment/README.md | 4 +-- .../license_deployment/deployment_config.json | 7 ------ .../license_deployment/deployment_config.py | 22 ++++++++++++---- .../license_deployment/downloader.yaml | 7 +++++- 15 files changed, 102 insertions(+), 43 deletions(-) create mode 100644 weather_dl_v2/config.json delete mode 100644 weather_dl_v2/downloader_kubernetes/downloader_config.json delete mode 100644 weather_dl_v2/fastapi-server/server_config.json delete mode 100644 
weather_dl_v2/license_deployment/deployment_config.json diff --git a/weather_dl_v2/README.md b/weather_dl_v2/README.md index 35e7d504..574cf16c 100644 --- a/weather_dl_v2/README.md +++ b/weather_dl_v2/README.md @@ -5,3 +5,4 @@ 2) Refer to license_deployment/README.md 3) Refer to fastapi-server/README.md 4) Refer to cli/README.md + diff --git a/weather_dl_v2/config.json b/weather_dl_v2/config.json new file mode 100644 index 00000000..f5afae8b --- /dev/null +++ b/weather_dl_v2/config.json @@ -0,0 +1,11 @@ +{ + "download_collection": "download", + "queues_collection": "queues", + "license_collection": "license", + "manifest_collection": "manifest", + "storage_bucket": "XXXXXXX", + "gcs_project": "XXXXXXX", + "license_deployment_image": "XXXXXXX", + "downloader_k8_image": "XXXXXXX", + "welcome_message": "Greetings from weather-dl v2!" +} \ No newline at end of file diff --git a/weather_dl_v2/downloader_kubernetes/README.md b/weather_dl_v2/downloader_kubernetes/README.md index e3f36f47..b0d865f8 100644 --- a/weather_dl_v2/downloader_kubernetes/README.md +++ b/weather_dl_v2/downloader_kubernetes/README.md @@ -9,9 +9,9 @@ We are not configuring any service account here hence make sure that compute eng * roles/bigquery.dataEditor * roles/bigquery.jobUser -### Make changes in downloader config, if required +### Make changes in weather_dl_v2/config.json, if required [for running locally] ``` -Please make approriate changes in downloader_config.json, if required. 
+export CONFIG_PATH=/path/to/weather_dl_v2/config.json ``` ### Create docker image for downloader: diff --git a/weather_dl_v2/downloader_kubernetes/downloader_config.json b/weather_dl_v2/downloader_kubernetes/downloader_config.json deleted file mode 100644 index b7e53cfc..00000000 --- a/weather_dl_v2/downloader_kubernetes/downloader_config.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "manifest_collection": "manifest" -} \ No newline at end of file diff --git a/weather_dl_v2/downloader_kubernetes/downloader_config.py b/weather_dl_v2/downloader_kubernetes/downloader_config.py index b66e90fa..9c226373 100644 --- a/weather_dl_v2/downloader_kubernetes/downloader_config.py +++ b/weather_dl_v2/downloader_kubernetes/downloader_config.py @@ -1,6 +1,10 @@ import dataclasses import typing as t import json +import os +import logging + +logger = logging.getLogger(__name__) Values = t.Union[t.List["Values"], t.Dict[str, "Values"], bool, int, float, str] # pytype: disable=not-supported-yet @@ -28,11 +32,19 @@ def from_dict(cls, config: t.Dict): def get_config(): global downloader_config - downloader_config_json = "downloader_config.json" + if downloader_config: + return downloader_config + + downloader_config_json = "config/config.json" + if not os.path.exists(downloader_config_json): + downloader_config_json = os.environ.get('CONFIG_PATH', None) + + if downloader_config_json is None: + logger.error(f"Couldn't load config file for downloader.") + raise FileNotFoundError("Couldn't load config file for downloader.") - if downloader_config is None: - with open(downloader_config_json) as file: - config_dict = json.load(file) - downloader_config = DownloaderConfig.from_dict(config_dict) + with open(downloader_config_json) as file: + config_dict = json.load(file) + downloader_config = DownloaderConfig.from_dict(config_dict) return downloader_config diff --git a/weather_dl_v2/fastapi-server/README.md b/weather_dl_v2/fastapi-server/README.md index 26d2198e..2debb563 100644 --- 
a/weather_dl_v2/fastapi-server/README.md +++ b/weather_dl_v2/fastapi-server/README.md @@ -40,9 +40,9 @@ conda env create --name weather-dl-v2-server --file=environment.yml conda activate weather-dl-v2-server ``` -### Make changes in server config, if required +### Make changes in weather_dl_v2/config.json, if required [for running locally] ``` -Please make approriate changes in server_config.json, if required. +export CONFIG_PATH=/path/to/weather_dl_v2/config.json ``` ### To run fastapi server: @@ -66,6 +66,15 @@ gcloud builds submit . --tag "gcr.io/$PROJECT_ID/$REPO:weather-dl-v2-server" --t Please write down the fastAPI server's docker image path at Line 42 of server.yaml. ``` +### Create ConfigMap of common configurations for services: +Make necessary changes to weather_dl_v2/config.json and run following command. +ConfigMap is used for: +- Having a common configuration file for all services. +- Decoupling docker image and config files. +``` +kubectl create configmap dl-v2-config --from-file=/path/to/weather_dl_v2/config.json +``` + ### Deploy fastapi server on kubernetes: ``` kubectl apply -f server.yaml --force diff --git a/weather_dl_v2/fastapi-server/license_dep/license_deployment.yaml b/weather_dl_v2/fastapi-server/license_dep/license_deployment.yaml index db27e2b1..707e5b91 100644 --- a/weather_dl_v2/fastapi-server/license_dep/license_deployment.yaml +++ b/weather_dl_v2/fastapi-server/license_dep/license_deployment.yaml @@ -21,6 +21,13 @@ spec: image: XXXXXXX imagePullPolicy: Always args: [] + volumeMounts: + - name: config-volume + mountPath: ./config + volumes: + - name: config-volume + configMap: + name: dl-v2-config # resources: # # You must specify requests for CPU to autoscale # # based on CPU utilization diff --git a/weather_dl_v2/fastapi-server/main.py b/weather_dl_v2/fastapi-server/main.py index b75c7112..114e9597 100644 --- a/weather_dl_v2/fastapi-server/main.py +++ b/weather_dl_v2/fastapi-server/main.py @@ -6,6 +6,7 @@ from routers import 
license, download, queues from database.license_handler import get_license_handler from routers.license import get_create_deployment +from server_config import get_config ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) @@ -51,4 +52,4 @@ async def lifespan(app: FastAPI): @app.get("/") async def main(): - return {"msg": "Greetings from weather-dl v2 !!"} + return {"msg": get_config().welcome_message} diff --git a/weather_dl_v2/fastapi-server/server.yaml b/weather_dl_v2/fastapi-server/server.yaml index e64d6355..b8a2f40d 100644 --- a/weather_dl_v2/fastapi-server/server.yaml +++ b/weather_dl_v2/fastapi-server/server.yaml @@ -43,6 +43,13 @@ spec: ports: - containerPort: 8080 imagePullPolicy: Always + volumeMounts: + - name: config-volume + mountPath: ./config + volumes: + - name: config-volume + configMap: + name: dl-v2-config # resources: # # You must specify requests for CPU to autoscale # # based on CPU utilization diff --git a/weather_dl_v2/fastapi-server/server_config.json b/weather_dl_v2/fastapi-server/server_config.json deleted file mode 100644 index 65cb7c80..00000000 --- a/weather_dl_v2/fastapi-server/server_config.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "download_collection": "download", - "queues_collection": "queues", - "license_collection": "license", - "manifest_collection": "manifest", - "storage_bucket": "XXXXXXXX", - "gcs_project": "XXXXXXXX", - "license_deployment_image": "XXXXXXXX" -} \ No newline at end of file diff --git a/weather_dl_v2/fastapi-server/server_config.py b/weather_dl_v2/fastapi-server/server_config.py index bf76a291..3daa10aa 100644 --- a/weather_dl_v2/fastapi-server/server_config.py +++ b/weather_dl_v2/fastapi-server/server_config.py @@ -1,6 +1,10 @@ import dataclasses import typing as t import json +import os +import logging + +logger = logging.getLogger(__name__) Values = t.Union[t.List["Values"], t.Dict[str, "Values"], bool, int, float, str] # pytype: disable=not-supported-yet @@ -14,6 +18,7 @@ class ServerConfig: 
storage_bucket: str = "" gcs_project: str = "" license_deployment_image: str = "" + welcome_message: str = "" kwargs: t.Optional[t.Dict[str, Values]] = dataclasses.field(default_factory=dict) @classmethod @@ -34,11 +39,19 @@ def from_dict(cls, config: t.Dict): def get_config(): global server_config - server_config_json = "server_config.json" - - if server_config is None: - with open(server_config_json) as file: - config_dict = json.load(file) - server_config = ServerConfig.from_dict(config_dict) + if server_config: + return server_config + + server_config_json = "config/config.json" + if not os.path.exists(server_config_json): + server_config_json = os.environ.get('CONFIG_PATH', None) + + if server_config_json is None: + logger.error(f"Couldn't load config file for fastAPI server.") + raise FileNotFoundError("Couldn't load config file for fastAPI server.") + + with open(server_config_json) as file: + config_dict = json.load(file) + server_config = ServerConfig.from_dict(config_dict) return server_config diff --git a/weather_dl_v2/license_deployment/README.md b/weather_dl_v2/license_deployment/README.md index 348394e6..4c5cc6a1 100644 --- a/weather_dl_v2/license_deployment/README.md +++ b/weather_dl_v2/license_deployment/README.md @@ -7,9 +7,9 @@ conda env create --name weather-dl-v2-license-dep --file=environment.yml conda activate weather-dl-v2-license-dep ``` -### Make changes in deployment config, if required +### Make changes in weather_dl_v2/config.json, if required [for running locally] ``` -Please make approriate changes in deployment_config.json, if required. 
+export CONFIG_PATH=/path/to/weather_dl_v2/config.json ``` ### Create docker image for license deployment diff --git a/weather_dl_v2/license_deployment/deployment_config.json b/weather_dl_v2/license_deployment/deployment_config.json deleted file mode 100644 index 9058be9d..00000000 --- a/weather_dl_v2/license_deployment/deployment_config.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "download_collection": "download", - "queues_collection": "queues", - "license_collection": "license", - "manifest_collection": "manifest", - "downloader_k8_image": "XXXXX" -} \ No newline at end of file diff --git a/weather_dl_v2/license_deployment/deployment_config.py b/weather_dl_v2/license_deployment/deployment_config.py index 2c9e0329..b82c6687 100644 --- a/weather_dl_v2/license_deployment/deployment_config.py +++ b/weather_dl_v2/license_deployment/deployment_config.py @@ -1,6 +1,10 @@ import dataclasses import typing as t import json +import os +import logging + +logger = logging.getLogger(__name__) Values = t.Union[t.List["Values"], t.Dict[str, "Values"], bool, int, float, str] # pytype: disable=not-supported-yet @@ -32,11 +36,19 @@ def from_dict(cls, config: t.Dict): def get_config(): global deployment_config - deployment_config_json = "deployment_config.json" + if deployment_config: + return deployment_config + + deployment_config_json = "config/config.json" + if not os.path.exists(deployment_config_json): + deployment_config_json = os.environ.get('CONFIG_PATH', None) + + if deployment_config_json is None: + logger.error(f"Couldn't load config file for license deployment.") + raise FileNotFoundError("Couldn't load config file for license deployment.") - if deployment_config is None: - with open(deployment_config_json) as file: - config_dict = json.load(file) - deployment_config = DeploymentConfig.from_dict(config_dict) + with open(deployment_config_json) as file: + config_dict = json.load(file) + deployment_config = DeploymentConfig.from_dict(config_dict) return deployment_config 
diff --git a/weather_dl_v2/license_deployment/downloader.yaml b/weather_dl_v2/license_deployment/downloader.yaml index 21d1f6dd..361c2b36 100644 --- a/weather_dl_v2/license_deployment/downloader.yaml +++ b/weather_dl_v2/license_deployment/downloader.yaml @@ -21,8 +21,13 @@ spec: volumeMounts: - name: data mountPath: /data + - name: config-volume + mountPath: ./config restartPolicy: Never volumes: - name: data emptyDir: - sizeLimit: 100Gi \ No newline at end of file + sizeLimit: 100Gi + - name: config-volume + configMap: + name: dl-v2-config \ No newline at end of file From 1b275a45a30bcc54e1bc800f5cdd90aa0fa79934 Mon Sep 17 00:00:00 2001 From: aniketinfocusp <122869307+aniketinfocusp@users.noreply.github.com> Date: Tue, 17 Oct 2023 12:23:59 +0530 Subject: [PATCH 46/51] `dl-v2` pre merge (#403) * lint fixes in downloader_k8s * removed comment --- weather_dl_v2/downloader_kubernetes/downloader_config.py | 2 +- weather_dl_v2/license_deployment/job_creator.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/weather_dl_v2/downloader_kubernetes/downloader_config.py b/weather_dl_v2/downloader_kubernetes/downloader_config.py index 9c226373..21dd300d 100644 --- a/weather_dl_v2/downloader_kubernetes/downloader_config.py +++ b/weather_dl_v2/downloader_kubernetes/downloader_config.py @@ -37,7 +37,7 @@ def get_config(): downloader_config_json = "config/config.json" if not os.path.exists(downloader_config_json): - downloader_config_json = os.environ.get('CONFIG_PATH', None) + downloader_config_json = os.environ.get("CONFIG_PATH", None) if downloader_config_json is None: logger.error(f"Couldn't load config file for downloader.") diff --git a/weather_dl_v2/license_deployment/job_creator.py b/weather_dl_v2/license_deployment/job_creator.py index 6c335acc..1e9e0c0e 100644 --- a/weather_dl_v2/license_deployment/job_creator.py +++ b/weather_dl_v2/license_deployment/job_creator.py @@ -25,8 +25,6 @@ def create_download_job(message): dep = yaml.safe_load(f) uid = 
uuid.uuid4() dep["metadata"]["name"] = f"downloader-job-id-{uid}" - # d = target_path.rsplit('/')[-1] - # dep['metadata']['name'] = f'a{d}a' dep["spec"]["template"]["spec"]["containers"][0]["command"] = [ "python", "downloader.py", From 050b7bab59546f9bfadd2115ba4faab36ec47acb Mon Sep 17 00:00:00 2001 From: aniketinfocusp <122869307+aniketinfocusp@users.noreply.github.com> Date: Tue, 17 Oct 2023 16:56:54 +0530 Subject: [PATCH 47/51] `dl v2` added license to dl-v2 (#405) * lint fixes in downloader_k8s * removed comment * added license to dl-v2 --- weather_dl_v2/__init__.py | 15 +++++++++++++++ weather_dl_v2/cli/Dockerfile | 15 +++++++++++++++ weather_dl_v2/cli/app/__init__.py | 15 +++++++++++++++ weather_dl_v2/cli/app/cli_config.py | 15 +++++++++++++++ weather_dl_v2/cli/app/main.py | 15 +++++++++++++++ weather_dl_v2/cli/app/services/__init__.py | 15 +++++++++++++++ .../cli/app/services/download_service.py | 15 +++++++++++++++ weather_dl_v2/cli/app/services/license_service.py | 15 +++++++++++++++ weather_dl_v2/cli/app/services/network_service.py | 15 +++++++++++++++ weather_dl_v2/cli/app/services/queue_service.py | 15 +++++++++++++++ weather_dl_v2/cli/app/subcommands/__init__.py | 15 +++++++++++++++ weather_dl_v2/cli/app/subcommands/config.py | 15 +++++++++++++++ weather_dl_v2/cli/app/subcommands/download.py | 15 +++++++++++++++ weather_dl_v2/cli/app/subcommands/license.py | 15 +++++++++++++++ weather_dl_v2/cli/app/subcommands/queue.py | 15 +++++++++++++++ weather_dl_v2/cli/app/utils.py | 15 +++++++++++++++ weather_dl_v2/cli/setup.py | 15 +++++++++++++++ weather_dl_v2/downloader_kubernetes/Dockerfile | 15 +++++++++++++++ weather_dl_v2/downloader_kubernetes/downloader.py | 15 +++++++++++++++ .../downloader_kubernetes/downloader_config.py | 15 +++++++++++++++ weather_dl_v2/downloader_kubernetes/manifest.py | 15 +++++++++++++++ weather_dl_v2/downloader_kubernetes/util.py | 15 +++++++++++++++ weather_dl_v2/fastapi-server/Dockerfile | 15 +++++++++++++++ 
weather_dl_v2/fastapi-server/__init__.py | 15 +++++++++++++++ .../fastapi-server/config_processing/config.py | 15 +++++++++++++++ .../fastapi-server/config_processing/manifest.py | 15 +++++++++++++++ .../fastapi-server/config_processing/parsers.py | 15 +++++++++++++++ .../fastapi-server/config_processing/partition.py | 15 +++++++++++++++ .../fastapi-server/config_processing/pipeline.py | 15 +++++++++++++++ .../fastapi-server/config_processing/stores.py | 15 +++++++++++++++ .../fastapi-server/config_processing/util.py | 15 +++++++++++++++ weather_dl_v2/fastapi-server/database/__init__.py | 15 +++++++++++++++ .../fastapi-server/database/download_handler.py | 15 +++++++++++++++ .../fastapi-server/database/license_handler.py | 15 +++++++++++++++ .../fastapi-server/database/manifest_handler.py | 15 +++++++++++++++ .../fastapi-server/database/queue_handler.py | 15 +++++++++++++++ weather_dl_v2/fastapi-server/database/session.py | 15 +++++++++++++++ .../fastapi-server/database/storage_handler.py | 15 +++++++++++++++ .../license_dep/deployment_creator.py | 15 +++++++++++++++ weather_dl_v2/fastapi-server/main.py | 15 +++++++++++++++ weather_dl_v2/fastapi-server/routers/download.py | 15 +++++++++++++++ weather_dl_v2/fastapi-server/routers/license.py | 15 +++++++++++++++ weather_dl_v2/fastapi-server/routers/queues.py | 15 +++++++++++++++ weather_dl_v2/fastapi-server/server_config.py | 15 +++++++++++++++ weather_dl_v2/fastapi-server/tests/__init__.py | 15 +++++++++++++++ .../fastapi-server/tests/integration/__init__.py | 15 +++++++++++++++ .../tests/integration/test_download.py | 15 +++++++++++++++ .../tests/integration/test_license.py | 15 +++++++++++++++ .../tests/integration/test_queues.py | 15 +++++++++++++++ weather_dl_v2/license_deployment/Dockerfile | 15 +++++++++++++++ weather_dl_v2/license_deployment/__init__.py | 15 +++++++++++++++ weather_dl_v2/license_deployment/clients.py | 15 +++++++++++++++ weather_dl_v2/license_deployment/config.py | 15 +++++++++++++++ 
weather_dl_v2/license_deployment/database.py | 15 +++++++++++++++ .../license_deployment/deployment_config.py | 15 +++++++++++++++ weather_dl_v2/license_deployment/fetch.py | 15 +++++++++++++++ weather_dl_v2/license_deployment/job_creator.py | 15 +++++++++++++++ weather_dl_v2/license_deployment/manifest.py | 15 +++++++++++++++ weather_dl_v2/license_deployment/util.py | 15 +++++++++++++++ 59 files changed, 885 insertions(+) diff --git a/weather_dl_v2/__init__.py b/weather_dl_v2/__init__.py index e69de29b..f73bba7f 100644 --- a/weather_dl_v2/__init__.py +++ b/weather_dl_v2/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + diff --git a/weather_dl_v2/cli/Dockerfile b/weather_dl_v2/cli/Dockerfile index a2439002..ec3536be 100644 --- a/weather_dl_v2/cli/Dockerfile +++ b/weather_dl_v2/cli/Dockerfile @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + FROM continuumio/miniconda3:latest COPY . . diff --git a/weather_dl_v2/cli/app/__init__.py b/weather_dl_v2/cli/app/__init__.py index e69de29b..f73bba7f 100644 --- a/weather_dl_v2/cli/app/__init__.py +++ b/weather_dl_v2/cli/app/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + diff --git a/weather_dl_v2/cli/app/cli_config.py b/weather_dl_v2/cli/app/cli_config.py index 2ea340d1..592b1923 100644 --- a/weather_dl_v2/cli/app/cli_config.py +++ b/weather_dl_v2/cli/app/cli_config.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + import dataclasses import typing as t import json diff --git a/weather_dl_v2/cli/app/main.py b/weather_dl_v2/cli/app/main.py index 64e059f9..03a52577 100644 --- a/weather_dl_v2/cli/app/main.py +++ b/weather_dl_v2/cli/app/main.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + import typer import logging from app.cli_config import get_config diff --git a/weather_dl_v2/cli/app/services/__init__.py b/weather_dl_v2/cli/app/services/__init__.py index e69de29b..f73bba7f 100644 --- a/weather_dl_v2/cli/app/services/__init__.py +++ b/weather_dl_v2/cli/app/services/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + diff --git a/weather_dl_v2/cli/app/services/download_service.py b/weather_dl_v2/cli/app/services/download_service.py index 654941c8..369a3ab2 100644 --- a/weather_dl_v2/cli/app/services/download_service.py +++ b/weather_dl_v2/cli/app/services/download_service.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + import abc import logging import json diff --git a/weather_dl_v2/cli/app/services/license_service.py b/weather_dl_v2/cli/app/services/license_service.py index fa8a5ce1..09ff4f3c 100644 --- a/weather_dl_v2/cli/app/services/license_service.py +++ b/weather_dl_v2/cli/app/services/license_service.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + import abc import logging import json diff --git a/weather_dl_v2/cli/app/services/network_service.py b/weather_dl_v2/cli/app/services/network_service.py index c846af3b..4406d91b 100644 --- a/weather_dl_v2/cli/app/services/network_service.py +++ b/weather_dl_v2/cli/app/services/network_service.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + import requests import json import logging diff --git a/weather_dl_v2/cli/app/services/queue_service.py b/weather_dl_v2/cli/app/services/queue_service.py index 5f7dab7d..f6824934 100644 --- a/weather_dl_v2/cli/app/services/queue_service.py +++ b/weather_dl_v2/cli/app/services/queue_service.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + import abc import logging import json diff --git a/weather_dl_v2/cli/app/subcommands/__init__.py b/weather_dl_v2/cli/app/subcommands/__init__.py index e69de29b..f73bba7f 100644 --- a/weather_dl_v2/cli/app/subcommands/__init__.py +++ b/weather_dl_v2/cli/app/subcommands/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + diff --git a/weather_dl_v2/cli/app/subcommands/config.py b/weather_dl_v2/cli/app/subcommands/config.py index 9dee6cb5..b2a03aaf 100644 --- a/weather_dl_v2/cli/app/subcommands/config.py +++ b/weather_dl_v2/cli/app/subcommands/config.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + import typer import json import os diff --git a/weather_dl_v2/cli/app/subcommands/download.py b/weather_dl_v2/cli/app/subcommands/download.py index 24f5ea8b..2014f137 100644 --- a/weather_dl_v2/cli/app/subcommands/download.py +++ b/weather_dl_v2/cli/app/subcommands/download.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + import typer from typing_extensions import Annotated from app.services.download_service import download_service diff --git a/weather_dl_v2/cli/app/subcommands/license.py b/weather_dl_v2/cli/app/subcommands/license.py index 30c53113..68dccd1d 100644 --- a/weather_dl_v2/cli/app/subcommands/license.py +++ b/weather_dl_v2/cli/app/subcommands/license.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + import typer from typing_extensions import Annotated from app.services.license_service import license_service diff --git a/weather_dl_v2/cli/app/subcommands/queue.py b/weather_dl_v2/cli/app/subcommands/queue.py index f26baa7b..816564ca 100644 --- a/weather_dl_v2/cli/app/subcommands/queue.py +++ b/weather_dl_v2/cli/app/subcommands/queue.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + import typer from typing_extensions import Annotated from app.services.queue_service import queue_service diff --git a/weather_dl_v2/cli/app/utils.py b/weather_dl_v2/cli/app/utils.py index 866f884e..9d6e76ce 100644 --- a/weather_dl_v2/cli/app/utils.py +++ b/weather_dl_v2/cli/app/utils.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + import abc import logging import dataclasses diff --git a/weather_dl_v2/cli/setup.py b/weather_dl_v2/cli/setup.py index 75cad67c..509f42fc 100644 --- a/weather_dl_v2/cli/setup.py +++ b/weather_dl_v2/cli/setup.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + from setuptools import setup requirements = ["typer", "requests", "tabulate"] diff --git a/weather_dl_v2/downloader_kubernetes/Dockerfile b/weather_dl_v2/downloader_kubernetes/Dockerfile index d96353f7..74084030 100644 --- a/weather_dl_v2/downloader_kubernetes/Dockerfile +++ b/weather_dl_v2/downloader_kubernetes/Dockerfile @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + # Copyright 2022 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/weather_dl_v2/downloader_kubernetes/downloader.py b/weather_dl_v2/downloader_kubernetes/downloader.py index 4eab7e0d..c8a5c7dc 100644 --- a/weather_dl_v2/downloader_kubernetes/downloader.py +++ b/weather_dl_v2/downloader_kubernetes/downloader.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + """ This program downloads ECMWF data & upload it into GCS. """ diff --git a/weather_dl_v2/downloader_kubernetes/downloader_config.py b/weather_dl_v2/downloader_kubernetes/downloader_config.py index 21dd300d..37d54398 100644 --- a/weather_dl_v2/downloader_kubernetes/downloader_config.py +++ b/weather_dl_v2/downloader_kubernetes/downloader_config.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + import dataclasses import typing as t import json diff --git a/weather_dl_v2/downloader_kubernetes/manifest.py b/weather_dl_v2/downloader_kubernetes/manifest.py index bd7c3d73..0bc82264 100644 --- a/weather_dl_v2/downloader_kubernetes/manifest.py +++ b/weather_dl_v2/downloader_kubernetes/manifest.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + """Client interface for connecting to a manifest.""" import abc diff --git a/weather_dl_v2/downloader_kubernetes/util.py b/weather_dl_v2/downloader_kubernetes/util.py index a7e3fcc4..58242f1d 100644 --- a/weather_dl_v2/downloader_kubernetes/util.py +++ b/weather_dl_v2/downloader_kubernetes/util.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + import datetime import geojson import hashlib diff --git a/weather_dl_v2/fastapi-server/Dockerfile b/weather_dl_v2/fastapi-server/Dockerfile index 08e0c60c..b54e41c0 100644 --- a/weather_dl_v2/fastapi-server/Dockerfile +++ b/weather_dl_v2/fastapi-server/Dockerfile @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + FROM continuumio/miniconda3:latest EXPOSE 8080 diff --git a/weather_dl_v2/fastapi-server/__init__.py b/weather_dl_v2/fastapi-server/__init__.py index e69de29b..f73bba7f 100644 --- a/weather_dl_v2/fastapi-server/__init__.py +++ b/weather_dl_v2/fastapi-server/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + diff --git a/weather_dl_v2/fastapi-server/config_processing/config.py b/weather_dl_v2/fastapi-server/config_processing/config.py index 2677a60c..fe2199b8 100644 --- a/weather_dl_v2/fastapi-server/config_processing/config.py +++ b/weather_dl_v2/fastapi-server/config_processing/config.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + import calendar import copy import dataclasses diff --git a/weather_dl_v2/fastapi-server/config_processing/manifest.py b/weather_dl_v2/fastapi-server/config_processing/manifest.py index 22aeb5e8..35a8bf7b 100644 --- a/weather_dl_v2/fastapi-server/config_processing/manifest.py +++ b/weather_dl_v2/fastapi-server/config_processing/manifest.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + """Client interface for connecting to a manifest.""" import abc diff --git a/weather_dl_v2/fastapi-server/config_processing/parsers.py b/weather_dl_v2/fastapi-server/config_processing/parsers.py index f3447f28..efb3e393 100644 --- a/weather_dl_v2/fastapi-server/config_processing/parsers.py +++ b/weather_dl_v2/fastapi-server/config_processing/parsers.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + """Parsers for ECMWF download configuration.""" import ast diff --git a/weather_dl_v2/fastapi-server/config_processing/partition.py b/weather_dl_v2/fastapi-server/config_processing/partition.py index 6b19ff31..a9f6a9e2 100644 --- a/weather_dl_v2/fastapi-server/config_processing/partition.py +++ b/weather_dl_v2/fastapi-server/config_processing/partition.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + import logging import copy as cp import dataclasses diff --git a/weather_dl_v2/fastapi-server/config_processing/pipeline.py b/weather_dl_v2/fastapi-server/config_processing/pipeline.py index 53534aa4..938644e1 100644 --- a/weather_dl_v2/fastapi-server/config_processing/pipeline.py +++ b/weather_dl_v2/fastapi-server/config_processing/pipeline.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + import getpass import logging import os diff --git a/weather_dl_v2/fastapi-server/config_processing/stores.py b/weather_dl_v2/fastapi-server/config_processing/stores.py index 12f52617..4f60e337 100644 --- a/weather_dl_v2/fastapi-server/config_processing/stores.py +++ b/weather_dl_v2/fastapi-server/config_processing/stores.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + """Download destinations, or `Store`s.""" import abc diff --git a/weather_dl_v2/fastapi-server/config_processing/util.py b/weather_dl_v2/fastapi-server/config_processing/util.py index 12217211..e3da5786 100644 --- a/weather_dl_v2/fastapi-server/config_processing/util.py +++ b/weather_dl_v2/fastapi-server/config_processing/util.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + import logging import datetime import geojson diff --git a/weather_dl_v2/fastapi-server/database/__init__.py b/weather_dl_v2/fastapi-server/database/__init__.py index e69de29b..f73bba7f 100644 --- a/weather_dl_v2/fastapi-server/database/__init__.py +++ b/weather_dl_v2/fastapi-server/database/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + diff --git a/weather_dl_v2/fastapi-server/database/download_handler.py b/weather_dl_v2/fastapi-server/database/download_handler.py index 1238a526..7db281eb 100644 --- a/weather_dl_v2/fastapi-server/database/download_handler.py +++ b/weather_dl_v2/fastapi-server/database/download_handler.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + import abc import logging from firebase_admin import firestore diff --git a/weather_dl_v2/fastapi-server/database/license_handler.py b/weather_dl_v2/fastapi-server/database/license_handler.py index 42477820..4c5b6954 100644 --- a/weather_dl_v2/fastapi-server/database/license_handler.py +++ b/weather_dl_v2/fastapi-server/database/license_handler.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + import abc import logging from firebase_admin import firestore diff --git a/weather_dl_v2/fastapi-server/database/manifest_handler.py b/weather_dl_v2/fastapi-server/database/manifest_handler.py index 1edafd2e..d5facfab 100644 --- a/weather_dl_v2/fastapi-server/database/manifest_handler.py +++ b/weather_dl_v2/fastapi-server/database/manifest_handler.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + import abc import logging from firebase_admin import firestore diff --git a/weather_dl_v2/fastapi-server/database/queue_handler.py b/weather_dl_v2/fastapi-server/database/queue_handler.py index d9973920..dfe1ab71 100644 --- a/weather_dl_v2/fastapi-server/database/queue_handler.py +++ b/weather_dl_v2/fastapi-server/database/queue_handler.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + import abc import logging from firebase_admin import firestore diff --git a/weather_dl_v2/fastapi-server/database/session.py b/weather_dl_v2/fastapi-server/database/session.py index afac904d..85dbc8be 100644 --- a/weather_dl_v2/fastapi-server/database/session.py +++ b/weather_dl_v2/fastapi-server/database/session.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + import time import abc import logging diff --git a/weather_dl_v2/fastapi-server/database/storage_handler.py b/weather_dl_v2/fastapi-server/database/storage_handler.py index 32176ec3..fcdf6a1a 100644 --- a/weather_dl_v2/fastapi-server/database/storage_handler.py +++ b/weather_dl_v2/fastapi-server/database/storage_handler.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + import abc import os import logging diff --git a/weather_dl_v2/fastapi-server/license_dep/deployment_creator.py b/weather_dl_v2/fastapi-server/license_dep/deployment_creator.py index 68dd9527..f79521d2 100644 --- a/weather_dl_v2/fastapi-server/license_dep/deployment_creator.py +++ b/weather_dl_v2/fastapi-server/license_dep/deployment_creator.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + import logging from os import path import yaml diff --git a/weather_dl_v2/fastapi-server/main.py b/weather_dl_v2/fastapi-server/main.py index 114e9597..7087d289 100644 --- a/weather_dl_v2/fastapi-server/main.py +++ b/weather_dl_v2/fastapi-server/main.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + import logging import os import logging.config diff --git a/weather_dl_v2/fastapi-server/routers/download.py b/weather_dl_v2/fastapi-server/routers/download.py index 96db4c74..c8f6e4e9 100644 --- a/weather_dl_v2/fastapi-server/routers/download.py +++ b/weather_dl_v2/fastapi-server/routers/download.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + import asyncio import logging import os diff --git a/weather_dl_v2/fastapi-server/routers/license.py b/weather_dl_v2/fastapi-server/routers/license.py index 7202168c..180a47db 100644 --- a/weather_dl_v2/fastapi-server/routers/license.py +++ b/weather_dl_v2/fastapi-server/routers/license.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + import logging import re from fastapi import APIRouter, HTTPException, BackgroundTasks, Depends diff --git a/weather_dl_v2/fastapi-server/routers/queues.py b/weather_dl_v2/fastapi-server/routers/queues.py index 5ab7386d..eda6a7c5 100644 --- a/weather_dl_v2/fastapi-server/routers/queues.py +++ b/weather_dl_v2/fastapi-server/routers/queues.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + import logging from fastapi import APIRouter, HTTPException, Depends diff --git a/weather_dl_v2/fastapi-server/server_config.py b/weather_dl_v2/fastapi-server/server_config.py index 3daa10aa..8968ced1 100644 --- a/weather_dl_v2/fastapi-server/server_config.py +++ b/weather_dl_v2/fastapi-server/server_config.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + import dataclasses import typing as t import json diff --git a/weather_dl_v2/fastapi-server/tests/__init__.py b/weather_dl_v2/fastapi-server/tests/__init__.py index e69de29b..f73bba7f 100644 --- a/weather_dl_v2/fastapi-server/tests/__init__.py +++ b/weather_dl_v2/fastapi-server/tests/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + diff --git a/weather_dl_v2/fastapi-server/tests/integration/__init__.py b/weather_dl_v2/fastapi-server/tests/integration/__init__.py index e69de29b..f73bba7f 100644 --- a/weather_dl_v2/fastapi-server/tests/integration/__init__.py +++ b/weather_dl_v2/fastapi-server/tests/integration/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + diff --git a/weather_dl_v2/fastapi-server/tests/integration/test_download.py b/weather_dl_v2/fastapi-server/tests/integration/test_download.py index 08958737..4d6bb74a 100644 --- a/weather_dl_v2/fastapi-server/tests/integration/test_download.py +++ b/weather_dl_v2/fastapi-server/tests/integration/test_download.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + import logging import os from fastapi.testclient import TestClient diff --git a/weather_dl_v2/fastapi-server/tests/integration/test_license.py b/weather_dl_v2/fastapi-server/tests/integration/test_license.py index ef7a630a..99855344 100644 --- a/weather_dl_v2/fastapi-server/tests/integration/test_license.py +++ b/weather_dl_v2/fastapi-server/tests/integration/test_license.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + import logging import json from fastapi.testclient import TestClient diff --git a/weather_dl_v2/fastapi-server/tests/integration/test_queues.py b/weather_dl_v2/fastapi-server/tests/integration/test_queues.py index 624c3890..a9c08c4b 100644 --- a/weather_dl_v2/fastapi-server/tests/integration/test_queues.py +++ b/weather_dl_v2/fastapi-server/tests/integration/test_queues.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + import logging from main import app from fastapi.testclient import TestClient diff --git a/weather_dl_v2/license_deployment/Dockerfile b/weather_dl_v2/license_deployment/Dockerfile index 2a02d59d..68388f78 100644 --- a/weather_dl_v2/license_deployment/Dockerfile +++ b/weather_dl_v2/license_deployment/Dockerfile @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + FROM continuumio/miniconda3:latest # Update miniconda diff --git a/weather_dl_v2/license_deployment/__init__.py b/weather_dl_v2/license_deployment/__init__.py index e69de29b..f73bba7f 100644 --- a/weather_dl_v2/license_deployment/__init__.py +++ b/weather_dl_v2/license_deployment/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + diff --git a/weather_dl_v2/license_deployment/clients.py b/weather_dl_v2/license_deployment/clients.py index b92a0797..331888ea 100644 --- a/weather_dl_v2/license_deployment/clients.py +++ b/weather_dl_v2/license_deployment/clients.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + """ECMWF Downloader Clients.""" import abc diff --git a/weather_dl_v2/license_deployment/config.py b/weather_dl_v2/license_deployment/config.py index 2677a60c..fe2199b8 100644 --- a/weather_dl_v2/license_deployment/config.py +++ b/weather_dl_v2/license_deployment/config.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + import calendar import copy import dataclasses diff --git a/weather_dl_v2/license_deployment/database.py b/weather_dl_v2/license_deployment/database.py index bc1414a5..90ee3232 100644 --- a/weather_dl_v2/license_deployment/database.py +++ b/weather_dl_v2/license_deployment/database.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + import abc import time import logging diff --git a/weather_dl_v2/license_deployment/deployment_config.py b/weather_dl_v2/license_deployment/deployment_config.py index b82c6687..d8645597 100644 --- a/weather_dl_v2/license_deployment/deployment_config.py +++ b/weather_dl_v2/license_deployment/deployment_config.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + import dataclasses import typing as t import json diff --git a/weather_dl_v2/license_deployment/fetch.py b/weather_dl_v2/license_deployment/fetch.py index 5e7c4fe7..af0a1507 100644 --- a/weather_dl_v2/license_deployment/fetch.py +++ b/weather_dl_v2/license_deployment/fetch.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + from concurrent.futures import ThreadPoolExecutor from google.cloud import secretmanager import json diff --git a/weather_dl_v2/license_deployment/job_creator.py b/weather_dl_v2/license_deployment/job_creator.py index 1e9e0c0e..f0acd802 100644 --- a/weather_dl_v2/license_deployment/job_creator.py +++ b/weather_dl_v2/license_deployment/job_creator.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + from os import path import yaml import json diff --git a/weather_dl_v2/license_deployment/manifest.py b/weather_dl_v2/license_deployment/manifest.py index 39d6ad6d..1b5355d8 100644 --- a/weather_dl_v2/license_deployment/manifest.py +++ b/weather_dl_v2/license_deployment/manifest.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + """Client interface for connecting to a manifest.""" import abc diff --git a/weather_dl_v2/license_deployment/util.py b/weather_dl_v2/license_deployment/util.py index 0cd5ecbc..3d74c34b 100644 --- a/weather_dl_v2/license_deployment/util.py +++ b/weather_dl_v2/license_deployment/util.py @@ -1,3 +1,18 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + import datetime import logging import geojson From eeeb7705d5c278341640a24ad0f7ed5119f4dcbb Mon Sep 17 00:00:00 2001 From: aniketinfocusp <122869307+aniketinfocusp@users.noreply.github.com> Date: Wed, 18 Oct 2023 17:40:42 +0530 Subject: [PATCH 48/51] `dl-v2` todos (#407) * added todos * nits * removed old todos * nit --- weather_dl_v2/README.md | 2 ++ weather_dl_v2/cli/app/cli_config.py | 2 ++ weather_dl_v2/cli/app/subcommands/download.py | 2 +- weather_dl_v2/cli/app/utils.py | 2 +- weather_dl_v2/fastapi-server/config_processing/parsers.py | 1 - weather_dl_v2/fastapi-server/config_processing/pipeline.py | 2 +- weather_dl_v2/fastapi-server/main.py | 2 +- weather_dl_v2/fastapi-server/routers/license.py | 4 ++-- weather_dl_v2/license_deployment/database.py | 4 +++- 9 files changed, 13 insertions(+), 8 deletions(-) diff --git a/weather_dl_v2/README.md b/weather_dl_v2/README.md index 574cf16c..868dae84 100644 --- a/weather_dl_v2/README.md +++ b/weather_dl_v2/README.md @@ -1,5 +1,7 @@ ## weather-dl-v2 + + ### Sequence of steps: 1) Refer to downloader_kubernetes/README.md 2) Refer to 
license_deployment/README.md diff --git a/weather_dl_v2/cli/app/cli_config.py b/weather_dl_v2/cli/app/cli_config.py index 592b1923..9bfeb1de 100644 --- a/weather_dl_v2/cli/app/cli_config.py +++ b/weather_dl_v2/cli/app/cli_config.py @@ -50,6 +50,8 @@ def from_dict(cls, config: t.Dict): def get_config(): global cli_config + # TODO: Update this so cli can work from any folder level. + # Right now it only works in folder where cli_config.json is present. cli_config_json = os.path.join(os.getcwd(), "cli_config.json") if cli_config is None: diff --git a/weather_dl_v2/cli/app/subcommands/download.py b/weather_dl_v2/cli/app/subcommands/download.py index 2014f137..c52ef776 100644 --- a/weather_dl_v2/cli/app/subcommands/download.py +++ b/weather_dl_v2/cli/app/subcommands/download.py @@ -51,7 +51,7 @@ def get_downloads( print(as_table(download_service._list_all_downloads())) - +# TODO: Add support for submitting multiple configs using *.cfg notation. @app.command("add", help="Submit new config to download.") def submit_download( file_path: Annotated[ diff --git a/weather_dl_v2/cli/app/utils.py b/weather_dl_v2/cli/app/utils.py index 9d6e76ce..2bc64bde 100644 --- a/weather_dl_v2/cli/app/utils.py +++ b/weather_dl_v2/cli/app/utils.py @@ -38,7 +38,7 @@ def wrap_func(*args, **kwargs): return wrap_func - +# TODO: Add a flag (may be -j/--json) to support raw response. 
def as_table(response: str): data = json.loads(response) diff --git a/weather_dl_v2/fastapi-server/config_processing/parsers.py b/weather_dl_v2/fastapi-server/config_processing/parsers.py index efb3e393..5f9e1f5c 100644 --- a/weather_dl_v2/fastapi-server/config_processing/parsers.py +++ b/weather_dl_v2/fastapi-server/config_processing/parsers.py @@ -178,7 +178,6 @@ def _splitlines(block: str) -> t.List[str]: def mars_range_value(token: str) -> t.Union[datetime.date, int, float]: """Converts a range token into either a date, int, or float.""" - # TODO(b/175432034): Recognize time values try: return date(token) except ValueError: diff --git a/weather_dl_v2/fastapi-server/config_processing/pipeline.py b/weather_dl_v2/fastapi-server/config_processing/pipeline.py index 938644e1..7563f949 100644 --- a/weather_dl_v2/fastapi-server/config_processing/pipeline.py +++ b/weather_dl_v2/fastapi-server/config_processing/pipeline.py @@ -35,7 +35,7 @@ def _do_partitions(partition_obj: PartitionConfig): if partition_obj.new_downloads_only(partition): partition_obj.update_manifest_collection(partition) - +# TODO: Make partitioning faster. async def start_processing_config(config_file, licenses, force_download): config = {} manifest = FirestoreManifest() diff --git a/weather_dl_v2/fastapi-server/main.py b/weather_dl_v2/fastapi-server/main.py index 7087d289..603e75d9 100644 --- a/weather_dl_v2/fastapi-server/main.py +++ b/weather_dl_v2/fastapi-server/main.py @@ -53,7 +53,7 @@ async def lifespan(app: FastAPI): os.makedirs(os.path.join(os.getcwd(), "config_files"), exist_ok=True) # Retrieve license information & create license deployment if needed. await create_pending_license_deployments() - + # TODO: Automatically create required indexes on firestore collections on server startup. 
yield # Clean up diff --git a/weather_dl_v2/fastapi-server/routers/license.py b/weather_dl_v2/fastapi-server/routers/license.py index 180a47db..8d00400d 100644 --- a/weather_dl_v2/fastapi-server/routers/license.py +++ b/weather_dl_v2/fastapi-server/routers/license.py @@ -24,8 +24,6 @@ logger = logging.getLogger(__name__) -# TODO: Make use of google secret manager. -# REF: https://cloud.google.com/secret-manager. class License(BaseModel): license_id: str client_name: str @@ -199,3 +197,5 @@ async def delete_license( await queue_handler._remove_license_queue(license_id) background_tasks.add_task(terminate_license_deployment, license_id) return {"license_id": license_id, "message": "License removed successfully."} + +# TODO: Add route to re-deploy license deployments. \ No newline at end of file diff --git a/weather_dl_v2/license_deployment/database.py b/weather_dl_v2/license_deployment/database.py index 90ee3232..7eaf975c 100644 --- a/weather_dl_v2/license_deployment/database.py +++ b/weather_dl_v2/license_deployment/database.py @@ -128,7 +128,9 @@ def _remove_config_from_license_queue( f"Updated {license_id} queue in 'queues' collection. Update_time: {result.update_time}." ) - +# TODO: Firestore transcational fails after reading a document 20 times with roll over. +# This happens when too many licenses try to access the same partition document. +# Find some alternative approach to handle this. 
@firestore.transactional def get_partition_from_manifest(transaction, config_name: str) -> str | None: db_client = FirestoreClient() From 31b718d0e6719a3c29c80facc2a7b27735326441 Mon Sep 17 00:00:00 2001 From: aniketinfocusp <122869307+aniketinfocusp@users.noreply.github.com> Date: Wed, 18 Oct 2023 17:58:16 +0530 Subject: [PATCH 49/51] `dl v2` lint fixes (#409) * lint fixes * minor fix --- weather_dl_v2/__init__.py | 2 -- weather_dl_v2/cli/app/__init__.py | 2 -- weather_dl_v2/cli/app/services/__init__.py | 2 -- weather_dl_v2/cli/app/services/download_service.py | 4 ++-- weather_dl_v2/cli/app/subcommands/__init__.py | 2 -- weather_dl_v2/cli/app/subcommands/download.py | 4 +++- weather_dl_v2/cli/app/utils.py | 1 + .../downloader_kubernetes/downloader_config.py | 2 +- weather_dl_v2/downloader_kubernetes/util.py | 10 +++++----- weather_dl_v2/fastapi-server/__init__.py | 2 -- .../fastapi-server/config_processing/pipeline.py | 1 + weather_dl_v2/fastapi-server/config_processing/util.py | 10 +++++----- weather_dl_v2/fastapi-server/database/__init__.py | 2 -- weather_dl_v2/fastapi-server/main.py | 4 ++-- weather_dl_v2/fastapi-server/routers/download.py | 1 - weather_dl_v2/fastapi-server/routers/license.py | 3 ++- weather_dl_v2/fastapi-server/server_config.py | 6 +++--- weather_dl_v2/fastapi-server/tests/__init__.py | 2 -- .../fastapi-server/tests/integration/__init__.py | 2 -- weather_dl_v2/license_deployment/__init__.py | 2 -- weather_dl_v2/license_deployment/database.py | 1 + weather_dl_v2/license_deployment/deployment_config.py | 4 ++-- weather_dl_v2/license_deployment/fetch.py | 9 +++++++-- weather_dl_v2/license_deployment/util.py | 10 +++++----- 24 files changed, 40 insertions(+), 48 deletions(-) diff --git a/weather_dl_v2/__init__.py b/weather_dl_v2/__init__.py index f73bba7f..5678014c 100644 --- a/weather_dl_v2/__init__.py +++ b/weather_dl_v2/__init__.py @@ -11,5 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. - - diff --git a/weather_dl_v2/cli/app/__init__.py b/weather_dl_v2/cli/app/__init__.py index f73bba7f..5678014c 100644 --- a/weather_dl_v2/cli/app/__init__.py +++ b/weather_dl_v2/cli/app/__init__.py @@ -11,5 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - - diff --git a/weather_dl_v2/cli/app/services/__init__.py b/weather_dl_v2/cli/app/services/__init__.py index f73bba7f..5678014c 100644 --- a/weather_dl_v2/cli/app/services/__init__.py +++ b/weather_dl_v2/cli/app/services/__init__.py @@ -11,5 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - - diff --git a/weather_dl_v2/cli/app/services/download_service.py b/weather_dl_v2/cli/app/services/download_service.py index 369a3ab2..4d467271 100644 --- a/weather_dl_v2/cli/app/services/download_service.py +++ b/weather_dl_v2/cli/app/services/download_service.py @@ -36,7 +36,7 @@ def _list_all_downloads_by_filter(self, filter_dict: dict): @abc.abstractmethod def _get_download_by_config(self, config_name: str): pass - + @abc.abstractmethod def _show_config_content(self, config_name: str): pass @@ -78,7 +78,7 @@ def _get_download_by_config(self, config_name: str): uri=f"{self.endpoint}/{config_name}", header={"accept": "application/json"}, ) - + def _show_config_content(self, config_name: str): return network_service.get( uri=f"{self.endpoint}/show/{config_name}", diff --git a/weather_dl_v2/cli/app/subcommands/__init__.py b/weather_dl_v2/cli/app/subcommands/__init__.py index f73bba7f..5678014c 100644 --- a/weather_dl_v2/cli/app/subcommands/__init__.py +++ b/weather_dl_v2/cli/app/subcommands/__init__.py @@ -11,5 +11,3 @@ # WITHOUT WARRANTIES OR 
CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - - diff --git a/weather_dl_v2/cli/app/subcommands/download.py b/weather_dl_v2/cli/app/subcommands/download.py index c52ef776..b16a26e8 100644 --- a/weather_dl_v2/cli/app/subcommands/download.py +++ b/weather_dl_v2/cli/app/subcommands/download.py @@ -51,6 +51,7 @@ def get_downloads( print(as_table(download_service._list_all_downloads())) + # TODO: Add support for submitting multiple configs using *.cfg notation. @app.command("add", help="Submit new config to download.") def submit_download( @@ -76,8 +77,9 @@ def get_download_by_config( ): print(as_table(download_service._get_download_by_config(config_name))) + @app.command("show", help="Show contents of a particular config.") -def get_download_by_config( +def show_config( config_name: Annotated[str, typer.Argument(help="Config file name.")] ): print(download_service._show_config_content(config_name)) diff --git a/weather_dl_v2/cli/app/utils.py b/weather_dl_v2/cli/app/utils.py index 2bc64bde..1ced5c7b 100644 --- a/weather_dl_v2/cli/app/utils.py +++ b/weather_dl_v2/cli/app/utils.py @@ -38,6 +38,7 @@ def wrap_func(*args, **kwargs): return wrap_func + # TODO: Add a flag (may be -j/--json) to support raw response. 
def as_table(response: str): data = json.loads(response) diff --git a/weather_dl_v2/downloader_kubernetes/downloader_config.py b/weather_dl_v2/downloader_kubernetes/downloader_config.py index 37d54398..247ae664 100644 --- a/weather_dl_v2/downloader_kubernetes/downloader_config.py +++ b/weather_dl_v2/downloader_kubernetes/downloader_config.py @@ -55,7 +55,7 @@ def get_config(): downloader_config_json = os.environ.get("CONFIG_PATH", None) if downloader_config_json is None: - logger.error(f"Couldn't load config file for downloader.") + logger.error("Couldn't load config file for downloader.") raise FileNotFoundError("Couldn't load config file for downloader.") with open(downloader_config_json) as file: diff --git a/weather_dl_v2/downloader_kubernetes/util.py b/weather_dl_v2/downloader_kubernetes/util.py index 58242f1d..5777234f 100644 --- a/weather_dl_v2/downloader_kubernetes/util.py +++ b/weather_dl_v2/downloader_kubernetes/util.py @@ -106,13 +106,13 @@ def to_json_serializable_type(value: t.Any) -> t.Any: return None elif np.issubdtype(type(value), np.floating): return float(value) - elif type(value) == np.ndarray: + elif isinstance(value, np.ndarray): # Will return a scaler if array is of size 1, else will return a list. return value.tolist() elif ( - type(value) == datetime.datetime - or type(value) == str - or type(value) == np.datetime64 + isinstance(value, datetime.datetime) + or isinstance(value, str) + or isinstance(value, np.datetime64) ): # Assume strings are ISO format timestamps... try: @@ -134,7 +134,7 @@ def to_json_serializable_type(value: t.Any) -> t.Any: # We assume here that naive timestamps are in UTC timezone. return value.replace(tzinfo=datetime.timezone.utc).isoformat() - elif type(value) == np.timedelta64: + elif isinstance(value, np.timedelta64): # Return time delta in seconds. return float(value / np.timedelta64(1, "s")) # This check must happen after processing np.timedelta64 and np.datetime64. 
diff --git a/weather_dl_v2/fastapi-server/__init__.py b/weather_dl_v2/fastapi-server/__init__.py index f73bba7f..5678014c 100644 --- a/weather_dl_v2/fastapi-server/__init__.py +++ b/weather_dl_v2/fastapi-server/__init__.py @@ -11,5 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - - diff --git a/weather_dl_v2/fastapi-server/config_processing/pipeline.py b/weather_dl_v2/fastapi-server/config_processing/pipeline.py index 7563f949..175dd798 100644 --- a/weather_dl_v2/fastapi-server/config_processing/pipeline.py +++ b/weather_dl_v2/fastapi-server/config_processing/pipeline.py @@ -35,6 +35,7 @@ def _do_partitions(partition_obj: PartitionConfig): if partition_obj.new_downloads_only(partition): partition_obj.update_manifest_collection(partition) + # TODO: Make partitioning faster. async def start_processing_config(config_file, licenses, force_download): config = {} diff --git a/weather_dl_v2/fastapi-server/config_processing/util.py b/weather_dl_v2/fastapi-server/config_processing/util.py index e3da5786..765a9c47 100644 --- a/weather_dl_v2/fastapi-server/config_processing/util.py +++ b/weather_dl_v2/fastapi-server/config_processing/util.py @@ -109,13 +109,13 @@ def to_json_serializable_type(value: t.Any) -> t.Any: return None elif np.issubdtype(type(value), np.floating): return float(value) - elif type(value) == np.ndarray: + elif isinstance(value, np.ndarray): # Will return a scaler if array is of size 1, else will return a list. return value.tolist() elif ( - type(value) == datetime.datetime - or type(value) == str - or type(value) == np.datetime64 + isinstance(value, datetime.datetime) + or isinstance(value, str) + or isinstance(value, np.datetime64) ): # Assume strings are ISO format timestamps... 
try: @@ -137,7 +137,7 @@ def to_json_serializable_type(value: t.Any) -> t.Any: # We assume here that naive timestamps are in UTC timezone. return value.replace(tzinfo=datetime.timezone.utc).isoformat() - elif type(value) == np.timedelta64: + elif isinstance(value, np.timedelta64): # Return time delta in seconds. return float(value / np.timedelta64(1, "s")) # This check must happen after processing np.timedelta64 and np.datetime64. diff --git a/weather_dl_v2/fastapi-server/database/__init__.py b/weather_dl_v2/fastapi-server/database/__init__.py index f73bba7f..5678014c 100644 --- a/weather_dl_v2/fastapi-server/database/__init__.py +++ b/weather_dl_v2/fastapi-server/database/__init__.py @@ -11,5 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - - diff --git a/weather_dl_v2/fastapi-server/main.py b/weather_dl_v2/fastapi-server/main.py index 603e75d9..05124123 100644 --- a/weather_dl_v2/fastapi-server/main.py +++ b/weather_dl_v2/fastapi-server/main.py @@ -36,8 +36,8 @@ async def create_pending_license_deployments(): create_deployment = get_create_deployment() license_list = await license_handler._get_license_without_deployment() - for license in license_list: - license_id = license["license_id"] + for _license in license_list: + license_id = _license["license_id"] try: logger.info(f"Creating license deployment for {license_id}.") await create_deployment(license_id, license_handler) diff --git a/weather_dl_v2/fastapi-server/routers/download.py b/weather_dl_v2/fastapi-server/routers/download.py index c8f6e4e9..03f82122 100644 --- a/weather_dl_v2/fastapi-server/routers/download.py +++ b/weather_dl_v2/fastapi-server/routers/download.py @@ -22,7 +22,6 @@ from enum import Enum from config_processing.parsers import parse_config, process_config from config_processing.config import Config -from server_config import get_config from fastapi 
import APIRouter, HTTPException, BackgroundTasks, UploadFile, Depends, Body from config_processing.pipeline import start_processing_config from database.download_handler import DownloadHandler, get_download_handler diff --git a/weather_dl_v2/fastapi-server/routers/license.py b/weather_dl_v2/fastapi-server/routers/license.py index 8d00400d..9c73dca0 100644 --- a/weather_dl_v2/fastapi-server/routers/license.py +++ b/weather_dl_v2/fastapi-server/routers/license.py @@ -198,4 +198,5 @@ async def delete_license( background_tasks.add_task(terminate_license_deployment, license_id) return {"license_id": license_id, "message": "License removed successfully."} -# TODO: Add route to re-deploy license deployments. \ No newline at end of file + +# TODO: Add route to re-deploy license deployments. diff --git a/weather_dl_v2/fastapi-server/server_config.py b/weather_dl_v2/fastapi-server/server_config.py index 8968ced1..4ca8c21b 100644 --- a/weather_dl_v2/fastapi-server/server_config.py +++ b/weather_dl_v2/fastapi-server/server_config.py @@ -59,10 +59,10 @@ def get_config(): server_config_json = "config/config.json" if not os.path.exists(server_config_json): - server_config_json = os.environ.get('CONFIG_PATH', None) - + server_config_json = os.environ.get("CONFIG_PATH", None) + if server_config_json is None: - logger.error(f"Couldn't load config file for fastAPI server.") + logger.error("Couldn't load config file for fastAPI server.") raise FileNotFoundError("Couldn't load config file for fastAPI server.") with open(server_config_json) as file: diff --git a/weather_dl_v2/fastapi-server/tests/__init__.py b/weather_dl_v2/fastapi-server/tests/__init__.py index f73bba7f..5678014c 100644 --- a/weather_dl_v2/fastapi-server/tests/__init__.py +++ b/weather_dl_v2/fastapi-server/tests/__init__.py @@ -11,5 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- - diff --git a/weather_dl_v2/fastapi-server/tests/integration/__init__.py b/weather_dl_v2/fastapi-server/tests/integration/__init__.py index f73bba7f..5678014c 100644 --- a/weather_dl_v2/fastapi-server/tests/integration/__init__.py +++ b/weather_dl_v2/fastapi-server/tests/integration/__init__.py @@ -11,5 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - - diff --git a/weather_dl_v2/license_deployment/__init__.py b/weather_dl_v2/license_deployment/__init__.py index f73bba7f..5678014c 100644 --- a/weather_dl_v2/license_deployment/__init__.py +++ b/weather_dl_v2/license_deployment/__init__.py @@ -11,5 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - - diff --git a/weather_dl_v2/license_deployment/database.py b/weather_dl_v2/license_deployment/database.py index 7eaf975c..23c0f064 100644 --- a/weather_dl_v2/license_deployment/database.py +++ b/weather_dl_v2/license_deployment/database.py @@ -128,6 +128,7 @@ def _remove_config_from_license_queue( f"Updated {license_id} queue in 'queues' collection. Update_time: {result.update_time}." ) + # TODO: Firestore transcational fails after reading a document 20 times with roll over. # This happens when too many licenses try to access the same partition document. # Find some alternative approach to handle this. 
diff --git a/weather_dl_v2/license_deployment/deployment_config.py b/weather_dl_v2/license_deployment/deployment_config.py index d8645597..8ae162ea 100644 --- a/weather_dl_v2/license_deployment/deployment_config.py +++ b/weather_dl_v2/license_deployment/deployment_config.py @@ -56,10 +56,10 @@ def get_config(): deployment_config_json = "config/config.json" if not os.path.exists(deployment_config_json): - deployment_config_json = os.environ.get('CONFIG_PATH', None) + deployment_config_json = os.environ.get("CONFIG_PATH", None) if deployment_config_json is None: - logger.error(f"Couldn't load config file for license deployment.") + logger.error("Couldn't load config file for license deployment.") raise FileNotFoundError("Couldn't load config file for license deployment.") with open(deployment_config_json) as file: diff --git a/weather_dl_v2/license_deployment/fetch.py b/weather_dl_v2/license_deployment/fetch.py index af0a1507..63adb33a 100644 --- a/weather_dl_v2/license_deployment/fetch.py +++ b/weather_dl_v2/license_deployment/fetch.py @@ -30,6 +30,7 @@ db_client = FirestoreClient() secretmanager_client = secretmanager.SecretManagerServiceClient() + def create_job(request, result): res = { "config_name": request["config_name"], @@ -81,7 +82,9 @@ def fetch_request_from_db(): if not request: db_client._remove_config_from_license_queue(license_id, config_name) except Exception as e: - logger.error(f"Error in fetch_request_from_db for {config_name}. error: {e}.") + logger.error( + f"Error in fetch_request_from_db for {config_name}. error: {e}." 
+ ) return request @@ -126,7 +129,9 @@ def boot_up(license: str) -> None: if __name__ == "__main__": license = sys.argv[2] global logger - logging.basicConfig(level=logging.INFO, format=f'[{license}] %(levelname)s - %(message)s') + logging.basicConfig( + level=logging.INFO, format=f"[{license}] %(levelname)s - %(message)s" + ) logger = logging.getLogger(__name__) logger.info(f"Deployment for license: {license}.") diff --git a/weather_dl_v2/license_deployment/util.py b/weather_dl_v2/license_deployment/util.py index 3d74c34b..14b1f827 100644 --- a/weather_dl_v2/license_deployment/util.py +++ b/weather_dl_v2/license_deployment/util.py @@ -119,13 +119,13 @@ def to_json_serializable_type(value: t.Any) -> t.Any: return None elif np.issubdtype(type(value), np.floating): return float(value) - elif type(value) == np.ndarray: + elif isinstance(value, np.ndarray): # Will return a scaler if array is of size 1, else will return a list. return value.tolist() elif ( - type(value) == datetime.datetime - or type(value) == str - or type(value) == np.datetime64 + isinstance(value, datetime.datetime) + or isinstance(value, str) + or isinstance(value, np.datetime64) ): # Assume strings are ISO format timestamps... try: @@ -147,7 +147,7 @@ def to_json_serializable_type(value: t.Any) -> t.Any: # We assume here that naive timestamps are in UTC timezone. return value.replace(tzinfo=datetime.timezone.utc).isoformat() - elif type(value) == np.timedelta64: + elif isinstance(value, np.timedelta64): # Return time delta in seconds. return float(value / np.timedelta64(1, "s")) # This check must happen after processing np.timedelta64 and np.datetime64. 
From b809f86d2d42ea0ffeacc1ce5eaee48e02a4561d Mon Sep 17 00:00:00 2001 From: aniketinfocusp <122869307+aniketinfocusp@users.noreply.github.com> Date: Fri, 27 Oct 2023 16:20:40 +0530 Subject: [PATCH 50/51] `dl-v2` server test fix (#412) * fixed license tests * fixed download tests * fixed license tests * updated name for missing config condition * updated readme * skipping dl-v2 tests in CI --- .github/workflows/ci.yml | 2 +- weather_dl_v2/README.md | 2 ++ .../database/download_handler.py | 10 ++++---- .../database/license_handler.py | 6 +++-- .../fastapi-server/database/queue_handler.py | 2 +- .../fastapi-server/routers/download.py | 4 ++-- .../fastapi-server/routers/license.py | 4 ++-- .../tests/integration/test_download.py | 13 +++++------ .../tests/integration/test_license.py | 23 +++++++++++-------- .../tests/integration/test_queues.py | 18 +++++++-------- .../test_data/{no_exist.cfg => not_exist.cfg} | 0 11 files changed, 46 insertions(+), 38 deletions(-) rename weather_dl_v2/fastapi-server/tests/test_data/{no_exist.cfg => not_exist.cfg} (100%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b3ebeb98..37c3277a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -61,7 +61,7 @@ jobs: run: python -m metview selfcheck - name: Run unit tests shell: bash -l {0} - run: pytest --memray + run: pytest --memray --ignore=weather_dl_v2 # Ignoring dl-v2 as it only supports py3.10 lint: runs-on: ubuntu-latest strategy: diff --git a/weather_dl_v2/README.md b/weather_dl_v2/README.md index 868dae84..ea7b7bb5 100644 --- a/weather_dl_v2/README.md +++ b/weather_dl_v2/README.md @@ -2,6 +2,8 @@ +> **_NOTE:_** weather-dl-v2 only supports python 3.10 + ### Sequence of steps: 1) Refer to downloader_kubernetes/README.md 2) Refer to license_deployment/README.md diff --git a/weather_dl_v2/fastapi-server/database/download_handler.py b/weather_dl_v2/fastapi-server/database/download_handler.py index 7db281eb..1377e5b4 100644 --- 
a/weather_dl_v2/fastapi-server/database/download_handler.py +++ b/weather_dl_v2/fastapi-server/database/download_handler.py @@ -80,18 +80,20 @@ async def _mark_partitioning_status(self, config_name: str, status: str) -> None ) async def _check_download_exists(self, config_name: str) -> bool: - if config_name == "no_exist": + if config_name == "not_exist": return False - elif config_name == "no_exist.cfg": + elif config_name == "not_exist.cfg": return False else: return True async def _get_downloads(self, client_name: str) -> list: - return [{"config_name": "example.cfg", "client_name": "client"}] + return [{"config_name": "example.cfg", "client_name": "client", "status": "partitioning completed."}] async def _get_download_by_config_name(self, config_name: str): - return {"config_name": "example.cfg", "client_name": "client"} + if config_name == "not_exist": + return None + return {"config_name": "example.cfg", "client_name": "client", "status": "partitioning completed."} class DownloadHandlerFirestore(DownloadHandler): diff --git a/weather_dl_v2/fastapi-server/database/license_handler.py b/weather_dl_v2/fastapi-server/database/license_handler.py index 4c5b6954..d4878e25 100644 --- a/weather_dl_v2/fastapi-server/database/license_handler.py +++ b/weather_dl_v2/fastapi-server/database/license_handler.py @@ -89,13 +89,15 @@ async def _update_license(self, license_id: str, license_dict: dict) -> None: ) async def _check_license_exists(self, license_id: str) -> bool: - if license_id == "no_exists": + if license_id == "not_exist": + return False + elif license_id == "no-exists": return False else: return True async def _get_license_by_license_id(self, license_id: str) -> dict: - if license_id == "no_exists": + if license_id == "not_exist": return None return { "license_id": license_id, diff --git a/weather_dl_v2/fastapi-server/database/queue_handler.py b/weather_dl_v2/fastapi-server/database/queue_handler.py index dfe1ab71..1909d583 100644 --- 
a/weather_dl_v2/fastapi-server/database/queue_handler.py +++ b/weather_dl_v2/fastapi-server/database/queue_handler.py @@ -100,7 +100,7 @@ async def _get_queues(self) -> list: return [{"client_name": "dummy_client", "license_id": "L1", "queue": []}] async def _get_queue_by_license_id(self, license_id: str) -> dict: - if license_id == "no_exists": + if license_id == "not_exist": return None return {"client_name": "dummy_client", "license_id": license_id, "queue": []} diff --git a/weather_dl_v2/fastapi-server/routers/download.py b/weather_dl_v2/fastapi-server/routers/download.py index 03f82122..e3de4b57 100644 --- a/weather_dl_v2/fastapi-server/routers/download.py +++ b/weather_dl_v2/fastapi-server/routers/download.py @@ -83,8 +83,8 @@ def get_fetch_config_stats(): def get_fetch_config_stats_mock(): - def fetch_config_stats( - config_name: str, client_name: str, manifest_handler: ManifestHandler + async def fetch_config_stats( + config_name: str, client_name: str, status: str, manifest_handler: ManifestHandler ): return { "config_name": config_name, diff --git a/weather_dl_v2/fastapi-server/routers/license.py b/weather_dl_v2/fastapi-server/routers/license.py index 9c73dca0..2dfb3419 100644 --- a/weather_dl_v2/fastapi-server/routers/license.py +++ b/weather_dl_v2/fastapi-server/routers/license.py @@ -69,8 +69,8 @@ async def create_deployment(license_id: str, license_handler: LicenseHandler): def get_create_deployment_mock(): - def create_deployment_mock(license_id: str, license_handler: LicenseHandler): - logger.info("create deployment mock.") + async def create_deployment_mock(license_id: str, license_handler: LicenseHandler): + logger.info("create deployment mock.") return create_deployment_mock diff --git a/weather_dl_v2/fastapi-server/tests/integration/test_download.py b/weather_dl_v2/fastapi-server/tests/integration/test_download.py index 4d6bb74a..fc707d10 100644 --- a/weather_dl_v2/fastapi-server/tests/integration/test_download.py +++ 
b/weather_dl_v2/fastapi-server/tests/integration/test_download.py @@ -78,11 +78,11 @@ def test_submit_download_basic(): header = { "accept": "application/json", } - file_path = os.path.join(ROOT_DIR, "tests/test_data/no_exist.cfg") + file_path = os.path.join(ROOT_DIR, "tests/test_data/not_exist.cfg") licenses = ["L1"] code = 200 expected = { - "message": f"file 'no_exist.cfg' saved at '{os.getcwd()}/tests/test_data/no_exist.cfg' " + "message": f"file 'not_exist.cfg' saved at '{os.getcwd()}/tests/test_data/not_exist.cfg' " "successfully." } @@ -141,16 +141,15 @@ def test_get_download_by_config_basic(): def test_get_download_by_config_wrong_config(): headers = {} - config_name = "no_exist" + config_name = "not_exist" code = 404 - expected = {"detail": "Download config not found in weather-dl v2."} + expected = {"detail": "Download config not_exist not found in weather-dl v2."} _get_download_by_config(headers, config_name, code, expected) def _delete_download_by_config(headers, config_name, code, expected): response = client.delete(f"/download/{config_name}", headers=headers) - assert response.status_code == code assert response.json() == expected @@ -169,8 +168,8 @@ def test_delete_download_by_config_basic(): def test_delete_download_by_config_wrong_config(): headers = {} - config_name = "no_exist" + config_name = "not_exist" code = 404 - expected = {"detail": "No such download config to stop & remove."} + expected = {"detail": "No such download config not_exist to stop & remove."} _delete_download_by_config(headers, config_name, code, expected) diff --git a/weather_dl_v2/fastapi-server/tests/integration/test_license.py b/weather_dl_v2/fastapi-server/tests/integration/test_license.py index 99855344..f4a5dea7 100644 --- a/weather_dl_v2/fastapi-server/tests/integration/test_license.py +++ b/weather_dl_v2/fastapi-server/tests/integration/test_license.py @@ -86,6 +86,8 @@ def _add_license(headers, payload, code, expected): params={"license_id": "L1"}, ) + print(f"test 
add license {response.json()}") + assert response.status_code == code assert response.json() == expected @@ -93,11 +95,10 @@ def _add_license(headers, payload, code, expected): def test_add_license_basic(): headers = {"accept": "application/json", "Content-Type": "application/json"} license = { + "license_id": "no-exists", "client_name": "dummy_client", "number_of_requests": 0, "secret_id": "xxxx", - "api_email": "email", - "k8s_deployment_id": "k1", } payload = license code = 200 @@ -131,10 +132,10 @@ def test_get_license_by_license_id(): def test_get_license_wrong_license(): headers = {} - license_id = "no_exists" + license_id = "not_exist" code = 404 expected = { - "detail": "License not found.", + "detail": "License not_exist not found.", } _get_license_by_license_id(headers, license_id, code, expected) @@ -145,6 +146,8 @@ def _update_license(headers, license_id, license, code, expected): f"/license/{license_id}", headers=headers, data=json.dumps(license) ) + print(f"_update license {response.json()}") + assert response.status_code == code assert response.json() == expected @@ -153,10 +156,10 @@ def test_update_license_basic(): headers = {} license_id = "L1" license = { + "license_id": "L1", "client_name": "dummy_client", "number_of_requests": 0, "secret_id": "xxxx", - "api_email": "email", } code = 200 expected = {"license_id": license_id, "name": "License updated successfully."} @@ -166,15 +169,15 @@ def test_update_license_basic(): def test_update_license_wrong_license_id(): headers = {} - license_id = "no_exists" + license_id = "no-exists" license = { + "license_id": "no-exists", "client_name": "dummy_client", "number_of_requests": 0, "secret_id": "xxxx", - "api_email": "email", } code = 404 - expected = {"detail": "No such license to update."} + expected = {"detail": "No such license no-exists to update."} _update_license(headers, license_id, license, code, expected) @@ -197,8 +200,8 @@ def test_delete_license_basic(): def 
test_delete_license_wrong_license(): headers = {} - license_id = "no_exists" + license_id = "not_exist" code = 404 - expected = {"detail": "No such license to delete."} + expected = {"detail": "No such license not_exist to delete."} _delete_license(headers, license_id, code, expected) diff --git a/weather_dl_v2/fastapi-server/tests/integration/test_queues.py b/weather_dl_v2/fastapi-server/tests/integration/test_queues.py index a9c08c4b..5fa7855a 100644 --- a/weather_dl_v2/fastapi-server/tests/integration/test_queues.py +++ b/weather_dl_v2/fastapi-server/tests/integration/test_queues.py @@ -73,9 +73,9 @@ def test_get_queue_by_license_basic(): def test_get_queue_by_license_wrong_license(): headers = {} - license_id = "no_exists" + license_id = "not_exist" code = 404 - expected = {"detail": "License's priority not found."} + expected = {"detail": 'License priority for not_exist not found.'} _get_queue_by_license(headers, license_id, code, expected) @@ -99,10 +99,10 @@ def test_modify_license_queue_basic(): def test_modify_license_queue_wrong_license_id(): headers = {} - license_id = "no_exists" + license_id = "not_exist" priority_list = [] code = 404 - expected = {"detail": "License's priority not found."} + expected = {"detail": 'License not_exist not found.'} _modify_license_queue(headers, license_id, priority_list, code, expected) @@ -122,7 +122,7 @@ def test_modify_config_priority_in_license_basic(): query = {"config_name": "example.cfg", "priority": 0} code = 200 expected = { - "message": f"'{license_id}' license 'example.cfg' priority updated successfully." + "message": f"'{license_id}' license -- 'example.cfg' priority updated successfully." 
} _modify_config_priority_in_license(headers, license_id, query, code, expected) @@ -130,19 +130,19 @@ def test_modify_config_priority_in_license_basic(): def test_modify_config_priority_in_license_wrong_license(): headers = {} - license_id = "no_exists" + license_id = "not_exist" query = {"config_name": "example.cfg", "priority": 0} code = 404 - expected = {"detail": "License's priority not found."} + expected = {"detail": 'License not_exist not found.'} _modify_config_priority_in_license(headers, license_id, query, code, expected) def test_modify_config_priority_in_license_wrong_config(): headers = {} - license_id = "no_exists" + license_id = "not_exist" query = {"config_name": "wrong.cfg", "priority": 0} code = 404 - expected = {"detail": "License's priority not found."} + expected = {"detail": 'License not_exist not found.'} _modify_config_priority_in_license(headers, license_id, query, code, expected) diff --git a/weather_dl_v2/fastapi-server/tests/test_data/no_exist.cfg b/weather_dl_v2/fastapi-server/tests/test_data/not_exist.cfg similarity index 100% rename from weather_dl_v2/fastapi-server/tests/test_data/no_exist.cfg rename to weather_dl_v2/fastapi-server/tests/test_data/not_exist.cfg From f55c756a56efe6670ddea953190ba97e132b0ab7 Mon Sep 17 00:00:00 2001 From: aniketinfocusp <122869307+aniketinfocusp@users.noreply.github.com> Date: Fri, 27 Oct 2023 16:30:12 +0530 Subject: [PATCH 51/51] lint fixes (#413) --- weather_dl/download_pipeline/util.py | 2 +- weather_dl_v2/fastapi-server/routers/license.py | 2 +- weather_mv/loader_pipeline/streaming.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/weather_dl/download_pipeline/util.py b/weather_dl/download_pipeline/util.py index faf12d0b..e81c8920 100644 --- a/weather_dl/download_pipeline/util.py +++ b/weather_dl/download_pipeline/util.py @@ -104,7 +104,7 @@ def to_json_serializable_type(value: t.Any) -> t.Any: elif type(value) == np.ndarray: # Will return a scaler if array is of size 1, else 
will return a list. return value.tolist() - elif type(value) == datetime.datetime or type(value) == str or type(value) == np.datetime64: + elif isinstance(value, datetime.datetime) or isinstance(value, str) or isinstance(value, np.datetime64): # Assume strings are ISO format timestamps... try: value = datetime.datetime.fromisoformat(value) diff --git a/weather_dl_v2/fastapi-server/routers/license.py b/weather_dl_v2/fastapi-server/routers/license.py index 2dfb3419..05ac5139 100644 --- a/weather_dl_v2/fastapi-server/routers/license.py +++ b/weather_dl_v2/fastapi-server/routers/license.py @@ -70,7 +70,7 @@ async def create_deployment(license_id: str, license_handler: LicenseHandler): def get_create_deployment_mock(): async def create_deployment_mock(license_id: str, license_handler: LicenseHandler): - logger.info("create deployment mock.") + logger.info("create deployment mock.") return create_deployment_mock diff --git a/weather_mv/loader_pipeline/streaming.py b/weather_mv/loader_pipeline/streaming.py index eaf407c1..2badb939 100644 --- a/weather_mv/loader_pipeline/streaming.py +++ b/weather_mv/loader_pipeline/streaming.py @@ -86,7 +86,7 @@ def try_parse_message(cls, message_body: t.Union[str, t.Dict]) -> t.Dict: try: return json.loads(message_body) except (json.JSONDecodeError, TypeError): - if type(message_body) is dict: + if isinstance(message_body, dict): return message_body raise