Skip to content

Commit

Permalink
Merge pull request #179 from hotosm/feature/predictor_integration
Browse files Browse the repository at this point in the history
Feature : Predictor Module Integration
  • Loading branch information
kshitijrajsharma authored Nov 21, 2023
2 parents 55d5fcd + cfc72f1 commit dbd16bc
Show file tree
Hide file tree
Showing 6 changed files with 31 additions and 263 deletions.
28 changes: 0 additions & 28 deletions backend/aiproject/utils.py
Original file line number Diff line number Diff line change
@@ -1,28 +0,0 @@
# https://services.digitalglobe.com/earthservice/tmsaccess/tms/1.0.0/DigitalGlobe:ImageryTileService@EPSG:3857@jpg/{z}/{x}/{y}.jpg?connectId=c2cbd3f2-003a-46ec-9e46-26a3996d6484&flipy=true
import math

tile_size = 256

def convert2worldcd(lat,lng):
"""
World coordinates are measured from the Mercator projection's origin (the northwest corner of the map at 180 degrees longitude and approximately 85 degrees latitude) and increase in the x direction towards the east (right) and increase in the y direction towards the south (down). Because the basic Mercator tile is 256 x 256 pixels, the usable world coordinate space is {0-256}, {0-256}
"""
siny = math.sin((lat * math.pi) / 180)
siny = min(max(siny, -0.9999), 0.9999)
world_x= tile_size * (0.5 + (lng / 360))
world_y = tile_size * (0.5 - math.log((1 + siny) / (1 - siny)) / (4 * math.pi))
print(world_x,world_y)
return world_x,world_y

def latlng2tile(zoom,lat,lng):
"""By dividing the pixel coordinates by the tile size and taking the integer parts of the result, you produce as a by-product the tile coordinate at the current zoom level."""
zoom_byte=1 << zoom #converting zoom level to pixel bytes
# print(zoom_byte)
w_x,w_y=convert2worldcd(lat,lng)
t_x=math.floor((w_x * zoom_byte) / tile_size)
t_y=math.floor((w_y * zoom_byte) / tile_size)
return t_x,t_y

z,x,y=19,-80.6719408929348,35.03247598940751
tile_x,tile_y=latlng2tile(z,x,y)
print(z,tile_x,tile_y)
9 changes: 9 additions & 0 deletions backend/core/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,7 @@ class PredictionParamSerializer(serializers.Serializer):
# for vectorization
tolerance = serializers.FloatField(required=False)
area_threshold = serializers.FloatField(required=False)
tile_overlap_distance = serializers.FloatField(required=False)

def validate_max_angle_change(self, value):
if value is not None:
Expand All @@ -255,6 +256,14 @@ def validate_tolerance(self, value):
)
return value

def validate_tile_overlap_distance(self, value):
if value is not None:
if value < 0 or value > 1:
raise serializers.ValidationError(
f"Invalid Tile Overlap Distance : {value}, Should be between 0 and 1"
)
return value

def validate_area_threshold(self, value):
if value is not None:
if value < 0 or value > 20:
Expand Down
3 changes: 1 addition & 2 deletions backend/core/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,9 @@
FeedbackLabelFileSerializer,
LabelFileSerializer,
)
from predictor import download_imagery,get_start_end_download_coords
from core.utils import (
bbox,
download_imagery,
get_start_end_download_coords,
is_dir_empty,
)
from django.conf import settings
Expand Down
154 changes: 0 additions & 154 deletions backend/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,164 +51,10 @@ def bbox(coord_list):
return ret


def convert2worldcd(lat, lng, tile_size):
"""
World coordinates are measured from the Mercator projection's origin
(the northwest corner of the map at 180 degrees longitude and
approximately 85 degrees latitude) and increase in the x direction
towards the east (right) and increase in the y direction towards the south
(down).Because the basic Mercator tile is 256 x 256 pixels, the usable
world coordinate space is {0-256}, {0-256}
"""
siny = math.sin((lat * math.pi) / 180)
siny = min(max(siny, -0.9999), 0.9999)
world_x = tile_size * (0.5 + (lng / 360))
world_y = tile_size * (0.5 - math.log((1 + siny) / (1 - siny)) / (4 * math.pi))
# print("world coordinate space is %s, %s",world_x,world_y)
return world_x, world_y


def latlng2tile(zoom, lat, lng, tile_size):
"""By dividing the pixel coordinates by the tile size and taking the
integer parts of the result, you produce as a by-product the tile
coordinate at the current zoom level."""
zoom_byte = 1 << zoom # converting zoom level to pixel bytes
# print(zoom_byte)
w_x, w_y = convert2worldcd(lat, lng, tile_size)

t_x = math.floor((w_x * zoom_byte) / tile_size)
t_y = math.floor((w_y * zoom_byte) / tile_size)
return t_x, t_y


def get_start_end_download_coords(bbox_coords, zm_level, tile_size):
# start point where we will start downloading the tiles

start_point_lng = bbox_coords[0] # getting the starting lat lng
start_point_lat = bbox_coords[1]

# end point where we should stop downloading the tile
end_point_lng = bbox_coords[2] # getting the ending lat lng
end_point_lat = bbox_coords[3]

# Note : lat=y-axis, lng=x-axis
# getting tile coordinate for first point of bbox
start_x, start_y = latlng2tile(
zoom=zm_level,
lat=start_point_lat,
lng=start_point_lng,
tile_size=tile_size,
)
start = [start_x, start_y]

# getting tile coordinate for last point of bbox
end_x, end_y = latlng2tile(
zoom=zm_level,
lat=end_point_lat,
lng=end_point_lng,
tile_size=tile_size,
)
end = [end_x, end_y]
return start, end


import logging


def is_dir_empty(directory_path):
return not any(os.scandir(directory_path))


def download_image(url, base_path, source_name):
response = requests.get(url)

image = response.content

pattern = r"/(\d+)/(\d+)/(\d+)(?:\.\w+)?"
match = re.search(pattern, url)
# filename = z-x-y
filename = f"{base_path}/{source_name}-{match.group(2)}-{match.group(3)}-{match.group(1)}.png"


with open(filename, "wb") as f:
f.write(image)

# print(f"Downloaded: {url}")


def download_imagery(start: list, end: list, zm_level, base_path, source="maxar"):
"""Downloads imagery from start to end tile coordinate system
Args:
start (list):[tile_x,tile_y]
end (list): [tile_x,tile_y],
source (string): it should be eithre url string or maxar value
zm_level : Zoom level
"""

begin_x = start[0] # this will be the beginning of the download loop for x
begin_y = start[1] # this will be the beginning of the download loop for x
stop_x = end[0] # this will be the end of the download loop for x
stop_y = end[1] # this will be the end of the download loop for x

print(f"Download starting from {start} to {end} using source {source} - {zm_level}")

start_x = begin_x # starting loop from beginning
start_y = begin_y # starting y loop from beginnig
source_name = "OAM" # default
download_urls = []
while start_x <= stop_x: # download x section while keeping y as c
start_y = begin_y
while start_y >= stop_y: # download y section while keeping x as c
download_path = [start_x, start_y]
if source == "maxar":
try:
connect_id = os.environ.get("MAXAR_CONNECT_ID")
except Exception as ex:
raise ex
source_name = source
download_url = f"https://services.digitalglobe.com/earthservice/tmsaccess/tms/1.0.0/DigitalGlobe:ImageryTileService@EPSG:3857@jpg/{zm_level}/{download_path[0]}/{download_path[1]}.jpg?connectId={connect_id}&flipy=true"

# add multiple logic on supported sources here
else:
# source should be url as string , like this : https://tiles.openaerialmap.org/62dbd947d8499800053796ec/0/62dbd947d8499800053796ed/{z}/{x}/{y}
if "{-y}" in source:
## negative TMS
source_value = source.replace("{-y}", "{y}")
# conversion from normal tms
y_value = int((2**zm_level) - download_path[1] - 1)

else:
# If it doesn't, use the positive y-coordinate
y_value = download_path[1]
source_value = source
download_url = source_value.format(
x=download_path[0], y=y_value, z=zm_level)
download_urls.append(download_url)

start_y = start_y - 1 # decrease the y

start_x = start_x + 1 # increase the x

# Use the ThreadPoolExecutor to download the images in parallel

# with concurrent.futures.ThreadPoolExecutor() as executor:
# for url in download_urls:
# executor.submit(download_image, url, base_path, source_name)

with concurrent.futures.ThreadPoolExecutor() as executor:
futures = [
executor.submit(download_image, url, base_path, source_name)
for url in download_urls
]
for future in concurrent.futures.as_completed(futures):
try:
future.result()
except Exception as e:
print(f"An exception occurred in a thread: {e}")
raise e


def request_rawdata(request_params):
"""will make call to galaxy API & provides response as json
Expand Down
92 changes: 16 additions & 76 deletions backend/core/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,7 @@
import subprocess
import sys
import time
import uuid
import zipfile
from concurrent.futures import ProcessPoolExecutor, TimeoutError
from datetime import datetime
from tempfile import NamedTemporaryFile

Expand All @@ -27,7 +25,6 @@
from django_filters.rest_framework import DjangoFilterBackend
from drf_yasg.utils import swagger_auto_schema
from geojson2osm import geojson2osm
from hot_fair_utilities import polygonize, predict, vectorize
from login.authentication import OsmAuthentication
from login.permissions import IsOsmAuthenticated
from orthogonalizer import othogonalize_poly
Expand All @@ -39,6 +36,8 @@
from rest_framework.views import APIView
from rest_framework_gis.filters import InBBoxFilter, TMSTileFilter

from predictor import predict

from .models import (
AOI,
Dataset,
Expand All @@ -63,12 +62,8 @@
)
from .tasks import train_model
from .utils import (
bbox,
download_imagery,
get_dir_size,
get_start_end_download_coords,
gpx_generator,
is_dir_empty,
process_rawdata,
request_rawdata,
)
Expand Down Expand Up @@ -539,85 +534,36 @@ def post(self, request, *args, **kwargs):
else source_img_in_dataset
)
zoom_level = deserialized_data["zoom_level"]
start, end = get_start_end_download_coords(
bbox, zoom_level, DEFAULT_TILE_SIZE
)
temp_path = f"temp/{uuid.uuid4()}/"
os.makedirs(temp_path,exist_ok=True)
try:
download_imagery(
start,
end,
zoom_level,
base_path=temp_path,
source=source,
)
prediction_output = f"{temp_path}/prediction/output"
print("Image Downloaded , Starting Inference")
if is_dir_empty(temp_path):
return Response("No Images found", status=500)
start_time = time.time()
model_path = os.path.join(
settings.TRAINING_WORKSPACE,
f"dataset_{model_instance.dataset.id}",
"output",
f"training_{training_instance.id}",
"checkpoint.h5",
"checkpoint.tflite",
)
# give high priority to h5 model format if not avilable fall back to .tf
# give high priority to tflite model format if not avilable fall back to .h5 if not use default .tf
if not os.path.exists(model_path):
model_path = os.path.join(
settings.TRAINING_WORKSPACE,
f"dataset_{model_instance.dataset.id}",
"output",
f"training_{training_instance.id}",
"checkpoint.tf",
"checkpoint.h5",
)
# Spawn a new process for the prediction task
with ProcessPoolExecutor(max_workers=1) as executor:
try:
future = executor.submit(
predict,
model_path,
temp_path,
prediction_output,
deserialized_data["confidence"] / 100
if "confidence" in deserialized_data
else 0.5,
if not os.path.exists(model_path):
model_path = os.path.join(
settings.TRAINING_WORKSPACE,
f"dataset_{model_instance.dataset.id}",
"output",
f"training_{training_instance.id}",
"checkpoint.tf",
)
future.result(
timeout=45
) # Wait for process to complete, wait for max 45 sec
except TimeoutError:
print("Prediction Timeout")
return Response(
"Prediction Timeout , Took more than 30 sec : Use smaller models/area",
status=500,
)

print("Prediction is Complete, Vectorizing images")
start = time.time()

geojson_output = f"{prediction_output}/prediction.geojson"
# polygonize(
# input_path=prediction_output,
# output_path=geojson_output,
# remove_inputs=True,
# )

vectorize(
input_path=prediction_output,
output_path=geojson_output,
tolerance=deserialized_data["tolerance"]
if "tolerance" in deserialized_data
else 0.2, # in meters
area_threshold=deserialized_data["area_threshold"]
if "area_threshold" in deserialized_data
else 3, # in sqm
geojson_data = predict(bbox=bbox,model_path=model_path,zoom_level=zoom_level,tms_url=source, tile_size=DEFAULT_TILE_SIZE,confidence=deserialized_data["confidence"] / 100 if "confidence" in deserialized_data else 0.5,tile_overlap_distance=deserialized_data["tile_overlap_distance"] if "tile_overlap_distance" in deserialized_data else 0.15)
print(
f"It took {round(time.time()-start_time)}sec for generating predictions"
)
with open(geojson_output, "r") as f:
geojson_data = json.load(f)

for feature in geojson_data["features"]:
feature["properties"]["building"] = "yes"
feature["properties"]["source"] = "fAIr"
Expand All @@ -632,12 +578,7 @@ def post(self, request, *args, **kwargs):
else 15,
)

shutil.rmtree(temp_path)

print(
f"It took {round(time.time()-start)}sec for vectorization , Produced :{sys.getsizeof(geojson_data)*0.001} kb"
)
print(f"Prediction API took ({round(time.time()-start_time)} sec)")
print(f"Prediction API took ({round(time.time()-start_time)} sec) in total")

## TODO : can send osm xml format from here as well using geojson2osm
return Response(geojson_data, status=status.HTTP_201_CREATED)
Expand All @@ -648,7 +589,6 @@ def post(self, request, *args, **kwargs):
return Response(str(e), status=500)
except Exception as ex:
print(ex)
shutil.rmtree(temp_path)
return Response("Prediction Error", status=500)


Expand Down
Loading

0 comments on commit dbd16bc

Please sign in to comment.