
remove geopandas and fiona + bugfixes #833

Closed
wants to merge 4 commits
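The pattern repeated throughout this diff: a GeoPackage is an ordinary SQLite database, so layers that were previously read with geopandas/fiona can be read with the standard-library sqlite3 module plus pandas. A minimal sketch of the idea, with an illustrative file and layer name (not taken from this PR):

```python
import sqlite3

import pandas as pd

# Before (geopandas): df = gpd.read_file("network.gpkg", layer="flowpaths")
# After: read the same layer as a plain attribute table via sqlite3.
with sqlite3.connect("network.gpkg") as conn:
    df = pd.read_sql_query("SELECT * FROM flowpaths", conn)
```

This drops the GDAL/fiona dependency chain, at the cost of handling geometry manually: geometry columns come back as raw GeoPackage binary blobs rather than parsed shapes.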
2 changes: 1 addition & 1 deletion .github/workflows/ubuntu-latest.yml
@@ -40,7 +40,7 @@ jobs:
sudo apt-get install libstdc++-10-dev libgfortran-10-dev glibc-source openmpi-bin openmpi-common libopenmpi-dev libopenmpi3 libgtk-3-bin libgtk-3-common libgtk-3-dev -y
sudo apt-get install netcdf-bin libnetcdf-dev libnetcdff-dev libnetcdf-c++4 libnetcdf-c++4-dev -y
python -m pip install --upgrade pip
pip3 install wheel dask pyproj fiona bmipy opencv-contrib-python-headless
pip3 install wheel dask pyproj bmipy opencv-contrib-python-headless
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi

- name: Install t-route
1 change: 1 addition & 0 deletions .gitignore
@@ -53,6 +53,7 @@ __pycache__/
.env
.python-version
.ipynb_checkpoints/
build/

# pyenv #
#########
3 changes: 1 addition & 2 deletions readme.md
@@ -66,7 +66,6 @@ joblib
toolz
Cython
pyyaml
geopandas
pyarrow
deprecated
```
@@ -84,7 +83,7 @@ To get a sense of the operation of the routing scheme, follow this sequence of c

```shell
# install required python modules
pip3 install numpy pandas xarray netcdf4 joblib toolz pyyaml 'Cython>3,!=3.0.4' geopandas pyarrow deprecated wheel
pip3 install numpy pandas xarray netcdf4 joblib toolz pyyaml 'Cython>3,!=3.0.4' pyarrow deprecated wheel

# clone t-toute
git clone --progress --single-branch --branch master http://github.com/NOAA-OWP/t-route.git
1 change: 0 additions & 1 deletion requirements.txt
@@ -11,4 +11,3 @@ toolz
joblib
deprecated
pyarrow<12.0
geopandas
2 changes: 0 additions & 2 deletions src/troute-network/pyproject.toml
@@ -7,8 +7,6 @@ name = "troute.network"
version = "0.0.0"
dependencies = [
"deprecated",
"geopandas",
"fiona",
"joblib",
"netcdf4",
"numpy",
10 changes: 6 additions & 4 deletions src/troute-network/tests/make_test_network.py
@@ -1,5 +1,5 @@
import geopandas as gpd
import pandas as pd
import sqlite3
from pathlib import Path
import sys

@@ -45,9 +45,11 @@ def make_network_from_segment(flowpaths, edges, attributes, depth, segment):
sub_edges.drop('geometry', axis=1).to_json("flowpath_edge_list.json", orient='records', indent=2)

def make_network_from_geopkg(file_path, depth, segment=None):
flowpaths = gpd.read_file(file_path, layer="flowpaths")
attributes = gpd.read_file(file_path, layer="flowpath_attributes")
edges = gpd.read_file(file_path, layer="flowpath_edge_list")
with sqlite3.connect(file_path) as conn:
flowpaths = pd.read_sql_query("SELECT * FROM flowpaths", conn)
attributes = pd.read_sql_query("SELECT * FROM flowpath_attributes", conn)
edges = pd.read_sql_query("SELECT * FROM flowpath_edge_list", conn)

if segment is None:
segment = flowpaths[flowpaths['toid'].str.startswith('tnex')].iloc[0]['id']
make_network_from_segment(flowpaths, edges, attributes, depth, segment)
8 changes: 1 addition & 7 deletions src/troute-network/troute/AbstractNetwork.py
@@ -926,13 +926,7 @@ def filter_diffusive_nexus_pts(self,):
diff_tw_ids = ['nex-' + str(s) for s in diff_tw_ids]
nexus_latlon = nexus_latlon[nexus_latlon['id'].isin(diff_tw_ids)]
nexus_latlon['id'] = nexus_latlon['id'].str.split('-',expand=True).loc[:,1].astype(float).astype(int)
lat_lon_crs = nexus_latlon[['id','geometry']]
lat_lon_crs = lat_lon_crs.to_crs(crs=4326)
lat_lon_crs['lon'] = lat_lon_crs.geometry.x
lat_lon_crs['lat'] = lat_lon_crs.geometry.y
lat_lon_crs['crs'] = str(lat_lon_crs.crs)
lat_lon_crs = lat_lon_crs[['lon','lat','crs']]
self._nexus_latlon = nexus_latlon[['id']].join(lat_lon_crs)
self._nexus_latlon = nexus_latlon[['id','lon','lat', 'crs']]

def get_timesteps_from_nex(nexus_files):
# Return a list of output files
123 changes: 88 additions & 35 deletions src/troute-network/troute/HYFeaturesNetwork.py
@@ -1,7 +1,6 @@
from .AbstractNetwork import AbstractNetwork
import pandas as pd
import numpy as np
import geopandas as gpd
import time
import json
from pathlib import Path
@@ -13,7 +12,7 @@
from datetime import datetime
from pprint import pformat
import os
import fiona
import sqlite3
import troute.nhd_io as nhd_io #FIXME
from troute.nhd_network import reverse_dict, extract_connections, reverse_network, reachable
from .rfc_lake_gage_crosswalk import get_rfc_lake_gage_crosswalk, get_great_lakes_climatology
@@ -29,14 +28,19 @@ def find_layer_name(layers, pattern):
if re.search(pattern, layer, re.IGNORECASE):
return layer
return None

Contributor: minor whitespace diff here...

Contributor Author: oops, I must not have had black installed

def read_geopkg(file_path, compute_parameters, waterbody_parameters, cpu_pool):
# Retrieve available layers from the GeoPackage
available_layers = fiona.listlayers(file_path)
# Retrieve available layers from the GeoPackage
with sqlite3.connect(file_path) as conn:
# gpkg_contents, gpkg_ogr_contents, and sqlite_sequence all contain the layer names
result = conn.execute("SELECT table_name FROM gpkg_contents;").fetchall()
# fetchall returns a list of tuples
available_layers = [r[0] for r in result]

# patterns for the layers we want to find
layer_patterns = {
'flowpaths': r'flow[-_]?paths?|flow[-_]?lines?',
# without the $ anchor, 'flowpaths' would also match 'flowpath_attributes'
'flowpaths': r'flow[-_]?paths?$|flow[-_]?lines?$',
'flowpath_attributes': r'flow[-_]?path[-_]?attributes?|flow[-_]?line[-_]?attributes?',
'lakes': r'lakes?',
'nexus': r'nexus?',
@@ -65,15 +69,41 @@ def read_geopkg(file_path, compute_parameters, waterbody_parameters, cpu_pool):
if hybrid_parameters.get('run_hybrid_routing', False) and 'nexus' not in layers_to_read:
layers_to_read.append('nexus')

# Function that read a layer from the geopackage
# Function that reads a layer from the geopackage
def read_layer(layer_name):
if layer_name:
try:
return gpd.read_file(file_path, layer=layer_name)
except Exception as e:
print(f"Error reading {layer_name}: {e}")
return pd.DataFrame()
return pd.DataFrame()
if not layer_name:
return pd.DataFrame()
try:
with sqlite3.connect(file_path) as conn:
has_spatial_metadata = False
# try to get the name of the geometry column and its CRS
geometry_columns = conn.execute(f"""
SELECT c.column_name,g.definition
FROM gpkg_geometry_columns AS c
JOIN gpkg_spatial_ref_sys AS g ON c.srs_id = g.srs_id
WHERE c.table_name = '{layer_name}'""").fetchall()

if len(geometry_columns) > 0:
has_spatial_metadata = True
geom_column = geometry_columns[0][0]
crs = geometry_columns[0][1]

if has_spatial_metadata:
# select everything from the layer, plus the midpoint of its bounding box
Contributor: the midpoint of its bounding box.

sql_query = f"""SELECT d.*,
(r.minx + r.maxx) / 2.0 AS lon,
(r.miny + r.maxy) / 2.0 AS lat
FROM {layer_name} AS d
JOIN rtree_{layer_name}_{geom_column} AS r ON d.fid = r.id"""
df = pd.read_sql_query(sql_query, conn)
df['crs'] = crs
return df
else:
return pd.read_sql_query(f"SELECT * FROM {layer_name}", conn)
except Exception as e:
print(f"Error reading layer {layer_name} from {file_path}: {e}")
return pd.DataFrame()
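For readers unfamiliar with the `rtree_...` join above: a GeoPackage maintains an R*Tree index table per spatial layer, named `rtree_<table>_<geometry_column>`, whose rows hold each feature's bounding box (`minx`, `maxx`, `miny`, `maxy`) keyed by the feature's integer id. Joining it against the layer yields an approximate point location without parsing geometry blobs. A standalone sketch under assumed names (a layer `flowpaths` with geometry column `geom` in `network.gpkg`):

```python
import sqlite3

import pandas as pd

with sqlite3.connect("network.gpkg") as conn:
    # d.fid is OGR's default integer primary key; r.id mirrors it in the index
    df = pd.read_sql_query(
        """SELECT d.*,
                  (r.minx + r.maxx) / 2.0 AS x_mid,
                  (r.miny + r.maxy) / 2.0 AS y_mid
           FROM flowpaths AS d
           JOIN rtree_flowpaths_geom AS r ON d.fid = r.id""",
        conn,
    )
```

Note this is the midpoint of the bounding box, not a true centroid, and it is expressed in the layer's native CRS; it is only a longitude/latitude when the layer is stored in EPSG:4326.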


# Retrieve geopackage information using matched layer names
if cpu_pool > 1:
@@ -83,26 +113,34 @@ def read_layer(layer_name):
table_dict = {layers_to_read[i]: gpkg_list[i] for i in range(len(layers_to_read))}
else:
table_dict = {layer: read_layer(matched_layers[layer]) for layer in layers_to_read}

# Handle different key column names between flowpaths and flowpath_attributes
flowpaths_df = table_dict.get('flowpaths', pd.DataFrame())
flowpath_attributes_df = table_dict.get('flowpath_attributes', pd.DataFrame())

# Check if 'link' column exists and rename it to 'id'
if 'link' in flowpath_attributes_df.columns:
flowpath_attributes_df.rename(columns={'link': 'id'}, inplace=True)

# Merge flowpaths and flowpath_attributes
flowpaths = pd.merge(
flowpaths_df,
flowpath_attributes_df,
on='id',
how='inner'
)

lakes = table_dict.get('lakes', pd.DataFrame())
network = table_dict.get('network', pd.DataFrame())
nexus = table_dict.get('nexus', pd.DataFrame())

# Handle different key column names between flowpaths and flowpath_attributes
flowpaths_df = table_dict.get('flowpaths', None)
flowpath_attributes_df = table_dict.get('flowpath_attributes', None)

if flowpath_attributes_df is not None:
# Check if 'link' column exists and rename it to 'id'
if 'link' in flowpath_attributes_df.columns:
flowpath_attributes_df.rename(columns={'link': 'id'}, inplace=True)

if flowpaths_df is not None and flowpath_attributes_df is not None:
# Merge flowpaths and flowpath_attributes
flowpaths = pd.merge(
flowpaths_df,
flowpath_attributes_df,
on='id',
how='inner'
)
elif flowpaths_df is not None:
flowpaths = flowpaths_df
elif flowpath_attributes_df is not None:
flowpaths = flowpath_attributes_df
else:
raise ValueError("No flowpaths or flowpath_attributes found in the geopackage")

return flowpaths, lakes, network, nexus

@@ -126,8 +164,18 @@ def read_json(file_path, edge_list):
return df_main

def read_geojson(file_path):
flowpaths = gpd.read_file(file_path)
return flowpaths
with open(file_path) as f:
data = json.load(f)
data = data['features']
df = pd.json_normalize(data, max_level=1)
df.columns = df.columns.str.replace('properties.', '', regex=False)
df = df.drop(columns=['type'])
# Geometry appears to be unused or dropped downstream; in case it is needed:
# the geometry type (e.g. MULTIPOLYGON) is stored in 'geometry.type'
# and the coordinates in 'geometry.coordinates';
# the CRS is stored in data['crs'], e.g.
# data['crs'] = { "type": "name", "properties": { "name": "urn:ogc:def:crs:EPSG::5070" } }
return df
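If the geometry ever does need to be recovered from the normalized frame, a hypothetical variant (not part of this PR) could keep the columns the comment above describes:

```python
import json

import pandas as pd

def read_geojson_with_geometry(file_path):
    """Hypothetical sketch: like read_geojson, but keeps geometry info."""
    with open(file_path) as f:
        data = json.load(f)
    # e.g. { "type": "name", "properties": { "name": "urn:ogc:def:crs:EPSG::5070" } }
    crs = data.get('crs', {}).get('properties', {}).get('name')
    df = pd.json_normalize(data['features'], max_level=1)
    df.columns = df.columns.str.replace('properties.', '', regex=False)
    df = df.drop(columns=['type'])
    # 'geometry.type' and 'geometry.coordinates' are kept rather than dropped
    df['crs'] = crs
    return df
```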

def numeric_id(flowpath):
id = flowpath['key'].split('-')[-1]
@@ -138,15 +186,18 @@ def numeric_id(flowpath):

def read_ngen_waterbody_df(parm_file, lake_index_field="wb-id", lake_id_mask=None):
"""
FIXME FUNCTION NEVER CALLED
Reads .gpkg or lake.json file and prepares a dataframe, filtered
to the relevant reservoirs, to provide the parameters
for level-pool reservoir computation.
"""
def node_key_func(x):
return int( x.split('-')[-1] )
if Path(parm_file).suffix=='.gpkg':
df = gpd.read_file(parm_file, layer='lakes')

# This could be made more efficient by reading only the necessary columns,
# but I don't have a reference for which columns remain after the drop below
with sqlite3.connect(parm_file) as conn:
df = pd.read_sql_query('SELECT * FROM lakes', conn)
df = (
df.drop(['id','toid','hl_id','hl_reference','hl_uri','geometry'], axis=1)
.rename(columns={'hl_link': 'lake_id'})
@@ -164,6 +215,7 @@ def node_key_func(x):

def read_ngen_waterbody_type_df(parm_file, lake_index_field="wb-id", lake_id_mask=None):
"""
FIXME FUNCTION NEVER CALLED
"""
#FIXME: this function is likely not correct. Unclear how we will get
# reservoir type from the gpkg files. Information should be in 'crosswalk'
@@ -173,7 +225,8 @@ def node_key_func(x):
return int( x.split('-')[-1] )

if Path(parm_file).suffix=='.gpkg':
df = gpd.read_file(parm_file, layer="crosswalk").set_index('id')
with sqlite3.connect(parm_file) as conn:
df = pd.read_sql_query('SELECT * FROM crosswalk', conn).set_index('id')
elif Path(parm_file).suffix=='.json':
df = pd.read_json(parm_file, orient="index")

4 changes: 1 addition & 3 deletions test/LowerColorado_TX_v4/run_BMI_Coastal.py
@@ -3,8 +3,6 @@
import os
import numpy as np
import pandas as pd
import geopandas as gpd
import pickle
from datetime import datetime, timedelta
import time

@@ -20,7 +18,7 @@
#import troute_model

from troute.HYFeaturesNetwork import HYFeaturesNetwork
from troute.AbstractNetwork import *
from troute.AbstractNetwork import read_coastal_output

import bmi_df2array as df2a

2 changes: 0 additions & 2 deletions test/LowerColorado_TX_v4/run_with_BMI.py
@@ -3,8 +3,6 @@
import os
import numpy as np
import pandas as pd
import geopandas as gpd
import pickle
from datetime import datetime, timedelta
import time
