diff --git a/pdgstaging/ConfigManager.py b/pdgstaging/ConfigManager.py index 81e4ef8..8800c94 100644 --- a/pdgstaging/ConfigManager.py +++ b/pdgstaging/ConfigManager.py @@ -1265,7 +1265,7 @@ def get_deduplication_config(self, gdf=None): for f in files: try: - footprints[f] = self.footprint_path_from_input(f) + footprints[f] = self.footprint_path_from_input(f, check_exists=True) except FileNotFoundError: logger.warning( f'No footprint files found for file {f}. ' @@ -1349,10 +1349,10 @@ def footprint_path_from_input(self, path, check_exists=False): path = os.path.join(dir_footprints, path + ext_footprints) if check_exists: if os.path.exists(path): - logger.info('Successfully found footprint file: {}'.format(path)) + logger.info(f'Successfully found footprint file: {path}') return path else: - logger.info('Failed to find footprint file: {}'.format(path)) + logger.info(f'Failed to find footprint file: {path}') raise FileNotFoundError(path) else: return path diff --git a/pdgstaging/Deduplicator.py b/pdgstaging/Deduplicator.py index 0e53794..4145a38 100644 --- a/pdgstaging/Deduplicator.py +++ b/pdgstaging/Deduplicator.py @@ -521,7 +521,11 @@ def deduplicate_by_footprint( # paths and load the footprints if all([isinstance(v, str) for v in footprints.values()]): for name, path in footprints.items(): - footprints[name] = gpd.read_file(path) + try: + footprints[name] = gpd.read_file(path) + except Exception: + footprints[name] = None + warnings.warn(f'Footprint missing for {name}') # Add a column to the GeoDataFrame that contains the filename prop_filename_temp = 'filename_' + uuid.uuid4().hex @@ -534,9 +538,12 @@ def deduplicate_by_footprint( # Clip to gdf to the extent of the footprints if clip_to_footprint: for name, gdf_grp in gdf_dict.items(): + fp = footprints.get(name) + if fp is None: + continue clip_results = clip_gdf( gdf=gdf_grp.copy(), - boundary=footprints[name].copy(), + boundary=fp.copy(), method=clip_method) gdf_dict[name] = clip_results['keep'] removed.append(clip_results['removed']) @@ -557,8 +564,10 @@ def deduplicate_by_footprint( for pair in itertools.combinations(names, 2): name1 = pair[0] name2 = pair[1] - footprint1 = footprints[name1] - footprint2 = footprints[name2] + footprint1 = footprints.get(name1) + footprint2 = footprints.get(name2) + if(footprint1 is None or footprint2 is None): + continue # Get overlap between two footprints overlap = gpd.GeoDataFrame( @@ -587,7 +596,7 @@ def deduplicate_by_footprint( removed.append(to_reduce[overlap_boolean]) # Recombine the GDFs from the dictionary - keep = pd.concat(gdf_dict.values()) + keep = pd.concat(gdf_dict.values(), ignore_index=True) removed = pd.concat(removed) to_return = { @@ -596,7 +605,7 @@ def deduplicate_by_footprint( } if return_intersections: - to_return['intersections'] = pd.concat(intersections) + to_return['intersections'] = pd.concat(intersections, ignore_index=True) if label: to_return = label_duplicates(to_return, prop_duplicated) diff --git a/pdgstaging/TileStager.py b/pdgstaging/TileStager.py index 06a358c..a14f676 100644 --- a/pdgstaging/TileStager.py +++ b/pdgstaging/TileStager.py @@ -466,7 +466,7 @@ def combine_and_deduplicate(self, gdf, tile_path): if num_unique_crs != 1: existing_gdf.to_crs(gdf.crs, inplace=True) - gdf = pd.concat(to_concat) + gdf = pd.concat(to_concat, ignore_index=True) dedup_config = self.config.get_deduplication_config(gdf) if dedup_method is None: return gdf diff --git a/setup.py b/setup.py index c34caf0..8a53d96 100644 --- a/setup.py +++ b/setup.py @@ -16,8 +16,8 @@ install_requires=[ 'numpy >= 1.2, < 2.0', 'pandas >= 1.4, < 2.0', - 'shapely >= 2.0b2', - 'geopandas >= 0.12, < 1.0', + 'shapely >= 2, < 3.0', + 'geopandas >= 0.12.2, < 1.0', 'morecantile >= 3.1, < 4.0', 'Rtree >= 0.9, < 1.0', 'filelock >= 3.6, < 4.0',