From 1451f74f178c2f0015a0b14a9345f471fb526049 Mon Sep 17 00:00:00 2001 From: Craig de Stigter Date: Fri, 8 Nov 2024 14:12:05 +1300 Subject: [PATCH 1/8] Install mypy and some stub packages in dev deps --- requirements/dev.in | 11 +++++++++++ requirements/dev.txt | 34 ++++++++++++++++++++++++++++------ 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/requirements/dev.in b/requirements/dev.in index 02dac4184..8d2b111be 100644 --- a/requirements/dev.in +++ b/requirements/dev.in @@ -14,3 +14,14 @@ ipython pexpect>4.3 appnope colorama + +mypy + # note: not maintained; not compatible with sqlalchemy 2.x + # so, probably drop this when we upgrade sqlalchemy + sqlalchemy-stubs==0.4 + # NOTE: we actually still have pygit2 1.12, but this is the first release of types-pygit2 + # TODO: upgrade to pygit2 1.16+ and use the builtin types rather than using this package. + types-pygit2==1.14.0.20240317 + + types-tqdm + types-Pygments==2.13.0 diff --git a/requirements/dev.txt b/requirements/dev.txt index 3061840b8..ee7fcf9c6 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -1,9 +1,5 @@ -# -# This file is autogenerated by pip-compile with Python 3.12 -# by the following command: -# -# "cmake --build build --target py-requirements" -# +# This file was autogenerated by uv via the following command: +# uv pip compile dev.in -o dev.txt appnope==0.1.3 # via # -r dev.in @@ -33,6 +29,12 @@ jedi==0.18.2 # via ipython matplotlib-inline==0.1.6 # via ipython +mypy==1.13.0 + # via + # -r dev.in + # sqlalchemy-stubs +mypy-extensions==1.0.0 + # via mypy parso==0.8.3 # via jedi pexpect==4.8.0 @@ -58,11 +60,31 @@ six==1.16.0 # -c requirements.txt # -c test.txt # asttokens +sqlalchemy-stubs==0.4 + # via -r dev.in stack-data==0.6.2 # via ipython traitlets==5.7.1 # via # ipython # matplotlib-inline +types-cffi==1.16.0.20240331 + # via types-pygit2 +types-docutils==0.21.0.20241005 + # via types-pygments +types-pygit2==1.14.0.20240317 + # via -r dev.in +types-pygments==2.13.0 + # via -r dev.in +types-setuptools==75.3.0.20241107 + # via + # types-cffi + # types-pygments +types-tqdm==4.66.0.20240417 + # via -r dev.in +typing-extensions==4.12.2 + # via + # mypy + # sqlalchemy-stubs wcwidth==0.2.5 # via prompt-toolkit From 470022f23e683210f8a486192ea7d6bbb80e405b Mon Sep 17 00:00:00 2001 From: Craig de Stigter Date: Fri, 8 Nov 2024 16:44:01 +1300 Subject: [PATCH 2/8] Add type hints and mypy config --- kart/__init__.py | 11 +- kart/annotations/db.py | 6 +- kart/base_dataset.py | 344 ++++++++++++++++++++++- kart/base_diff_writer.py | 70 +++-- kart/cli_util.py | 10 +- kart/completion.py | 9 +- kart/conflicts_util.py | 7 +- kart/conflicts_writer.py | 104 +++---- kart/dataset_mixins.py | 359 ------------------------- kart/dataset_util.py | 2 +- kart/diff.py | 2 +- kart/diff_structs.py | 11 +- kart/html_diff_writer.py | 5 - kart/json_diff_writers.py | 29 +- kart/key_filters.py | 18 +- kart/merge_util.py | 15 +- kart/meta_items.py | 5 + kart/raw_diff_delta.py | 34 +++ kart/repo.py | 3 +- kart/schema.py | 4 +- kart/show.py | 2 +- kart/spatial_filter/__init__.py | 6 + kart/spatial_filter/index.py | 49 ++-- kart/sqlalchemy/adapter/postgis.py | 1 - kart/sqlalchemy/adapter/sqlserver.py | 2 +- kart/sqlalchemy/sqlserver.py | 4 +- kart/structure.py | 8 +- kart/tabular/import_source.py | 12 +- kart/tabular/rich_table_dataset.py | 2 +- kart/tabular/v2.py | 3 +- kart/tabular/v3.py | 4 +- kart/tabular/v3_paths.py | 4 + kart/tabular/working_copy/base.py | 8 +- kart/tabular/working_copy/db_server.py | 6 +- 
kart/tabular/working_copy/gpkg.py | 3 - kart/tabular/working_copy/sqlserver.py | 2 +- kart/tile/importer.py | 2 +- kart/tile/tile_dataset.py | 10 +- kart/utils.py | 8 + mypy.ini | 34 +++ requirements/dev.in | 9 +- requirements/dev.txt | 69 +++-- 42 files changed, 699 insertions(+), 597 deletions(-) delete mode 100644 kart/dataset_mixins.py create mode 100644 kart/raw_diff_delta.py create mode 100644 mypy.ini diff --git a/kart/__init__.py b/kart/__init__.py index b6f471901..b360e1eec 100644 --- a/kart/__init__.py +++ b/kart/__init__.py @@ -12,7 +12,6 @@ import importlib import logging import os -import platform import sys L = logging.getLogger("kart.__init__") @@ -40,7 +39,7 @@ ) try: - import _kart_env + import _kart_env # type: ignore[import] L.debug("Found _kart_env configuration module") except ImportError: @@ -48,9 +47,9 @@ _kart_env = None is_frozen = getattr(sys, "frozen", None) and hasattr(sys, "_MEIPASS") -is_darwin = platform.system() == "Darwin" -is_linux = platform.system() == "Linux" -is_windows = platform.system() == "Windows" +is_darwin = sys.platform == "darwin" +is_linux = sys.platform == "linux" +is_windows = sys.platform == "win32" if is_darwin: libsuffix = "dylib" @@ -146,7 +145,7 @@ def _env_path(path): os.environ["PATH"] = ( os.pathsep.join(path_extras) + os.pathsep + os.environ.get("PATH", "") ) -if is_windows: +if sys.platform == "win32": os.add_dll_directory(prefix if is_frozen else os.path.join(prefix, "lib")) # FIXME: git2.dll is in the package directory, but isn't setup for ctypes to use _pygit2_spec = importlib.util.find_spec("pygit2") diff --git a/kart/annotations/db.py b/kart/annotations/db.py index 92d9beb53..a771a2fbe 100644 --- a/kart/annotations/db.py +++ b/kart/annotations/db.py @@ -2,7 +2,7 @@ import json import logging import threading - +from typing import TypeAlias import sqlalchemy from kart.sqlalchemy.sqlite import sqlite_engine @@ -13,7 +13,9 @@ from sqlalchemy.schema import CreateTable L = logging.getLogger(__name__) -Base = declarative_base() + +# TODO: remove this type ignore when we upgrade sqlalchemy to 2.0 and replace with DeclarativeBase +Base: TypeAlias = declarative_base() # type: ignore[valid-type] class KartAnnotation(Base): diff --git a/kart/base_dataset.py b/kart/base_dataset.py index 0875bcf01..73d0594cb 100644 --- a/kart/base_dataset.py +++ b/kart/base_dataset.py @@ -1,14 +1,19 @@ import functools import logging import sys +from collections.abc import Iterable +from typing import Callable, Optional, Protocol, TYPE_CHECKING, ClassVar import click import pygit2 from kart.core import all_blobs_in_tree, all_blobs_with_paths_in_tree -from kart.dataset_mixins import DatasetDiffMixin +from kart.diff_format import DiffFormat +from kart.diff_structs import DatasetDiff, DeltaDiff, Delta from kart.exceptions import InvalidOperation, UNSUPPORTED_VERSION, PATCH_DOES_NOT_APPLY +from kart.key_filters import DatasetKeyFilter, MetaKeyFilter, UserStringKeyFilter from kart.meta_items import MetaItemFileType, MetaItemVisibility +from kart.raw_diff_delta import RawDiffDelta class BaseDatasetMetaClass(type): @@ -33,7 +38,7 @@ def __new__(*args): return dataset_cls -class BaseDataset(DatasetDiffMixin, metaclass=BaseDatasetMetaClass): +class BaseDataset(metaclass=BaseDatasetMetaClass): """ Common interface for all datasets. @@ -52,24 +57,26 @@ class BaseDataset(DatasetDiffMixin, metaclass=BaseDatasetMetaClass): # Subclasses should override these fields to properly implement the dataset interface. 
- DATASET_TYPE = None # Example: "hologram" - VERSION = None # Example: 1 - DATASET_DIRNAME = None # Example: ".hologram-dataset.v1". + DATASET_TYPE: ClassVar[str] # Example: "hologram" + VERSION: ClassVar[int] # Example: 1 + DATASET_DIRNAME: ClassVar[str | None] = None # Example: ".hologram-dataset.v1". # (This should match the pattern DATASET_DIRNAME_PATTERN in kart.structure.) # The name / type of main items that make up this dataset - ie, not the meta items. - ITEM_TYPE = None + ITEM_TYPE: ClassVar[str | None] = None - WORKING_COPY_PART_TYPE = None # Example: working_copy.PartType.TABULAR + WORKING_COPY_PART_TYPE: ClassVar["PartType | None"] = ( + None # Example: working_copy.PartType.TABULAR + ) # Paths are generally relative to self.inner_tree, but table datasets put certain meta items in the outer-tree. # (This is an anti-pattern - when designing a new dataset type, don't put meta items in the outer-tree.) # Where meta-items are stored - blobs containing metadata about the structure or schema of the dataset. - META_PATH = "meta/" + META_PATH: ClassVar[str] = "meta/" # No meta-items are defined by default - subclasses have to define these themselves. - META_ITEMS = () + META_ITEMS: ClassVar[tuple["MetaItemDefinition", ...]] = () @classmethod def is_dataset_tree(cls, tree): @@ -119,6 +126,317 @@ def __init__(self, tree, path, repo, dirname=None): self.ensure_only_supported_capabilities() + def diff( + self, + other: Optional["BaseDataset"], + ds_filter: DatasetKeyFilter = DatasetKeyFilter.MATCH_ALL, + reverse: bool = False, + diff_format: DiffFormat = DiffFormat.FULL, + ): + """ + Generates a Diff from self -> other. + If reverse is true, generates a diff from other -> self. + """ + ds_diff = DatasetDiff() + meta_filter = ds_filter.get("meta", ds_filter.child_type()) + ds_diff["meta"] = self.diff_meta(other, meta_filter, reverse=reverse) + return ds_diff + + def diff_meta( + self, + other: Optional["BaseDataset"], + meta_filter: UserStringKeyFilter = MetaKeyFilter.MATCH_ALL, + reverse: bool = False, + ): + """ + Returns a diff from self -> other, but only for meta items. + If reverse is true, generates a diff from other -> self. + """ + old: BaseDataset | None + new: BaseDataset | None + if reverse: + old, new = other, self + else: + old, new = self, other + + meta_old = ( + {k: v for k, v in old.meta_items().items() if k in meta_filter} + if old + else {} + ) + meta_new = ( + {k: v for k, v in new.meta_items().items() if k in meta_filter} + if new + else {} + ) + return DeltaDiff.diff_dicts(meta_old, meta_new) + + def diff_to_working_copy( + self, + workdir_diff_cache: "WorkdirDiffCache", + ds_filter: DatasetKeyFilter = DatasetKeyFilter.MATCH_ALL, + *, + convert_to_dataset_format: bool | None = None, + ): + """ + Generates a diff from self to the working-copy. + It may be the case that only the dataset-revision used to write the working + copy can be used to do this (if we are tracking changes from that revision). + See diff_util.get_dataset_diff() to generate diffs more generally. + """ + # Subclasses to override. + pass + + def get_raw_diff_for_subtree( + self, + other: Optional["BaseDataset"], + subtree_name: str, + reverse: bool = False, + ): + """ + Get a pygit2.Diff of the diff between some subtree of this dataset, and the same subtree of another dataset + (typically actually the "same" dataset, but at a different revision). 
+        """
+        flags = pygit2.GIT_DIFF_SKIP_BINARY_CHECK
+        self_subtree = self.get_subtree(subtree_name)
+        other_subtree = other.get_subtree(subtree_name) if other else self._empty_tree
+
+        diff = self_subtree.diff_to_tree(other_subtree, flags=flags, swap=reverse)
+        self.L.debug(
+            "diff %s (%s -> %s / %s): %s changes",
+            subtree_name,
+            self_subtree.id,
+            other_subtree.id if other_subtree else None,
+            "R" if reverse else "F",
+            len(diff),
+        )
+        return diff
+
+    def diff_subtree(
+        self,
+        other: Optional["BaseDataset"],
+        subtree_name: str,
+        key_filter: UserStringKeyFilter = UserStringKeyFilter.MATCH_ALL,
+        *,
+        key_decoder_method: str,
+        value_decoder_method: str,
+        key_encoder_method: Optional[str] = None,
+        reverse: bool = False,
+    ):
+        """
+        A pattern for datasets to use for diffing some specific subtree. Works as follows:
+        1. Take some specific subtree of self and of another dataset
+           (ie self.inner_tree / "feature", other.inner_tree / "feature")
+        2. Use get_raw_diff_for_subtree to get a pygit2.Diff of the changes between those two trees.
+        3. Go through all the resulting (insert, update, delete) deltas
+        4. Fix up the paths to be relative to the dataset again (ie, prepend "feature/" onto them all)
+        5. Run some transform on each path to decide what to call each item (eg decode primary key)
+        6. Run some transform on each path to load the content of each item (eg, read and decode feature)
+
+        Args:
+        other - a dataset similar to self (ie the same dataset, but at a different commit).
+            This can be None, in which case there are no items in other and they don't need to be transformed.
+        subtree_name - the name of the subtree of the dataset to scan for diffs.
+        key_filter - deltas are only yielded if they involve at least one key that matches the key filter.
+        key_decoder_method, value_decoder_method - these must be names of methods that are present in both
+            self and other - self's methods are used to decode self's items, and other's methods for other's items.
+        key_encoder_method - optional. A method that is present in both self and other that allows us to go
+            straight to the keys the user is interested in (if they have requested specific keys in the key_filter).
+        reverse - normally yields deltas from self -> other, but if reverse is True, yields deltas from other -> self.
+        """
+
+        subtree_name = subtree_name.rstrip("/")
+
+        if not key_filter.match_all and key_encoder_method is not None:
+            # Handle the case where we are only interested in a few features.
+ deltas = self.get_raw_deltas_for_keys( + other, key_encoder_method, key_filter, reverse=reverse + ) + else: + raw_diff = self.get_raw_diff_for_subtree( + other, subtree_name, reverse=reverse + ) + # NOTE - we could potentially call diff.find_similar() to detect renames here + deltas = self.wrap_deltas_from_raw_diff( + raw_diff, lambda path: f"{subtree_name}/{path}" + ) + + def _no_dataset_error(method_name): + raise RuntimeError( + f"Can't call {method_name} to decode diff deltas: dataset is None" + ) + + def get_dataset_attr(dataset, method_name): + if dataset is not None: + return getattr(dataset, method_name) + # This shouldn't happen: + return lambda x: _no_dataset_error(method_name) + + old: BaseDataset | None + new: BaseDataset | None + if reverse: + old, new = other, self + else: + old, new = self, other + + yield from self.transform_raw_deltas( + deltas, + key_filter, + old_key_transform=get_dataset_attr(old, key_decoder_method), + old_value_transform=get_dataset_attr(old, value_decoder_method), + new_key_transform=get_dataset_attr(new, key_decoder_method), + new_value_transform=get_dataset_attr(new, value_decoder_method), + ) + + # We treat UNTRACKED like an ADD since we don't have a staging area - + # if the user has untracked files, we have to assume they want to add them. + # (This is not actually needed right now since we are not using this for working copy diffs). + _INSERT_TYPES = (pygit2.GIT_DELTA_ADDED, pygit2.GIT_DELTA_UNTRACKED) + _UPDATE_TYPES = (pygit2.GIT_DELTA_MODIFIED,) + _DELETE_TYPES = (pygit2.GIT_DELTA_DELETED,) + + _INSERT_UPDATE_DELETE = _INSERT_TYPES + _UPDATE_TYPES + _DELETE_TYPES + _INSERT_UPDATE = _INSERT_TYPES + _UPDATE_TYPES + _UPDATE_DELETE = _UPDATE_TYPES + _DELETE_TYPES + + def get_raw_deltas_for_keys( + self, + other: Optional["BaseDataset"], + key_encoder_method: str, + key_filter: UserStringKeyFilter, + reverse: bool = False, + ): + """ + Since we know which keys we are looking for, we can encode those keys and look up those blobs directly. + We output this as a series of deltas, just as we do when we run a normal diff, so that we can + take output from either code path and use it to generate a kart diff using transform_raw_deltas. + """ + + def _expand_keys(keys): + # If the user asks for mydataset:feature:123 they might mean str("123") or int(123) - which + # would be encoded differently. We look up both paths to see what we can find. + for key in keys: + yield key + if isinstance(key, str) and key.isdigit(): + yield int(key) + + encode_fn = getattr(self, key_encoder_method) + paths = set() + for key in _expand_keys(key_filter): + try: + paths.add(encode_fn(key, relative=True)) + except TypeError: + # The path encoder for this dataset can't encode that key, so it won't be there. 
+                continue
+
+        old: BaseDataset | None
+        new: BaseDataset | None
+        if reverse:
+            old, new = other, self
+        else:
+            old, new = self, other
+
+        def _get_blob(dataset, path):
+            if dataset is None or dataset.inner_tree is None:
+                return None
+            try:
+                return dataset.inner_tree / path
+            except KeyError:
+                return None
+
+        for path in paths:
+            old_blob = _get_blob(old, path)
+            new_blob = _get_blob(new, path)
+            if old_blob is None and new_blob is None:
+                continue
+            if (
+                old_blob is not None
+                and new_blob is not None
+                and old_blob.oid == new_blob.oid
+            ):
+                continue
+            yield RawDiffDelta.of(
+                path if old_blob else None, path if new_blob else None
+            )
+
+    def wrap_deltas_from_raw_diff(
+        self,
+        raw_diff: pygit2.Diff,
+        path_transform: Callable[[str], str],
+    ):
+        for delta in raw_diff.deltas:
+            old_path = path_transform(delta.old_file.path) if delta.old_file else None
+            new_path = path_transform(delta.new_file.path) if delta.new_file else None
+            yield RawDiffDelta(delta.status, delta.status_char(), old_path, new_path)
+
+    def transform_raw_deltas(
+        self,
+        deltas: Iterable["RawDiffDelta"],
+        key_filter: UserStringKeyFilter = UserStringKeyFilter.MATCH_ALL,
+        *,
+        old_key_transform: Callable[[str], str] = lambda x: x,
+        old_value_transform: Callable[[str], str] = lambda x: x,
+        new_key_transform: Callable[[str], str] = lambda x: x,
+        new_value_transform: Callable[[str], str] = lambda x: x,
+    ):
+        """
+        Given a list of RawDiffDeltas - inserts, updates, and deletes that happened at particular paths -
+        yields a list of Kart deltas, which look something like ((old_key, old_value), (new_key, new_value)).
+        A key could be a path, a meta-item name, or a primary key value.
+
+        key_filter - deltas are discarded if they don't involve any keys that match the key filter.
+        old/new_key_transform - converts the path into a key.
+        old/new_value_transform - converts the canonical-path into a value,
+            presumably first by loading the file contents at that path.
+
+        If any transform is not set, that transform defaults to returning its input unchanged.
+        """
+        for d in deltas:
+            self.L.debug("diff(): %s %s %s", d.status_char, d.old_path, d.new_path)
+
+            if d.status not in self._INSERT_UPDATE_DELETE:
+                # RENAMED, COPIED, IGNORED, TYPECHANGE, UNMODIFIED, UNREADABLE
+                # We don't encounter these status codes in the diffs we generate.
+                raise NotImplementedError(f"Delta status: {d.status_char}")
+
+            if d.status in self._UPDATE_DELETE:
+                old_key = old_key_transform(d.old_path)
+            else:
+                old_key = None
+
+            if d.status in self._INSERT_UPDATE:
+                new_key = new_key_transform(d.new_path)
+            else:
+                new_key = None
+
+            if old_key not in key_filter and new_key not in key_filter:
+                continue
+
+            if d.status in self._INSERT_TYPES:
+                self.L.debug("diff(): insert %s (%s)", d.new_path, new_key)
+            elif d.status in self._UPDATE_TYPES:
+                self.L.debug(
+                    "diff(): update %s %s -> %s %s",
+                    d.old_path,
+                    old_key,
+                    d.new_path,
+                    new_key,
+                )
+            elif d.status in self._DELETE_TYPES:
+                self.L.debug("diff(): delete %s %s", d.old_path, old_key)
+
+            if d.status in self._UPDATE_DELETE:
+                old_half_delta = old_key, old_value_transform(d.old_path)
+            else:
+                old_half_delta = None
+
+            if d.status in self._INSERT_UPDATE:
+                new_half_delta = new_key, new_value_transform(d.new_path)
+            else:
+                new_half_delta = None
+
+            yield Delta(old_half_delta, new_half_delta)
+
     def ensure_only_supported_capabilities(self):
         # TODO - loosen this restriction.
A dataset with capabilities that we don't support should (at worst) be treated # the same as any other unsupported dataset. @@ -138,7 +456,7 @@ def ensure_only_supported_capabilities(self): ) @functools.lru_cache() - def get_subtree(self, subtree_path): + def get_subtree(self, subtree_path) -> pygit2.Tree: if self.inner_tree is not None: try: return self.inner_tree / subtree_path @@ -472,3 +790,9 @@ def decode_path(self, path): else: # It generally shouldn't happen that we have files in the top-level. return ("", rel_path) + + +if TYPE_CHECKING: + from kart.working_copy import PartType + from kart.meta_items import MetaItemDefinition + from kart.workdir import WorkdirDiffCache diff --git a/kart/base_diff_writer.py b/kart/base_diff_writer.py index 63a2fdbdc..3f109389e 100644 --- a/kart/base_diff_writer.py +++ b/kart/base_diff_writer.py @@ -4,12 +4,19 @@ import re import sys from pathlib import Path +from typing import Generator import click from kart import diff_util from kart.diff_format import DiffFormat -from kart.diff_structs import FILES_KEY, WORKING_COPY_EDIT, BINARY_FILE, Delta +from kart.diff_structs import ( + FILES_KEY, + WORKING_COPY_EDIT, + BINARY_FILE, + Delta, + DatasetDiff, +) from kart.exceptions import CrsError, InvalidOperation from kart.key_filters import RepoKeyFilter from kart import list_of_conflicts @@ -736,8 +743,51 @@ def _check_for_linked_dataset_changes(self, ds_path, ds_diff): if dataset and dataset.get_meta_item("linked-storage.json") and ds_diff: self.linked_dataset_changes.add(ds_path) + def filtered_dataset_deltas_as_geojson( + self, ds_path: str, ds_diff: DatasetDiff + ) -> Generator[dict, None, None]: + from kart.tabular.feature_output import feature_as_geojson -class FeatureDeltaFetcher: + if "feature" not in ds_diff: + return + + old_transform, new_transform = self.get_geometry_transforms(ds_path, ds_diff) + + for key, delta in self.filtered_dataset_deltas(ds_path, ds_diff): + if delta.old: + change_type = "U-" if delta.new else "D" + yield feature_as_geojson( + delta.old_value, + delta.old_key, + ds_path, + change_type, + old_transform, + ) + if delta.new: + change_type = "U+" if delta.old else "I" + yield feature_as_geojson( + delta.new_value, + delta.new_key, + ds_path, + change_type, + new_transform, + ) + + +class BaseDeltaFetcher: + def _is_delta_value_ready(self, delta_key_value): + if delta_key_value is None: + return True + try: + delta_key_value.get_lazy_value() + return True + except KeyError as e: + if object_is_promised(e): + return False + raise + + +class FeatureDeltaFetcher(BaseDeltaFetcher): """ Given a diff Delta, either reports that it is available immediately, or kicks off a fetch so that it will be available soon, and adds it to the list of buffered deltas. This lets the diff writer above first output the deltas @@ -795,19 +845,8 @@ def finish_fetching_deltas(self): self._fetch_process.finish() yield from self.buffered_deltas - def _is_delta_value_ready(self, delta_key_value): - if delta_key_value is None: - return True - try: - delta_key_value.get_lazy_value() - return True - except KeyError as e: - if object_is_promised(e): - return False - raise - -class NullDeltaFetcher: +class NullDeltaFetcher(BaseDeltaFetcher): """ Given a diff Delta, checks to make sure that it is immediately available. If it is not, outputs an error message. """ @@ -834,6 +873,3 @@ def ensure_delta_is_ready_or_start_fetch(self, key, delta): def finish_fetching_deltas(self): # Nothing to do here. 
yield from () - - -NullDeltaFetcher._is_delta_value_ready = FeatureDeltaFetcher._is_delta_value_ready diff --git a/kart/cli_util.py b/kart/cli_util.py index a303bbf90..a5a28a4a6 100644 --- a/kart/cli_util.py +++ b/kart/cli_util.py @@ -114,7 +114,7 @@ def render_posix(command_path: str) -> None: p.communicate() -def render_windows(command_path: str) -> bytes: +def render_windows(command_path: str) -> None: from kart import prefix text_page = Path(prefix) / "help" / f'{command_path.replace(" ", "-")}' @@ -155,8 +155,8 @@ def login(ctx=None, username:str=None, password:str=None, token:str=None) -> Non print("...do what you like with the params you got...") """ - def __init__(self, *args, **kwargs): - self.exclusive_with: list = kwargs.pop("exclusive_with") + def __init__(self, *args, exclusive_with: list[str], **kwargs): + self.exclusive_with = exclusive_with assert self.exclusive_with, "'exclusive_with' parameter required" kwargs["help"] = ( @@ -167,7 +167,7 @@ def __init__(self, *args, **kwargs): ).strip() super().__init__(*args, **kwargs) - def handle_parse_result(self, ctx, opts, args): + def handle_parse_result(self, ctx, opts, args: list[str]): current_opt: bool = self.name in opts for other_name in self.exclusive_with: if other_name in opts: @@ -179,7 +179,7 @@ def handle_parse_result(self, ctx, opts, args): f"is mutually exclusive with {other.get_error_hint(ctx)}." ) else: - self.prompt = None + self.prompt = "" return super().handle_parse_result(ctx, opts, args) diff --git a/kart/completion.py b/kart/completion.py index 8a3f68ae9..d4e966415 100644 --- a/kart/completion.py +++ b/kart/completion.py @@ -12,10 +12,7 @@ from kart import subprocess_util as subprocess -try: - import shellingham -except ImportError: - shellingham = None +import shellingham def provide_default_shell(): @@ -190,12 +187,14 @@ def install_powershell(*, prog_name: str, complete_var: str, shell: str) -> Path def install_tab_completion( shell: Optional[str] = None, ) -> Tuple[str, Path]: - if shell is None and shellingham is not None: + if shell is None: try: shell, _ = shellingham.detect_shell() except shellingham.ShellDetectionFailure: shell = os.path.basename(provide_default_shell()) + assert shell + # Hardcode these variables - kart helper means that click can get mixed up between KART and KART_CLI, # but this means we always use KART in practise. kwargs = {"prog_name": "kart", "complete_var": "_KART_COMPLETE", "shell": shell} diff --git a/kart/conflicts_util.py b/kart/conflicts_util.py index 3dbd5ea6a..27d2556de 100644 --- a/kart/conflicts_util.py +++ b/kart/conflicts_util.py @@ -78,7 +78,7 @@ def _path_part_sort_key(path_part): return "N", path_part -def conflicts_json_as_text(value, path="", level=0) -> str: +def conflicts_json_as_text(value: str | int | dict | list, path="", level=0) -> str: """Converts the JSON output of list_conflicts to a string. 
The conflicts themselves should already be in the appropriate format - @@ -96,6 +96,8 @@ def conflicts_json_as_text(value, path="", level=0) -> str: elif isinstance(value, list): indent = " " * level return "".join(f"{indent}{path}{item}\n" for item in value) + else: + raise ValueError(f"Unexpected value type: {type(value)}") def item_to_text(key: str, value: dict, path: str, level: int) -> str: @@ -110,7 +112,7 @@ def item_to_text(key: str, value: dict, path: str, level: int) -> str: return f"{styled_key_text}\n{value_text}" -def get_key_text_color(key_text: str) -> str: +def get_key_text_color(key_text: str) -> str | None: """Takes a given path and outputs an appropriate style for it The format for the key_text is: @@ -125,3 +127,4 @@ def get_key_text_color(key_text: str) -> str: for key, color in style.items(): if key_text.endswith(key): return color + return None diff --git a/kart/conflicts_writer.py b/kart/conflicts_writer.py index 713d5e885..acc308847 100644 --- a/kart/conflicts_writer.py +++ b/kart/conflicts_writer.py @@ -3,8 +3,9 @@ import sys import click import pygit2 +from typing import ClassVar -from typing import Union, Type, List +from typing import Union, Type, List, Iterable from pathlib import Path from .conflicts_util import ( @@ -23,6 +24,7 @@ ensure_conflicts_ready, ) from .output_util import dump_json_output, resolve_output_path +from .repo import KartRepo from . import diff_util L = logging.getLogger("kart.conflicts_writer") @@ -33,25 +35,25 @@ class BaseConflictsWriter: A base class for writing conflicts output. """ + output_format: ClassVar[str] + def __init__( self, - repo: pygit2.Repository, + repo: KartRepo, user_key_filters: tuple = (), output_path: str = "-", summarise: int = 0, flat: bool = False, *, json_style: str = "pretty", - target_crs: CoordinateReferenceString = None, - merged_index: MergedIndex = None, - merge_context: MergeContext = None, + target_crs: CoordinateReferenceString | None = None, + merged_index: MergedIndex | None = None, + merge_context: MergeContext | None = None, ): self.repo = repo self.target_crs = target_crs self.flat = flat self.summarise = summarise - self.merge_context = merge_context - self.merged_index = merged_index self.repo_key_filter = RepoKeyFilter.build_from_user_patterns(user_key_filters) self.json_style = json_style self.output_path = self._check_output_path( @@ -65,12 +67,14 @@ def __init__( ) if merged_index is None: - self.merged_index = MergedIndex.read_from_repo(repo) + merged_index = MergedIndex.read_from_repo(repo) + self.merged_index = merged_index if merge_context is None: - self.merge_context = MergeContext.read_from_repo(repo) + merge_context = MergeContext.read_from_repo(repo) + self.merge_context = merge_context @classmethod - def _normalize_output_path(cls, output_path: str) -> Union[str, Path]: + def _normalize_output_path(cls, output_path: str | Path) -> str | Path: if not output_path or output_path == "-": return output_path if isinstance(output_path, str): @@ -82,23 +86,20 @@ def _normalize_output_path(cls, output_path: str) -> Union[str, Path]: @classmethod def get_conflicts_writer_class( cls, output_format: str - ) -> Union[Type[BaseConflictsWriter], None]: + ) -> Type[BaseConflictsWriter] | None: """Returns suitable subclass for desired output format""" - output_format_to_writer = { - "quiet": QuietConflictsWriter, - "json": JsonConflictsWriter, - "text": TextConflictsWriter, - "geojson": GeojsonConflictsWriter, - } - - cls.output_format = output_format - if not 
output_format_to_writer.get(output_format): - raise click.BadParameter( - f"Unrecognized output format: {output_format}", - param_hint="output_format", - ) - - return output_format_to_writer.get(output_format) + for klass in ( + QuietConflictsWriter, + JsonConflictsWriter, + TextConflictsWriter, + GeojsonConflictsWriter, + ): + if klass.output_format == output_format: + return klass + raise click.BadParameter( + f"Unrecognized output format: {output_format}", + param_hint="output_format", + ) def get_conflicts(self) -> List[RichConflict]: """Returns a list of rich conflicts""" @@ -112,13 +113,12 @@ def get_conflicts(self) -> List[RichConflict]: def _check_output_path( cls, repo: pygit2.Repository, - output_path: Union[str, Path], - output_format: str = None, - ) -> Union[str, Path]: + output_path: str | Path, + ) -> str | Path: """Make sure the given output_path is valid for this implementation (ie, are directories supported).""" - if output_format and isinstance(output_path, Path) and output_path.is_dir(): + if isinstance(output_path, Path) and output_path.is_dir(): raise click.BadParameter( - f"Directory is not valid for --output with -o {output_format}", + f"Directory is not valid for --output with -o {cls.output_format}", param_hint="--output", ) return output_path @@ -152,7 +152,7 @@ def list_conflicts(self) -> dict: output_dict = {} conflict_output = _CONFLICT_PLACEHOLDER - conflicts = self.get_conflicts() + conflicts: Iterable[RichConflict] = self.get_conflicts() if not self.repo_key_filter.match_all: conflicts = (c for c in conflicts if c.matches_filter(self.repo_key_filter)) @@ -183,6 +183,8 @@ def list_conflicts(self) -> dict: class QuietConflictsWriter(BaseConflictsWriter): + output_format = "quiet" + def write_conflicts(self): pass @@ -197,15 +199,16 @@ class JsonConflictsWriter(BaseConflictsWriter): {"kart.conflicts/v1": {"dataset-path": { "feature": { "id": { "ancestor/ours/theirs": [...]}}}}} """ + output_format = "json" + @classmethod def _check_output_path( cls, repo: pygit2.Repository, - output_path: Union[str, Path], - output_format: str = "json", - ) -> Union[str, Path]: + output_path: str | Path, + ) -> str | Path: """Make sure the given output_path is valid for this implementation (ie, are directories supported).""" - return super()._check_output_path(repo, output_path, output_format) + return super()._check_output_path(repo, output_path) def write_conflicts(self): output_obj = super().list_conflicts() @@ -226,6 +229,8 @@ class TextConflictsWriter(BaseConflictsWriter): the changes on the ancestor, theirs and ours branch. """ + output_format = "text" + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.fp = resolve_output_path(self.output_path) @@ -235,11 +240,10 @@ def __init__(self, *args, **kwargs): def _check_output_path( cls, repo: pygit2.Repository, - output_path: Union[str, Path], - output_format: str = "text", - ) -> Union[str, Path]: + output_path: str | Path, + ) -> str | Path: """Make sure the given output_path is valid for this implementation (ie, are directories supported).""" - return super()._check_output_path(repo, output_path, output_format) + return super()._check_output_path(repo, output_path) def write_conflicts(self): output_dict = super().list_conflicts() @@ -262,13 +266,19 @@ class GeojsonConflictsWriter(BaseConflictsWriter): Meta conflicts aren't output at all. 
""" + output_format = "geojson" + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.flat = True self.summarise = 0 @classmethod - def _check_output_path(cls, repo: pygit2.Repository, output_path: Path): + def _check_output_path( + cls, + repo: pygit2.Repository, + output_path: str | Path, + ) -> str | Path: """Make sure the given output_path is valid for this implementation (ie, are directories supported).""" if isinstance(output_path, Path): if output_path.is_file(): @@ -298,7 +308,7 @@ def write_conflicts(self) -> None: "Need to specify a directory via --output for GeoJSON with more than one dataset", param_hint="--output", ) - conflicts = self.get_conflicts() + conflicts: Iterable[RichConflict] = self.get_conflicts() if not self.repo_key_filter.match_all: conflicts = (c for c in conflicts if c.matches_filter(self.repo_key_filter)) @@ -308,7 +318,7 @@ def write_conflicts(self) -> None: geojson_conflicts = self.get_geojson_conflicts(conflicts) self.output_geojson_conflicts(geojson_conflicts) - def get_geojson_conflicts(self, conflicts: List[RichConflict]) -> dict: + def get_geojson_conflicts(self, conflicts: Iterable[RichConflict]) -> dict: """Returns geojson conflicts as a dict""" output_dict = {} for conflict in conflicts: @@ -328,7 +338,9 @@ def output_geojson_conflicts(self, json_obj: dict) -> None: conflicts = self.separate_geojson_conflicts_by_ds(json_obj) for ds_path, features in conflicts.items(): - if self.output_path == "-": + ds_output_path: Path | str + if isinstance(self.output_path, str): + assert self.output_path == "-" ds_output_path = "-" else: ds_output_filename = str(ds_path).replace("/", "__") + ".geojson" @@ -341,9 +353,9 @@ def output_geojson_conflicts(self, json_obj: dict) -> None: json_style=self.json_style, ) - def separate_geojson_conflicts_by_ds(self, json_obj: dict) -> dict: + def separate_geojson_conflicts_by_ds(self, json_obj: dict[str, dict]) -> dict: """Separates geojson conflicts by datasets""" - conflicts = dict() + conflicts: dict[str, list[dict]] = dict() for key, feature in json_obj.items(): if "meta" == key.split(":")[1]: continue diff --git a/kart/dataset_mixins.py b/kart/dataset_mixins.py deleted file mode 100644 index 8e9aad785..000000000 --- a/kart/dataset_mixins.py +++ /dev/null @@ -1,359 +0,0 @@ -from collections.abc import Iterable -from typing import Callable, Optional, TYPE_CHECKING - -import pygit2 - -from kart.diff_structs import DatasetDiff, DeltaDiff, Delta -from kart.diff_format import DiffFormat -from kart.key_filters import DatasetKeyFilter, MetaKeyFilter, UserStringKeyFilter - - -class DatasetDiffMixin: - """ - This mixin should be added to a dataset to add diffing functionality. - - self.diff_meta() should work "out of the box" as long as self.meta_items() is implemented. - self.diff_subtree() can be called with appropriate args to generate diffs of dataset contents, eg, features. - """ - - # Returns the meta-items diff for this dataset. - def diff( - self: "BaseDataset", - other: Optional["BaseDataset"], - ds_filter: DatasetKeyFilter = DatasetKeyFilter.MATCH_ALL, - reverse: bool = False, - diff_format: DiffFormat = DiffFormat.FULL, - ): - """ - Generates a Diff from self -> other. - If reverse is true, generates a diff from other -> self. 
- """ - ds_diff = DatasetDiff() - meta_filter = ds_filter.get("meta", ds_filter.child_type()) - ds_diff["meta"] = self.diff_meta(other, meta_filter, reverse=reverse) - return ds_diff - - def diff_meta( - self: "BaseDataset", - other: Optional["BaseDataset"], - meta_filter: UserStringKeyFilter = MetaKeyFilter.MATCH_ALL, - reverse: bool = False, - ): - """ - Returns a diff from self -> other, but only for meta items. - If reverse is true, generates a diff from other -> self. - """ - if reverse: - old, new = other, self - else: - old, new = self, other - - meta_old = ( - {k: v for k, v in old.meta_items().items() if k in meta_filter} - if old - else {} - ) - meta_new = ( - {k: v for k, v in new.meta_items().items() if k in meta_filter} - if new - else {} - ) - return DeltaDiff.diff_dicts(meta_old, meta_new) - - def diff_to_working_copy( - self: "BaseDataset", - workdir_diff_cache: "WorkdirDiffCache", - ds_filter: DatasetKeyFilter = DatasetKeyFilter.MATCH_ALL, - *, - convert_to_dataset_format: bool = None, - ): - """ - Generates a diff from self to the working-copy. - It may be the case that only the dataset-revision used to write the working - copy can be used to do this (if we are tracking changes from that revision). - See diff_util.get_dataset_diff() to generate diffs more generally. - """ - # Subclasses to override. - pass - - def get_raw_diff_for_subtree( - self: "BaseDataset", - other: Optional["BaseDataset"], - subtree_name: str, - reverse: bool = False, - ): - """ - Get a pygit2.Diff of the diff between some subtree of this dataset, and the same subtree of another dataset - (typically actually the "same" dataset, but at a different revision). - """ - flags = pygit2.GIT_DIFF_SKIP_BINARY_CHECK - self_subtree = self.get_subtree(subtree_name) - other_subtree = other.get_subtree(subtree_name) if other else self._empty_tree - - diff = self_subtree.diff_to_tree(other_subtree, flags=flags, swap=reverse) - self.L.debug( - "diff %s (%s -> %s / %s): %s changes", - subtree_name, - self_subtree.id, - other_subtree.id if other_subtree else None, - "R" if reverse else "F", - len(diff), - ) - return diff - - def diff_subtree( - self: "BaseDataset", - other: Optional["BaseDataset"], - subtree_name: str, - key_filter: UserStringKeyFilter = UserStringKeyFilter.MATCH_ALL, - *, - key_decoder_method: str, - value_decoder_method: str, - key_encoder_method: Optional[str] = None, - reverse: bool = False, - ): - """ - A pattern for datasets to use for diffing some specific subtree. Works as follows: - 1. Take some specific subtree of self and of a other - (ie self.inner_tree / "feature", other.inner_tree / "feature") - 2. Use get_raw_diff_from_subtree to get a pygit2.Diff of the changes between those two trees. - 3. Go through all the resulting (insert, update, delete) deltas - 4. Fix up the paths to be relative to the dataset again (ie, prepend "feature/" onto them all - 5. Run some transform on each path to decide what to call each item (eg decode primary key) - 6. Run some transform on each path to load the content of each item (eg, read and decode feature) - - Args: - other - a dataset similar to self (ie the same dataset, but at a different commit). - This can be None, in which case there are no items in other and they don't need to be transformed. - subtree_name - the name of the subtree of the dataset to scan for diffs. - key_filter - deltas are only yielded if they involve at least one key that matches the key filter. 
- key_decoder_method, value_decoder_method - these must be names of methods that are present in both - self and other - self's methods are used to decode self's items, and other's methods for other's items. - key_encoder_method - optional. A method that is present in both self and other that allows us to go - straight to the keys the user is interested in (if they have requested specific keys in the key_filter). - reverse - normally yields deltas from self -> other, but if reverse is True, yields deltas from other -> self. - """ - - subtree_name = subtree_name.rstrip("/") - - if not key_filter.match_all and key_encoder_method is not None: - # Handle the case where we are only interested in a few features. - deltas = self.get_raw_deltas_for_keys( - other, key_encoder_method, key_filter, reverse=reverse - ) - else: - raw_diff = self.get_raw_diff_for_subtree( - other, subtree_name, reverse=reverse - ) - # NOTE - we could potentially call diff.find_similar() to detect renames here - deltas = self.wrap_deltas_from_raw_diff( - raw_diff, lambda path: f"{subtree_name}/{path}" - ) - - def _no_dataset_error(method_name): - raise RuntimeError( - f"Can't call {method_name} to decode diff deltas: dataset is None" - ) - - def get_dataset_attr(dataset, method_name): - if dataset is not None: - return getattr(dataset, method_name) - # This shouldn't happen: - return lambda x: _no_dataset_error(method_name) - - if reverse: - old, new = other, self - else: - old, new = self, other - - yield from self.transform_raw_deltas( - deltas, - key_filter, - old_key_transform=get_dataset_attr(old, key_decoder_method), - old_value_transform=get_dataset_attr(old, value_decoder_method), - new_key_transform=get_dataset_attr(new, key_decoder_method), - new_value_transform=get_dataset_attr(new, value_decoder_method), - ) - - # We treat UNTRACKED like an ADD since we don't have a staging area - - # if the user has untracked files, we have to assume they want to add them. - # (This is not actually needed right now since we are not using this for working copy diffs). - _INSERT_TYPES = (pygit2.GIT_DELTA_ADDED, pygit2.GIT_DELTA_UNTRACKED) - _UPDATE_TYPES = (pygit2.GIT_DELTA_MODIFIED,) - _DELETE_TYPES = (pygit2.GIT_DELTA_DELETED,) - - _INSERT_UPDATE_DELETE = _INSERT_TYPES + _UPDATE_TYPES + _DELETE_TYPES - _INSERT_UPDATE = _INSERT_TYPES + _UPDATE_TYPES - _UPDATE_DELETE = _UPDATE_TYPES + _DELETE_TYPES - - def get_raw_deltas_for_keys( - self: "BaseDataset", - other: Optional["BaseDataset"], - key_encoder_method: str, - key_filter: UserStringKeyFilter, - reverse: bool = False, - ): - """ - Since we know which keys we are looking for, we can encode those keys and look up those blobs directly. - We output this as a series of deltas, just as we do when we run a normal diff, so that we can - take output from either code path and use it to generate a kart diff using transform_raw_deltas. - """ - - def _expand_keys(keys): - # If the user asks for mydataset:feature:123 they might mean str("123") or int(123) - which - # would be encoded differently. We look up both paths to see what we can find. - for key in keys: - yield key - if isinstance(key, str) and key.isdigit(): - yield int(key) - - encode_fn = getattr(self, key_encoder_method) - paths = set() - for key in _expand_keys(key_filter): - try: - paths.add(encode_fn(key, relative=True)) - except TypeError: - # The path encoder for this dataset can't encode that key, so it won't be there. 
- continue - - if reverse: - old, new = other, self - else: - old, new = self, other - - def _get_blob(dataset, path): - if dataset is None or dataset.inner_tree is None: - return None - try: - return dataset.inner_tree / path - except KeyError: - return None - - for path in paths: - old_blob = _get_blob(old, path) - new_blob = _get_blob(new, path) - if old_blob is None and new_blob is None: - continue - if ( - old_blob is not None - and new_blob is not None - and old_blob.oid == new_blob.oid - ): - continue - yield RawDiffDelta.of( - path if old_blob else None, path if new_blob else None - ) - - def wrap_deltas_from_raw_diff( - self: "BaseDataset", raw_diff: pygit2.Diff, path_transform: Callable[[str], str] - ): - for delta in raw_diff.deltas: - old_path = path_transform(delta.old_file.path) if delta.old_file else None - new_path = path_transform(delta.new_file.path) if delta.new_file else None - yield RawDiffDelta(delta.status, delta.status_char(), old_path, new_path) - - def transform_raw_deltas( - self: "BaseDataset", - deltas: Iterable["RawDiffDelta"], - key_filter: UserStringKeyFilter = UserStringKeyFilter.MATCH_ALL, - *, - old_key_transform: Callable[[str], str] = lambda x: x, - old_value_transform: Callable[[str], str] = lambda x: x, - new_key_transform: Callable[[str], str] = lambda x: x, - new_value_transform: Callable[[str], str] = lambda x: x, - ): - """ - Given a list of RawDiffDeltas - inserts, updates, and deletes that happened at particular paths - - yields a list of Kart deltas, which look something like ((old_key, old_value), (new_key, new_value)). - A key could be a path, a meta-item name, or a primary key value. - - key-filter - deltas are discarded if they don't involve any keys that matches the key filter. - old/new_key_transform - converts the path into a key. - old/new_value_transform - converts the canonical-path into a value, - presumably first by loading the file contents at that path. - - If any transform is not set, that transform defaults to returning the value it was input. - """ - for d in deltas: - self.L.debug("diff(): %s %s %s", d.status_char, d.old_path, d.new_path) - - if d.status not in self._INSERT_UPDATE_DELETE: - # RENAMED, COPIED, IGNORED, TYPECHANGE, UNMODIFIED, UNREADABLE - # We don't enounter these status codes in the diffs we generate. - raise NotImplementedError(f"Delta status: {d.status_char}") - - if d.status in self._UPDATE_DELETE: - old_key = old_key_transform(d.old_path) - else: - old_key = None - - if d.status in self._INSERT_UPDATE: - new_key = new_key_transform(d.new_path) - else: - new_key = None - - if old_key not in key_filter and new_key not in key_filter: - continue - - if d.status in self._INSERT_TYPES: - self.L.debug("diff(): insert %s (%s)", d.new_path, new_key) - elif d.status in self._UPDATE_TYPES: - self.L.debug( - "diff(): update %s %s -> %s %s", - d.old_path, - old_key, - d.new_path, - new_key, - ) - elif d.status in self._DELETE_TYPES: - self.L.debug("diff(): delete %s %s", d.old_path, old_key) - - if d.status in self._UPDATE_DELETE: - old_half_delta = old_key, old_value_transform(d.old_path) - else: - old_half_delta = None - - if d.status in self._INSERT_UPDATE: - new_half_delta = new_key, new_value_transform(d.new_path) - else: - new_half_delta = None - - yield Delta(old_half_delta, new_half_delta) - - -class RawDiffDelta: - """ - Just like pygit2.DiffDelta, this simply stores the fact that a particular git blob has changed. - Exactly how it is changed is not stored - just the status and the blob paths. 
- Contrast with diff_structs.Delta, which is higher level - it stores information about - a particular feature or meta-item that has changed, and exposes the values it has changed from and to. - - This is needed to fill the same purpose as pygit2.DiffDelta because pygit2.DiffDelta's can't be - created except by running a pygit2 diff - which we don't always want to do when generating diff deltas: - see get_raw_deltas_for_keys. - """ - - __slots__ = ["status", "status_char", "old_path", "new_path"] - - def __init__(self, status, status_char, old_path, new_path): - self.status = status - self.status_char = status_char - self.old_path = old_path - self.new_path = new_path - - @classmethod - def of(cls, old_path, new_path, reverse=False): - if reverse: - old_path, new_path = new_path, old_path - - if old_path is None: - return RawDiffDelta(pygit2.GIT_DELTA_ADDED, "A", old_path, new_path) - elif new_path is None: - return RawDiffDelta(pygit2.GIT_DELTA_DELETED, "D", old_path, new_path) - else: - return RawDiffDelta(pygit2.GIT_DELTA_MODIFIED, "M", old_path, new_path) - - -if TYPE_CHECKING: - # This is here to avoid circular imports - from kart.base_dataset import BaseDataset, WorkdirDiffCache diff --git a/kart/dataset_util.py b/kart/dataset_util.py index 4aeabe2c3..31e408a3f 100644 --- a/kart/dataset_util.py +++ b/kart/dataset_util.py @@ -81,7 +81,7 @@ def _validate_dataset_path(path: str) -> None: def validate_dataset_paths(paths: list[str]) -> None: - existing_paths_lower = {} + existing_paths_lower: dict[str, str] = {} for path in paths: _validate_dataset_path(path) path_lower = path.casefold() diff --git a/kart/diff.py b/kart/diff.py index 958352d1c..5f7a7a6c1 100644 --- a/kart/diff.py +++ b/kart/diff.py @@ -132,7 +132,7 @@ def feature_count_diff( ) @click.option( "--diff-format", - type=click.Choice(DiffFormat), + type=click.Choice(list(DiffFormat)), default=DiffFormat.FULL, help="Choose the diff format: \n'full' for full diff or 'no-data-changes' for metadata and a bool indicating the feature/tile tree changes.", ) diff --git a/kart/diff_structs.py b/kart/diff_structs.py index b1c0da75f..5ff7b89a1 100644 --- a/kart/diff_structs.py +++ b/kart/diff_structs.py @@ -145,7 +145,7 @@ def key(self): # This mostly works, but isn't perfect when renames are involved. return self.old_key if self.old_key is not None else self.new_key - def __add__(self, other): + def __add__(self, other: "Delta"): """Concatenate this delta with the subsequent delta, return the result as a single delta.""" # Note: this method assumes that the deltas being concatenated are related, # ie that self.new == other.old. Don't try to concatenate arbitrary deltas together. @@ -237,7 +237,7 @@ class RichDict(UserDict): or recursive_set, even if this involves creating extra dicts to contain the new value. """ - child_type = None + child_type: type | tuple[type, ...] | None = None def __init__(self, *args, **kwargs): if type(self) == RichDict: @@ -375,7 +375,7 @@ def __invert__(self): result[key] = ~value return result - def __add__(self, other, result=None): + def __add__(self, other: "Diff"): """Concatenate this Diff to the subsequent Diff, by concatenating all children with matching keys.""" # FIXME: this algorithm isn't perfect when renames are involved. 
@@ -383,8 +383,7 @@ def __add__(self, other, result=None): if type(self) != type(other): raise TypeError(f"Diff type mismatch: {type(self)} != {type(other)}") - if result is None: - result = self.empty_copy() + result = self.empty_copy() for key in self.keys() | other.keys(): lhs = self.get(key) @@ -399,7 +398,7 @@ def __add__(self, other, result=None): result[key] = lhs if lhs is not None else rhs return result - def __iadd__(self, other): + def __iadd__(self, other: "Diff"): """ Concatenate this Diff to the subsequent Diff, by concatenating all children with matching keys. Slightly faster than __add__, modifies self in place. diff --git a/kart/html_diff_writer.py b/kart/html_diff_writer.py index 856835d8e..545f47290 100644 --- a/kart/html_diff_writer.py +++ b/kart/html_diff_writer.py @@ -80,8 +80,3 @@ def substitute_into_template(cls, template, title, all_datasets_geojson): .replace(">", r"\x3e"), } ) - - -HtmlDiffWriter.filtered_dataset_deltas_as_geojson = ( - GeojsonDiffWriter.filtered_dataset_deltas_as_geojson -) diff --git a/kart/json_diff_writers.py b/kart/json_diff_writers.py index 661d9bfaa..48509fff1 100644 --- a/kart/json_diff_writers.py +++ b/kart/json_diff_writers.py @@ -3,6 +3,7 @@ import threading from datetime import datetime, timedelta, timezone from pathlib import Path +from typing import Generator import click @@ -476,31 +477,3 @@ def _warn_about_any_non_feature_diffs( f"Warning: {count} tile changes in {ds_path} aren't included in GeoJSON output", err=True, ) - - def filtered_dataset_deltas_as_geojson( - self, ds_path: str, ds_diff: DatasetDiff - ) -> Union[None, dict]: - if "feature" not in ds_diff: - return - - old_transform, new_transform = self.get_geometry_transforms(ds_path, ds_diff) - - for key, delta in self.filtered_dataset_deltas(ds_path, ds_diff): - if delta.old: - change_type = "U-" if delta.new else "D" - yield feature_as_geojson( - delta.old_value, - delta.old_key, - ds_path, - change_type, - old_transform, - ) - if delta.new: - change_type = "U+" if delta.old else "I" - yield feature_as_geojson( - delta.new_value, - delta.new_key, - ds_path, - change_type, - new_transform, - ) diff --git a/kart/key_filters.py b/kart/key_filters.py index 17c1b496a..08b45e98c 100644 --- a/kart/key_filters.py +++ b/kart/key_filters.py @@ -1,5 +1,6 @@ import fnmatch import re +from typing import ClassVar import click @@ -18,6 +19,8 @@ class UserStringKeyFilter(set): matches them against a set of strings the user has supplied. """ + MATCH_ALL: ClassVar["UserStringKeyFilter"] + def __init__(self, *args, match_all=False, **kwargs): super().__init__(*args, **kwargs) self.match_all = match_all @@ -80,6 +83,8 @@ class KeyFilterDict(RichDict): at any/all keys, and that child also matches all. """ + MATCH_ALL: ClassVar["KeyFilterDict"] + def __init__(self, *args, match_all=False, **kwargs): super().__init__(*args, **kwargs) self.match_all = match_all @@ -126,6 +131,8 @@ class DatasetKeyFilter(KeyFilterDict): for filtering meta items, features, tiles etc. """ + MATCH_ALL: ClassVar["DatasetKeyFilter"] + child_type = UserStringKeyFilter child_that_matches_all = UserStringKeyFilter(match_all=True) @@ -142,7 +149,8 @@ class RepoKeyFilter(KeyFilterDict): for filtering items in any or all datasets. 
""" - child_type = DatasetKeyFilter + MATCH_ALL: ClassVar["RepoKeyFilter"] + child_type: type = DatasetKeyFilter child_that_matches_all = DatasetKeyFilter(match_all=True) # https://github.com/koordinates/kart/blob/master/docs/DATASETS_v3.md#valid-dataset-names @@ -284,6 +292,9 @@ def __setitem__(self, key, value): super().__setitem__(key, value) +RepoKeyFilter.MATCH_ALL = RepoKeyFilter(match_all=True) + + class NegateKeyFilter: """ A key filter that contains whatever the given delegate does not contain, and vice versa. @@ -306,9 +317,6 @@ def __setitem__(self, key, value): raise NotImplementedError() -RepoKeyFilter.MATCH_ALL = RepoKeyFilter(match_all=True) - - class DeltaFilter(set): """ Filters parts of individual deltas - new or old values for inserts, updates, or deletes. @@ -318,6 +326,8 @@ class DeltaFilter(set): "++" is they key for new values of inserts """ + MATCH_ALL: ClassVar["DeltaFilter"] + def __init__(self, *args, match_all=False, **kwargs): super().__init__(*args, **kwargs) self.match_all = match_all diff --git a/kart/merge_util.py b/kart/merge_util.py index f2cf65992..83a73180e 100644 --- a/kart/merge_util.py +++ b/kart/merge_util.py @@ -2,6 +2,7 @@ import json import re from collections import namedtuple +from typing import ClassVar import click import pygit2 @@ -40,12 +41,11 @@ def write_merged_index_flags(repo): # Utility classes relevant to merges - used by merge command, conflicts command, resolve command. -# pygit2 always has this order - we use it too for consistency, -# and so we can meaningfully zip() our tuples with theirs -_ANCESTOR_OURS_THEIRS_ORDER = ("ancestor", "ours", "theirs") - - -class AncestorOursTheirs(namedtuple("AncestorOursTheirs", _ANCESTOR_OURS_THEIRS_ORDER)): +class AncestorOursTheirs( + # pygit2 always has this order - we use it too for consistency, + # and so we can meaningfully zip() our tuples with theirs + namedtuple("AncestorOursTheirs", ("ancestor", "ours", "theirs")) +): """ When merging two commits, we can end up with three versions of lots of things - mostly pygit2 IndexEntrys, but could also be paths, repositories, structures, datasets. @@ -53,7 +53,8 @@ class AncestorOursTheirs(namedtuple("AncestorOursTheirs", _ANCESTOR_OURS_THEIRS_ Like pygit2, we keep the 3 versions always in the same order - ancestor, ours, theirs. """ - NAMES = _ANCESTOR_OURS_THEIRS_ORDER + NAMES = ("ancestor", "ours", "theirs") + EMPTY: ClassVar["AncestorOursTheirs"] @staticmethod def partial(*, ancestor=None, ours=None, theirs=None): diff --git a/kart/meta_items.py b/kart/meta_items.py index b732fd301..042863014 100644 --- a/kart/meta_items.py +++ b/kart/meta_items.py @@ -2,6 +2,7 @@ import binascii import functools import re +from typing import ClassVar from kart import crs_util from kart.schema import Schema @@ -9,6 +10,8 @@ class TagsJsonFileType: + INSTANCE: ClassVar["TagsJsonFileType"] + # schema.json should be checked on read and write, by dropping any optional fields that are None. def decode_from_bytes(self, data): if data is None: @@ -34,6 +37,8 @@ def assert_list_of_strings(self, meta_item): class SchemaJsonFileType: + INSTANCE: ClassVar["SchemaJsonFileType"] + # schema.json should be normalised on read and write, by dropping any optional fields that are None. 
    def decode_from_bytes(self, data):
diff --git a/kart/raw_diff_delta.py b/kart/raw_diff_delta.py
new file mode 100644
index 000000000..230c8ec55
--- /dev/null
+++ b/kart/raw_diff_delta.py
@@ -0,0 +1,34 @@
+import pygit2
+
+
+class RawDiffDelta:
+    """
+    Just like pygit2.DiffDelta, this simply stores the fact that a particular git blob has changed.
+    Exactly how it is changed is not stored - just the status and the blob paths.
+    Contrast with diff_structs.Delta, which is higher level - it stores information about
+    a particular feature or meta-item that has changed, and exposes the values it has changed from and to.
+
+    This is needed to serve the same purpose as pygit2.DiffDelta because pygit2.DiffDeltas can't be
+    created except by running a pygit2 diff - which we don't always want to do when generating diff deltas:
+    see get_raw_deltas_for_keys.
+    """
+
+    __slots__ = ["status", "status_char", "old_path", "new_path"]
+
+    def __init__(self, status, status_char, old_path, new_path):
+        self.status = status
+        self.status_char = status_char
+        self.old_path = old_path
+        self.new_path = new_path
+
+    @classmethod
+    def of(cls, old_path, new_path, reverse=False):
+        if reverse:
+            old_path, new_path = new_path, old_path
+
+        if old_path is None:
+            return RawDiffDelta(pygit2.GIT_DELTA_ADDED, "A", old_path, new_path)
+        elif new_path is None:
+            return RawDiffDelta(pygit2.GIT_DELTA_DELETED, "D", old_path, new_path)
+        else:
+            return RawDiffDelta(pygit2.GIT_DELTA_MODIFIED, "M", old_path, new_path)
diff --git a/kart/repo.py b/kart/repo.py
index 1fda13203..6ec98f1d6 100644
--- a/kart/repo.py
+++ b/kart/repo.py
@@ -6,7 +6,7 @@
 from enum import Enum
 from functools import cached_property
 from pathlib import Path
-
+from typing import ClassVar
 import click
 import pygit2
@@ -61,6 +61,7 @@ class KartRepoFiles:
 class KartRepoState(Enum):
     NORMAL = "normal"
     MERGING = "merging"
+    ALL_STATES: ClassVar[tuple["KartRepoState", ...]]
 
     @classmethod
     def bad_state_message(cls, bad_state, allowed_states, command_extra):
diff --git a/kart/schema.py b/kart/schema.py
index 58fc4ffab..3e72f62ba 100644
--- a/kart/schema.py
+++ b/kart/schema.py
@@ -14,6 +14,7 @@
     msg_unpack,
     sha256,
 )
+from kart.utils import classproperty
 
 
 class Legend:
@@ -165,8 +166,7 @@ class ColumnSchema(dict):
         "length",
     )
 
-    @classmethod
-    @property
+    @classproperty
     @functools.lru_cache(maxsize=1)
     def sort_order_dict(cls):
         return {
diff --git a/kart/show.py b/kart/show.py
index 6bc4f9c43..1f071e949 100644
--- a/kart/show.py
+++ b/kart/show.py
@@ -79,7 +79,7 @@
 )
 @click.option(
     "--diff-format",
-    type=click.Choice(DiffFormat),
+    type=click.Choice(list(DiffFormat)),
     default=DiffFormat.FULL,
     help="Choose the diff format: \n'full' for full diff, 'none' for viewing commit metadata only, or 'no-data-changes' for metadata and a bool indicating the feature/tile tree changes.",
 )
diff --git a/kart/spatial_filter/__init__.py b/kart/spatial_filter/__init__.py
index 7c81b827b..f781aae5e 100644
--- a/kart/spatial_filter/__init__.py
+++ b/kart/spatial_filter/__init__.py
@@ -4,6 +4,7 @@
 import re
 import sys
 from enum import Enum, auto
+from typing import ClassVar
 
 import click
 import pygit2
@@ -454,6 +455,9 @@ class SpatialFilter:
     call SpatialFilter.transform_for_dataset or SpatialFilter.transform_for_crs
     """
 
+    _MATCH_ALL: ClassVar["SpatialFilter"]
+    MATCH_ALL: ClassVar["SpatialFilter"]
+
     @property
     def is_original(self):
         # Overridden by OriginalSpatialFilter.
@@ -635,6 +639,8 @@ class OriginalSpatialFilter(SpatialFilter):
     That is why only OriginalSpatialFilter supports transformation.
     """

+    _MATCH_ALL: ClassVar["OriginalSpatialFilter"]
+
     def __init__(self, crs_spec, geometry_spec, match_all=False):
         if match_all:
             super().__init__(None, None, match_all=True)
diff --git a/kart/spatial_filter/index.py b/kart/spatial_filter/index.py
index 3ad781ce6..8837d2cbd 100644
--- a/kart/spatial_filter/index.py
+++ b/kart/spatial_filter/index.py
@@ -25,28 +25,24 @@

 L = logging.getLogger("kart.spatial_filter.index")

+_bulk_warns: dict[str, int] = {}
+_bulk_warn_samples: dict[str, object] = {}

-def buffered_bulk_warn(self, message, sample):
+
+def buffered_bulk_warn(message, sample):
     """For logging lots of identical warnings, but only actually outputs once per time flush_bulk_warns is called."""
-    self.bulk_warns.setdefault(message, 0)
-    self.bulk_warns[message] = abs(self.bulk_warns[message]) + 1
-    self.bulk_warn_samples[message] = sample
+    _bulk_warns.setdefault(message, 0)
+    _bulk_warns[message] = abs(_bulk_warns[message]) + 1
+    _bulk_warn_samples[message] = sample


-def flush_bulk_warns(self):
+def flush_bulk_warns():
     """Output the number of occurrences of each type of buffered_bulk_warn message."""
-    for message, occurrences in self.bulk_warns.items():
+    for message, occurrences in _bulk_warns.items():
         if occurrences > 0:
-            sample = self.bulk_warn_samples[message]
-            self.warn(f"{message} ({occurrences} total occurences)\nSample: {sample}")
-            self.bulk_warns[message] = -occurrences
-
-
-L.buffered_bulk_warn = buffered_bulk_warn.__get__(L)
-L.flush_bulk_warns = flush_bulk_warns.__get__(L)
-L.bulk_warns = {}
-L.bulk_warn_samples = {}
+            sample = _bulk_warn_samples[message]
+            L.warning(f"{message} ({occurrences} total occurrences)\nSample: {sample}")
+            _bulk_warns[message] = -occurrences


 class CrsHelper:
@@ -353,7 +349,7 @@ def update_spatial_filter_index(
 ):
         if i and progress_every and i % progress_every == 0:
             click.echo(f"  {i:,d} features... @{time.monotonic()-t0:.1f}s")
-            L.flush_bulk_warns()
+            flush_bulk_warns()

         ds_path = path_match_result.group(1)
         transforms = crs_helper.transforms_for_dataset_at_commit(
@@ -378,7 +374,7 @@ def update_spatial_filter_index(
     )

     click.echo(f"  {i:,d} features... @{time.monotonic()-t0:.1f}s")
-    L.flush_bulk_warns()
+    flush_bulk_warns()

     # Update indexed commits.
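+    # Note: each param is a one-element tuple, since parameterised SQL
+    # execution expects a sequence of row-tuples.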
params = [(bytes.fromhex(commit_id),) for commit_id in all_independent_commits] @@ -498,22 +494,21 @@ def _debug_encoded_envelope(arg): NO_GEOMETRY_COLUMN = object() +_legend_to_col_id: dict[str, int] = {} + def get_geometry(repo, feature_blob): legend, fields = msg_unpack(memoryview(feature_blob)) - col_id = get_geometry.legend_to_col_id.get(legend) + col_id = _legend_to_col_id.get(legend) if col_id is None: col_id = _find_geometry_column(fields) if col_id is None: return None - get_geometry.legend_to_col_id[legend] = col_id + _legend_to_col_id[legend] = col_id return fields[col_id] if col_id is not NO_GEOMETRY_COLUMN else None -get_geometry.legend_to_col_id = {} - - -def _find_geometry_column(fields): +def _find_geometry_column(fields) -> int | object | None: result = NO_GEOMETRY_COLUMN for i, field in enumerate(fields): if isinstance(field, Geometry): @@ -619,22 +614,22 @@ def get_envelope_for_indexing(geom, transforms, feature_desc): envelope = transform_minmax_envelope(minmax_envelope, transform) except CannotIndex as e: if isinstance(e, CannotIndexDueToWrongCrs) and len(transforms) > 1: - L.buffered_bulk_warn( + buffered_bulk_warn( f"Skipped obviously bad transform {transform.desc}", feature_desc, ) continue - L.buffered_bulk_warn("Skipped indexing feature", feature_desc) + buffered_bulk_warn("Skipped indexing feature", feature_desc) return None result = union_of_envelopes(result, envelope) if result is None: - L.buffered_bulk_warn("Skipped indexing feature", feature_desc) + buffered_bulk_warn("Skipped indexing feature", feature_desc) return None if not _is_valid_envelope(result): - L.buffered_bulk_warn( + buffered_bulk_warn( "Couldn't index feature - resulting envelope not valid", feature_desc ) return None diff --git a/kart/sqlalchemy/adapter/postgis.py b/kart/sqlalchemy/adapter/postgis.py index 6612feac0..98612fe76 100644 --- a/kart/sqlalchemy/adapter/postgis.py +++ b/kart/sqlalchemy/adapter/postgis.py @@ -487,7 +487,6 @@ def python_postread(self, value): return str(value) if value is not None else None -@staticmethod def is_postgis_installed(session): # Run a query to see if PostGIS is installed. query = "SELECT * FROM pg_extension WHERE extname = 'postgis';" diff --git a/kart/sqlalchemy/adapter/sqlserver.py b/kart/sqlalchemy/adapter/sqlserver.py index b5f830e34..5fbdcd023 100644 --- a/kart/sqlalchemy/adapter/sqlserver.py +++ b/kart/sqlalchemy/adapter/sqlserver.py @@ -10,7 +10,7 @@ from kart.schema import ColumnSchema, Schema from kart.utils import ungenerator from sqlalchemy.ext.compiler import compiles -from sqlalchemy.sql import quoted_name +from sqlalchemy.sql import quoted_name # type: ignore[attr-defined] from sqlalchemy.sql.functions import Function diff --git a/kart/sqlalchemy/sqlserver.py b/kart/sqlalchemy/sqlserver.py index 28c2b319e..efdf13892 100644 --- a/kart/sqlalchemy/sqlserver.py +++ b/kart/sqlalchemy/sqlserver.py @@ -3,9 +3,9 @@ from urllib.parse import urlsplit, urlunsplit import sqlalchemy -from kart.exceptions import NO_DRIVER, NotFound -from sqlalchemy.dialects.mssql.base import MSDialect, MSIdentifierPreparer +from sqlalchemy.dialects.mssql.base import MSDialect, MSIdentifierPreparer # type: ignore[attr-defined] +from kart.exceptions import NO_DRIVER, NotFound from .base import BaseDb L = logging.getLogger("kart.sqlalchemy.sqlserver") diff --git a/kart/structure.py b/kart/structure.py index 1ae947de9..e8411317e 100644 --- a/kart/structure.py +++ b/kart/structure.py @@ -21,7 +21,11 @@ from . 
import list_of_conflicts from .pack_util import packfile_object_builder from .schema import Schema -from .tabular.version import extra_blobs_for_version, dataset_class_for_version +from .tabular.version import ( + extra_blobs_for_version, + dataset_class_for_version, + DEFAULT_NEW_REPO_VERSION, +) from .structs import CommitWithReference from .unsupported_dataset import UnsupportedDataset from .working_copy import ALL_PART_TYPES @@ -232,7 +236,7 @@ def create_tree_from_diff( if not self.tree: # This is the first commit to this branch - we may need to add extra blobs # to the tree to mark this data as being of a particular version. - extra_blobs = extra_blobs_for_version(self.version) + extra_blobs = extra_blobs_for_version(DEFAULT_NEW_REPO_VERSION) for path, blob in extra_blobs: object_builder.insert(path, blob) diff --git a/kart/tabular/import_source.py b/kart/tabular/import_source.py index 2194c1abe..fd451c82b 100644 --- a/kart/tabular/import_source.py +++ b/kart/tabular/import_source.py @@ -73,16 +73,16 @@ def dest_path(self): return self._dest_path return self.default_dest_path() + @dest_path.setter + def dest_path(self, dest_path): + self._dest_path = self._normalise_dataset_path(dest_path) + @classmethod def _normalise_dataset_path(cls, path): # we treat back-slash and forward-slash as equivalent at import time. # (but we only ever import forward-slashes) return path.strip("/").replace("\\", "/") - @dest_path.setter - def dest_path(self, dest_path): - self._dest_path = self._normalise_dataset_path(dest_path) - def default_dest_path(self): """ The default destination path where this dataset should be imported. @@ -249,7 +249,9 @@ def get_table(value): except ValueError: if value in table_list: return value - raise click.BadParameter(f"Please enter a number between 1 and {len(table_list)} or a valid table name") + raise click.BadParameter( + f"Please enter a number between 1 and {len(table_list)} or a valid table name" + ) t_default = table_list[0] if len(table_list) == 1 else None return click.prompt( diff --git a/kart/tabular/rich_table_dataset.py b/kart/tabular/rich_table_dataset.py index 1a14e8e88..12fae876d 100644 --- a/kart/tabular/rich_table_dataset.py +++ b/kart/tabular/rich_table_dataset.py @@ -130,7 +130,7 @@ def diff( other_subtree = other.get_subtree("feature") if other else self._empty_tree data_changes = self_subtree != other_subtree - ds_diff["data_changes"]: bool = data_changes + ds_diff["data_changes"] = data_changes # Else do a full diff. else: diff --git a/kart/tabular/v2.py b/kart/tabular/v2.py index aef424185..8cb6947d6 100644 --- a/kart/tabular/v2.py +++ b/kart/tabular/v2.py @@ -1,3 +1,4 @@ +from typing import ClassVar from kart.meta_items import MetaItemDefinition, MetaItemFileType from kart.tabular.v3 import TableV3 from kart.tabular.v3_paths import PathEncoder @@ -8,7 +9,7 @@ class TableV2(TableV3): DATASET_DIRNAME = ".sno-dataset" # Old name for V2 datasets. 
- META_ITEMS = TableV3.META_ITEMS + ( + META_ITEMS: ClassVar[tuple[MetaItemDefinition, ...]] = TableV3.META_ITEMS + ( MetaItemDefinition("metadata/dataset.json", MetaItemFileType.JSON), ) diff --git a/kart/tabular/v3.py b/kart/tabular/v3.py index 98e11978e..b290a51b6 100644 --- a/kart/tabular/v3.py +++ b/kart/tabular/v3.py @@ -1,7 +1,7 @@ import functools import os import re - +from typing import ClassVar import pygit2 from kart.core import all_blobs_in_tree @@ -86,7 +86,7 @@ class TableV3(RichTableDataset): MetaItemVisibility.INTERNAL_ONLY, ) - META_ITEMS = ( + META_ITEMS: ClassVar[tuple[MetaItemDefinition, ...]] = ( TITLE, DESCRIPTION, TAGS_JSON, diff --git a/kart/tabular/v3_paths.py b/kart/tabular/v3_paths.py index 1088b0c76..ff4839313 100644 --- a/kart/tabular/v3_paths.py +++ b/kart/tabular/v3_paths.py @@ -111,6 +111,10 @@ class PathEncoder: which Kart continues to support. """ + LEGACY_ENCODER: "PathEncoder" + INT_PK_ENCODER: "PathEncoder" + GENERAL_ENCODER: "PathEncoder" + PATH_STRUCTURE_ITEM = "path-structure.json" @staticmethod diff --git a/kart/tabular/working_copy/base.py b/kart/tabular/working_copy/base.py index 002ba87e3..2c4652038 100644 --- a/kart/tabular/working_copy/base.py +++ b/kart/tabular/working_copy/base.py @@ -43,7 +43,7 @@ class TableWorkingCopy(WorkingCopyPart): """ # Subclasses should override if they can support more meta-items eg description or metadata.xml - SUPPORTED_META_ITEMS = ( + SUPPORTED_META_ITEMS: tuple[meta_items.MetaItemDefinition, ...] = ( meta_items.TITLE, meta_items.SCHEMA_JSON, meta_items.CRS_DEFINITIONS, @@ -1455,10 +1455,8 @@ def soft_reset_after_commit( def _check_for_unsupported_ds_types(self, sess, target_datasets, schema="public"): pass - -# Alias state_session to session: -# a session the working-copy in general is also a good session for reading/writing the state table. -TableWorkingCopy.state_session = TableWorkingCopy.session + def state_session(self): + return self.session() @contextlib.contextmanager diff --git a/kart/tabular/working_copy/db_server.py b/kart/tabular/working_copy/db_server.py index 9f5ccd11f..448371940 100644 --- a/kart/tabular/working_copy/db_server.py +++ b/kart/tabular/working_copy/db_server.py @@ -4,10 +4,11 @@ from urllib.parse import urlsplit, urlunsplit import click +from sqlalchemy.exc import DBAPIError from kart.exceptions import DbConnectionError, InvalidOperation from kart.sqlalchemy import DbType, strip_password -from sqlalchemy.exc import DBAPIError +from kart.utils import classproperty from . 
import TableWorkingCopyStatus from .base import TableWorkingCopy @@ -16,8 +17,7 @@ class DatabaseServer_WorkingCopy(TableWorkingCopy): """Functionality common to working copies that connect to a database server.""" - @property - @classmethod + @classproperty def URI_SCHEME(cls): """The URI scheme to connect to this type of database, eg "postgresql".""" raise NotImplementedError() diff --git a/kart/tabular/working_copy/gpkg.py b/kart/tabular/working_copy/gpkg.py index 2fe75964d..9aaf7497b 100644 --- a/kart/tabular/working_copy/gpkg.py +++ b/kart/tabular/working_copy/gpkg.py @@ -696,6 +696,3 @@ def _update_gpkg_contents(self, sess, dataset, commit=None): {"last_change": gpkg_change_time, "table_name": dataset.table_name}, ).rowcount assert rc == 1, f"gpkg_contents update: expected 1Δ, got {rc}" - - -WorkingCopy_GPKG.state_session = WorkingCopy_GPKG.session diff --git a/kart/tabular/working_copy/sqlserver.py b/kart/tabular/working_copy/sqlserver.py index 05912098d..bf44070ef 100644 --- a/kart/tabular/working_copy/sqlserver.py +++ b/kart/tabular/working_copy/sqlserver.py @@ -6,7 +6,7 @@ from kart.sqlalchemy import separate_last_path_part, text_with_inlined_params from kart.sqlalchemy.adapter.sqlserver import KartAdapter_SqlServer from kart.schema import Schema -from sqlalchemy.dialects.mssql.base import MSIdentifierPreparer +from sqlalchemy.dialects.mssql.base import MSIdentifierPreparer # type: ignore[attr-defined] from sqlalchemy.orm import sessionmaker from .db_server import DatabaseServer_WorkingCopy diff --git a/kart/tile/importer.py b/kart/tile/importer.py index 2da614bba..e52fa4145 100644 --- a/kart/tile/importer.py +++ b/kart/tile/importer.py @@ -122,7 +122,7 @@ def __init__( # A dict of the form {sidecar-key-prefix: sidecar-filename-suffix} # For example: {"pam": ".aux.xml"} since we use the prefix pam to label keys from .aux.xml aka PAM files. - SIDECAR_FILES = {} + SIDECAR_FILES: dict[str, str] = {} @cached_property def ALLOWED_SCHEMES(self): diff --git a/kart/tile/tile_dataset.py b/kart/tile/tile_dataset.py index 1b456b1e9..7fce0d69e 100644 --- a/kart/tile/tile_dataset.py +++ b/kart/tile/tile_dataset.py @@ -1,6 +1,6 @@ import functools import os - +from typing import ClassVar from kart.base_dataset import BaseDataset from kart.core import all_blobs_with_parent_in_tree from kart.decorators import allow_classmethod @@ -61,7 +61,7 @@ class TileDataset(BaseDataset): LINKED_STORAGE_JSON = meta_items.LINKED_STORAGE_JSON # Subclasses may override to add extra meta-items. - META_ITEMS = ( + META_ITEMS: ClassVar[tuple["MetaItemDefinition", ...]] = ( TITLE, DESCRIPTION, TAGS_JSON, @@ -283,7 +283,7 @@ def extract_tile_metadata(cls, path, oid_and_size=None): def diff( self, - other, + other: BaseDataset | None, ds_filter=DatasetKeyFilter.MATCH_ALL, reverse=False, diff_format=DiffFormat.FULL, @@ -302,7 +302,7 @@ def diff( other_subtree = other.get_subtree("tile") if other else self._empty_tree data_changes = self_subtree != other_subtree - ds_diff["data_changes"]: bool = data_changes + ds_diff["data_changes"] = data_changes # Else do a full diff. 
        else:
@@ -378,7 +378,7 @@ def tile_metadata(self):
             "crs.wkt": self.get_meta_item("crs.wkt"),
         }

-    def is_cloud_optimized():
+    def is_cloud_optimized(self) -> bool:
         """Returns True if this dataset is constrained so that only cloud-optimized tiles can be added to it."""
         raise NotImplementedError()
diff --git a/kart/utils.py b/kart/utils.py
index 7ae478d97..f4c123b31 100644
--- a/kart/utils.py
+++ b/kart/utils.py
@@ -68,3 +68,13 @@ def get_num_available_cores():
     # sched_getaffinity isn't available on some platforms (macOS mostly I think)
     # Fallback to total machine CPUs
     return float(os.cpu_count())
+
+
+class classproperty:
+    """Like @property, but on the class: accessing the attribute calls the getter with the owning class."""
+
+    def __init__(self, getter):
+        self.fget = getter
+
+    def __get__(self, cls, owner):
+        return self.fget(owner)
diff --git a/mypy.ini b/mypy.ini
new file mode 100644
index 000000000..8e65dbf96
--- /dev/null
+++ b/mypy.ini
@@ -0,0 +1,34 @@
+[mypy]
+# When checking specific files (as pre-commit does), it's annoying if mypy also reports errors from every other file.
+# NOTE: this seems to be the default when run on Linux but not on macOS (undocumented, though).
+follow_imports = silent
+files = kart/
+python_version = 3.11
+
+[mypy-pysqlite3.*]
+ignore_missing_imports = true
+
+[mypy-osgeo.*]
+ignore_missing_imports = true
+
+[mypy-ipdb.*]
+ignore_missing_imports = true
+
+[mypy-shellingham.*]
+ignore_missing_imports = true
+
+[mypy-reflink.*]
+ignore_missing_imports = true
+
+[mypy-boto3.*]
+ignore_missing_imports = true
+
+[mypy-botocore.*]
+ignore_missing_imports = true
+
+[mypy-pyodbc.*]
+ignore_missing_imports = true
+
+[mypy-pygit2.*]
+# TODO: remove this when upgrading to pygit2 1.16+, which has builtin type hints
+ignore_missing_imports = true
diff --git a/requirements/dev.in b/requirements/dev.in
index 8d2b111be..e0d0617e4 100644
--- a/requirements/dev.in
+++ b/requirements/dev.in
@@ -19,9 +19,8 @@ mypy
   # note: not maintained; not compatible with sqlalchemy 2.x
   # so, probably drop this when we upgrade sqlalchemy
   sqlalchemy-stubs==0.4
-  # NOTE: we actually still have pygit2 1.12, but this is the first release of types-pygit2
-  # TODO: upgrade to pygit2 1.16+ and use the builtin types rather than using this package.
- types-pygit2==1.14.0.20240317 - - types-tqdm + msgpack-types + types-jsonschema + types-psycopg2 types-Pygments==2.13.0 + types-tqdm diff --git a/requirements/dev.txt b/requirements/dev.txt index ee7fcf9c6..db169472c 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -1,37 +1,50 @@ # This file was autogenerated by uv via the following command: -# uv pip compile dev.in -o dev.txt +# uv pip compile requirements/dev.in -o requirements/dev.txt appnope==0.1.3 # via - # -r dev.in + # -r requirements/dev.in # ipython asttokens==2.2.1 # via stack-data +attrs==22.1.0 + # via + # -c requirements/requirements.txt + # -c requirements/test.txt + # referencing backcall==0.2.0 # via ipython colorama==0.4.6 # via - # -c docs.txt - # -c test.txt - # -r dev.in + # -c requirements/docs.txt + # -c requirements/test.txt + # -r requirements/dev.in decorator==5.1.1 # via # ipdb # ipython executing==1.2.0 # via stack-data +idna==3.7 + # via + # -c requirements/docs.txt + # yarl ipdb==0.13.11 - # via -r dev.in + # via -r requirements/dev.in ipython==8.10.0 # via - # -r dev.in + # -r requirements/dev.in # ipdb jedi==0.18.2 # via ipython matplotlib-inline==0.1.6 # via ipython +msgpack-types==0.3.0 + # via -r requirements/dev.in +multidict==6.1.0 + # via yarl mypy==1.13.0 # via - # -r dev.in + # -r requirements/dev.in # sqlalchemy-stubs mypy-extensions==1.0.0 # via mypy @@ -39,52 +52,60 @@ parso==0.8.3 # via jedi pexpect==4.8.0 # via - # -r dev.in + # -r requirements/dev.in # ipython pickleshare==0.7.5 # via ipython prompt-toolkit==3.0.36 # via ipython +propcache==0.2.0 + # via yarl ptyprocess==0.7.0 # via pexpect pure-eval==0.2.2 # via stack-data pygments==2.13.0 # via - # -c docs.txt - # -c requirements.txt + # -c requirements/docs.txt + # -c requirements/requirements.txt # ipython +pyrsistent==0.19.2 + # via + # -c requirements/requirements.txt + # referencing +referencing==0.8.11 + # via types-jsonschema six==1.16.0 # via - # -c docs.txt - # -c requirements.txt - # -c test.txt + # -c requirements/docs.txt + # -c requirements/requirements.txt + # -c requirements/test.txt # asttokens sqlalchemy-stubs==0.4 - # via -r dev.in + # via -r requirements/dev.in stack-data==0.6.2 # via ipython traitlets==5.7.1 # via # ipython # matplotlib-inline -types-cffi==1.16.0.20240331 - # via types-pygit2 types-docutils==0.21.0.20241005 # via types-pygments -types-pygit2==1.14.0.20240317 - # via -r dev.in +types-jsonschema==4.23.0.20240813 + # via -r requirements/dev.in +types-psycopg2==2.9.21.20241019 + # via -r requirements/dev.in types-pygments==2.13.0 - # via -r dev.in + # via -r requirements/dev.in types-setuptools==75.3.0.20241107 - # via - # types-cffi - # types-pygments + # via types-pygments types-tqdm==4.66.0.20240417 - # via -r dev.in + # via -r requirements/dev.in typing-extensions==4.12.2 # via # mypy # sqlalchemy-stubs wcwidth==0.2.5 # via prompt-toolkit +yarl==1.17.1 + # via referencing From b0d2d58e386c9b2703cf0faa66ab08720cb0a0d7 Mon Sep 17 00:00:00 2001 From: Craig de Stigter Date: Sat, 9 Nov 2024 21:58:44 +1300 Subject: [PATCH 3/8] Bump click to avoid some spurious mypy errors https://github.com/pallets/click/issues/2558 --- requirements/dev.txt | 32 ++++++++++++++++++-------------- requirements/docs.txt | 2 +- requirements/requirements.in | 2 +- requirements/requirements.txt | 22 +++++++++++----------- requirements/test.txt | 10 ++++------ 5 files changed, 35 insertions(+), 33 deletions(-) diff --git a/requirements/dev.txt b/requirements/dev.txt index db169472c..ebdcb47d6 100644 --- 
a/requirements/dev.txt +++ b/requirements/dev.txt @@ -1,8 +1,12 @@ -# This file was autogenerated by uv via the following command: -# uv pip compile requirements/dev.in -o requirements/dev.txt +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# "cmake --build build --target py-requirements" +# appnope==0.1.3 # via - # -r requirements/dev.in + # -r dev.in # ipython asttokens==2.2.1 # via stack-data @@ -17,7 +21,7 @@ colorama==0.4.6 # via # -c requirements/docs.txt # -c requirements/test.txt - # -r requirements/dev.in + # -r dev.in decorator==5.1.1 # via # ipdb @@ -29,22 +33,22 @@ idna==3.7 # -c requirements/docs.txt # yarl ipdb==0.13.11 - # via -r requirements/dev.in + # via -r dev.in ipython==8.10.0 # via - # -r requirements/dev.in + # -r dev.in # ipdb jedi==0.18.2 # via ipython matplotlib-inline==0.1.6 # via ipython msgpack-types==0.3.0 - # via -r requirements/dev.in + # via -r dev.in multidict==6.1.0 # via yarl mypy==1.13.0 # via - # -r requirements/dev.in + # -r dev.in # sqlalchemy-stubs mypy-extensions==1.0.0 # via mypy @@ -52,7 +56,7 @@ parso==0.8.3 # via jedi pexpect==4.8.0 # via - # -r requirements/dev.in + # -r dev.in # ipython pickleshare==0.7.5 # via ipython @@ -82,7 +86,7 @@ six==1.16.0 # -c requirements/test.txt # asttokens sqlalchemy-stubs==0.4 - # via -r requirements/dev.in + # via -r dev.in stack-data==0.6.2 # via ipython traitlets==5.7.1 @@ -92,15 +96,15 @@ traitlets==5.7.1 types-docutils==0.21.0.20241005 # via types-pygments types-jsonschema==4.23.0.20240813 - # via -r requirements/dev.in + # via -r dev.in types-psycopg2==2.9.21.20241019 - # via -r requirements/dev.in + # via -r dev.in types-pygments==2.13.0 - # via -r requirements/dev.in + # via -r dev.in types-setuptools==75.3.0.20241107 # via types-pygments types-tqdm==4.66.0.20240417 - # via -r requirements/dev.in + # via -r dev.in typing-extensions==4.12.2 # via # mypy diff --git a/requirements/docs.txt b/requirements/docs.txt index 314d2f11f..fd9d67b8f 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.12 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # # "cmake --build build --target py-requirements" diff --git a/requirements/requirements.in b/requirements/requirements.in index 33ef03a02..c53f4162f 100644 --- a/requirements/requirements.in +++ b/requirements/requirements.in @@ -1,6 +1,6 @@ boto3>=1.29.1 certifi -click~=8.1 +click~=8.1.7 docutils<0.18 msgpack~=0.6.1 Pygments diff --git a/requirements/requirements.txt b/requirements/requirements.txt index ecaebc0f1..f94696e88 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.12 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # # "cmake --build build --target py-requirements" @@ -16,20 +16,20 @@ certifi==2022.12.7 # via -r requirements.in #cffi==1.16.0 # via - # -r vendor-wheels.txt + # -r requirements/vendor-wheels.txt # cryptography # pygit2 # reflink -click==8.1.4 +click==8.1.7 # via -r requirements.in -#cryptography==41.0.4 - # via -r vendor-wheels.txt +#cryptography==42.0.4 + # via -r requirements/vendor-wheels.txt docutils==0.17.1 # via # -r requirements.in # rst2txt #gdal==3.8.0 - # via -r vendor-wheels.txt + # via -r requirements/vendor-wheels.txt jmespath==1.0.1 # via # boto3 @@ -39,11 +39,11 @@ jsonschema==4.17.3 msgpack==0.6.2 # via -r 
requirements.in
 #psycopg2==2.9.9
-    # via -r vendor-wheels.txt
+    # via -r requirements/vendor-wheels.txt
 pycparser==2.21
     # via cffi
 #pygit2==1.12.1
-    # via -r vendor-wheels.txt
+    # via -r requirements/vendor-wheels.txt
 pygments==2.13.0
     # via
         # -r requirements.in
@@ -51,15 +51,15 @@ pygments==2.13.0
 pymysql==1.0.2
     # via -r requirements.in
 pyodbc==5.1.0
-    # via -r vendor-wheels.txt
+    # via -r requirements/vendor-wheels.txt
 pyrsistent==0.19.2
     # via jsonschema
 #pysqlite3==0.5.2
-    # via -r vendor-wheels.txt
+    # via -r requirements/vendor-wheels.txt
 python-dateutil==2.8.2
     # via botocore
 #reflink==0.2.2
-    # via -r vendor-wheels.txt
+    # via -r requirements/vendor-wheels.txt
 rst2txt==1.1.0
     # via -r requirements.in
 s3transfer==0.7.0
diff --git a/requirements/test.txt b/requirements/test.txt
index cc3dc47ca..d6b8d2a08 100644
--- a/requirements/test.txt
+++ b/requirements/test.txt
@@ -1,5 +1,5 @@
 #
-# This file is autogenerated by pip-compile with Python 3.12
+# This file is autogenerated by pip-compile with Python 3.11
 # by the following command:
 #
 #    "cmake --build build --target py-requirements"
 #
@@ -10,14 +10,12 @@ atomicwrites==1.4.1
     # via -r test.in
 attrs==22.1.0
     # via
-        # -c requirements.txt
+        # -c requirements/requirements.txt
         # pytest
 colorama==0.4.6
     # via -r test.in
 coverage[toml]==6.5.0
-    # via
-        # coverage
-        # pytest-cov
+    # via pytest-cov
 execnet==1.9.0
     # via pytest-xdist
 fields==5.0.0
@@ -65,7 +63,7 @@ pytest-xdist==3.1.0
     # via -r test.in
 six==1.16.0
     # via
-        # -c requirements.txt
+        # -c requirements/requirements.txt
         # html5lib
         # pytest-profiling
 termcolor==2.1.1
From b4c8b01d86b18d1d3c89dac364f049d845d06c8f Mon Sep 17 00:00:00 2001
From: Craig de Stigter
Date: Sat, 9 Nov 2024 19:34:15 +1300
Subject: [PATCH 4/8] Ignore mypy errors for click's shell_complete kwarg

This is somehow absent from click's inline type annotations.
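For example, mypy rejects perfectly valid call sites like the following (a
minimal sketch - the exact error wording varies with the mypy and click
versions):

    @click.argument("refish", shell_complete=ref_completer)
    # error: Unexpected keyword argument "shell_complete"  [call-arg]

So each call site is silenced with "# type: ignore[call-arg]" until the
upstream hints are fixed.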
Refs https://github.com/pallets/click/issues/2802 --- kart/apply.py | 2 +- kart/checkout.py | 22 ++++++++++++++++++---- kart/data.py | 7 ++++++- kart/diff.py | 2 +- kart/export.py | 2 +- kart/import_.py | 2 +- kart/init.py | 2 +- kart/log.py | 2 +- kart/point_cloud/import_.py | 2 +- kart/pull.py | 6 +++++- kart/raster/import_.py | 2 +- kart/resolve.py | 2 +- kart/show.py | 7 +++++-- kart/tabular/export.py | 2 +- kart/tabular/import_.py | 2 +- 15 files changed, 45 insertions(+), 19 deletions(-) diff --git a/kart/apply.py b/kart/apply.py index 2b962185a..ca9e877dc 100644 --- a/kart/apply.py +++ b/kart/apply.py @@ -370,7 +370,7 @@ def apply_patch( "--ref", default="HEAD", help="Which ref to apply the patch onto.", - shell_complete=ref_completer, + shell_complete=ref_completer, # type: ignore[call-arg] ) @click.option( "--amend", diff --git a/kart/checkout.py b/kart/checkout.py index aaaa73386..718845f58 100644 --- a/kart/checkout.py +++ b/kart/checkout.py @@ -62,7 +62,12 @@ multiple=True, help="Request that a particular dataset *not* be checked out (one which is currently configured to be checked out)", ) -@click.argument("refish", default=None, required=False, shell_complete=ref_completer) +@click.argument( + "refish", + default=None, + required=False, + shell_complete=ref_completer, # type: ignore[call-arg] +) def checkout( ctx, new_branch, @@ -320,7 +325,12 @@ def git_refetch(repo, promisor_remote, spec): help="If a local branch of given name doesn't exist, but a remote does, " "this option guesses that the user wants to create a local to track the remote", ) -@click.argument("refish", default=None, required=False, shell_complete=ref_completer) +@click.argument( + "refish", + default=None, + required=False, + shell_complete=ref_completer, # type: ignore[call-arg] +) def switch(ctx, create, force_create, discard_changes, do_guess, refish): """ Switch branches @@ -475,7 +485,7 @@ def _is_in_branches(branch_name, branches): "tag associated with it. " ), default="HEAD", - shell_complete=ref_completer, + shell_complete=ref_completer, # type: ignore[call-arg] ) @click.argument("filters", nargs=-1) def restore(ctx, source, filters): @@ -512,7 +522,11 @@ def restore(ctx, source, filters): is_flag=True, help="Discard local changes in working copy if necessary", ) -@click.argument("refish", default="HEAD", shell_complete=ref_completer) +@click.argument( + "refish", + default="HEAD", + shell_complete=ref_completer, # type: ignore[call-arg] +) def reset(ctx, discard_changes, refish): """ Reset the branch head to point to a particular commit. diff --git a/kart/data.py b/kart/data.py index 56b489f12..c6aa98052 100644 --- a/kart/data.py +++ b/kart/data.py @@ -33,7 +33,12 @@ def data(ctx, **kwargs): is_flag=True, help="When set, outputs the dataset type and version. 
(This may become the default in a later version of Kart)", ) -@click.argument("refish", required=False, default="HEAD", shell_complete=ref_completer) +@click.argument( + "refish", + required=False, + default="HEAD", + shell_complete=ref_completer, # type: ignore[call-arg] +) @click.pass_context def data_ls(ctx, output_format, with_dataset_types, refish): """List all of the datasets in the Kart repository""" diff --git a/kart/diff.py b/kart/diff.py index 5f7a7a6c1..3b6c86116 100644 --- a/kart/diff.py +++ b/kart/diff.py @@ -159,7 +159,7 @@ def feature_count_diff( metavar="[REVISIONS] [--] [FILTERS]", nargs=-1, type=click.UNPROCESSED, - shell_complete=ref_or_repo_path_completer, + shell_complete=ref_or_repo_path_completer, # type: ignore[call-arg] ) def diff( ctx, diff --git a/kart/export.py b/kart/export.py index 64b671559..90bfa118a 100644 --- a/kart/export.py +++ b/kart/export.py @@ -25,7 +25,7 @@ "args", nargs=-1, metavar="DATASET [EXPORT_TYPE:]DESTINATION", - shell_complete=repo_path_completer, + shell_complete=repo_path_completer, # type: ignore[call-arg] ) def export(ctx, args, **kwargs): """ diff --git a/kart/import_.py b/kart/import_.py index d8fdb4e80..bf4b23f1d 100644 --- a/kart/import_.py +++ b/kart/import_.py @@ -75,7 +75,7 @@ def list_import_formats(ctx): "args", nargs=-1, metavar="SOURCE [[SOURCES...] or [DATASETS...]]", - shell_complete=file_path_completer, + shell_complete=file_path_completer, # type: ignore[call-arg] ) def import_(ctx, args, **kwargs): """ diff --git a/kart/init.py b/kart/init.py index 7e177821f..b65dd5874 100644 --- a/kart/init.py +++ b/kart/init.py @@ -26,7 +26,7 @@ "--import", "import_from", help='Import a database (all tables): "FORMAT:PATH" eg. "GPKG:my.gpkg". Currently only tabular formats are supported.', - shell_complete=file_path_completer, + shell_complete=file_path_completer, # type: ignore[call-arg] ) @click.option( "--checkout/--no-checkout", diff --git a/kart/log.py b/kart/log.py index f363592f5..35642cd63 100644 --- a/kart/log.py +++ b/kart/log.py @@ -235,7 +235,7 @@ def convert_user_patterns_to_raw_paths(paths, repo, commits): "args", metavar="[REVISIONS] [--] [FILTERS]", nargs=-1, - shell_complete=ref_or_repo_path_completer, + shell_complete=ref_or_repo_path_completer, # type: ignore[call-arg] ) def log( ctx, diff --git a/kart/point_cloud/import_.py b/kart/point_cloud/import_.py index da6a7ceca..b2985b212 100644 --- a/kart/point_cloud/import_.py +++ b/kart/point_cloud/import_.py @@ -116,7 +116,7 @@ "args", nargs=-1, metavar="SOURCE [SOURCES...]", - shell_complete=file_path_completer, + shell_complete=file_path_completer, # type: ignore[call-arg] ) def point_cloud_import( ctx, diff --git a/kart/pull.py b/kart/pull.py index 99010867f..746d62885 100644 --- a/kart/pull.py +++ b/kart/pull.py @@ -45,7 +45,11 @@ ) @click.argument("repository", required=False, metavar="REMOTE") @click.argument( - "refspecs", nargs=-1, required=False, metavar="REFISH", shell_complete=ref_completer + "refspecs", + nargs=-1, + required=False, + metavar="REFISH", + shell_complete=ref_completer, # type: ignore[call-arg] ) @click.pass_context def pull(ctx, ff, ff_only, launch_editor, do_progress, repository, refspecs): diff --git a/kart/raster/import_.py b/kart/raster/import_.py index fd9fa9dcc..8b933485a 100644 --- a/kart/raster/import_.py +++ b/kart/raster/import_.py @@ -115,7 +115,7 @@ "args", nargs=-1, metavar="SOURCE [SOURCES...]", - shell_complete=file_path_completer, + shell_complete=file_path_completer, # type: ignore[call-arg] ) def raster_import( ctx, diff --git 
a/kart/resolve.py b/kart/resolve.py index f9f144d2d..403c3cf40 100644 --- a/kart/resolve.py +++ b/kart/resolve.py @@ -538,7 +538,7 @@ def _fix_conflict_label(conflict_label): "conflict_labels", nargs=-1, metavar="[CONFLICT_LABELS]", - shell_complete=conflict_completer, + shell_complete=conflict_completer, # type: ignore[call-arg] ) def resolve(ctx, with_version, file_path, renumber, conflict_labels): """Resolve a merge conflict, using one of the conflicting versions, or with a user-supplied resolution.""" diff --git a/kart/show.py b/kart/show.py index 1f071e949..89b4b247d 100644 --- a/kart/show.py +++ b/kart/show.py @@ -75,7 +75,7 @@ metavar="[REVISION] [--] [FILTERS]", nargs=-1, type=click.UNPROCESSED, - shell_complete=ref_or_repo_path_completer, + shell_complete=ref_or_repo_path_completer, # type: ignore[call-arg] ) @click.option( "--diff-format", @@ -184,7 +184,10 @@ def show( ) # NOTE: this is *required* for now. # A future version might create patches from working-copy changes. -@click.argument("refish", shell_complete=ref_completer) +@click.argument( + "refish", + shell_complete=ref_completer, # type: ignore[call-arg] +) def create_patch( ctx, *, diff --git a/kart/tabular/export.py b/kart/tabular/export.py index 3194dd8fc..5acdcd9c5 100644 --- a/kart/tabular/export.py +++ b/kart/tabular/export.py @@ -167,7 +167,7 @@ def get_driver(destination_spec): "args", nargs=-1, metavar="DATASET [EXPORT_TYPE:]DESTINATION", - shell_complete=repo_path_completer, + shell_complete=repo_path_completer, # type: ignore[call-arg] ) def table_export( ctx, diff --git a/kart/tabular/import_.py b/kart/tabular/import_.py index b9bcb6602..706e82500 100644 --- a/kart/tabular/import_.py +++ b/kart/tabular/import_.py @@ -179,7 +179,7 @@ def any_at_all(iterable): "args", nargs=-1, metavar="SOURCE ([SOURCES...] 
or [TABLES...])", - shell_complete=file_path_completer, + shell_complete=file_path_completer, # type: ignore[call-arg] ) def table_import( ctx, From a18573b2c1d28f454efbb5de054c087a2b738650 Mon Sep 17 00:00:00 2001 From: Craig de Stigter Date: Sat, 9 Nov 2024 21:00:42 +1300 Subject: [PATCH 5/8] Add mypy pre-commit hook Follows recommendations at https://github.com/python/mypy/issues/13916 --- .pre-commit-config.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 516e7180a..60b46c9dd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -28,3 +28,11 @@ repos: - id: cmake-lint args: - --suppress-decorations + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.13.0 + hooks: + - id: mypy + name: Check types with mypy + language: system + entry: build/venv/bin/mypy + pass_filenames: false \ No newline at end of file From f8c6bbd9cf7eb7bab7e9dd6bc873ab9017a23a5c Mon Sep 17 00:00:00 2001 From: Craig de Stigter Date: Sat, 9 Nov 2024 22:06:26 +1300 Subject: [PATCH 6/8] Github Actions type checking --- .github/workflows/build.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8c9e5aeb7..95c239a69 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -12,6 +12,19 @@ env: AWS_NO_SIGN_REQUEST: "1" jobs: + type-checking: + name: Run Type Checking + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: "3.11" + cache: 'pip' # caching pip dependencies + - run: pip install -r requirements/requirements.txt -r requirements/test.txt -r requirements/dev.txt + - name: Run mypy + run: mypy + # Increase rate limits for public registry amazon-ecr-auth: runs-on: ubuntu-latest From 9afda4c82fae75a3ce1658d14845e307971e1407 Mon Sep 17 00:00:00 2001 From: Craig de Stigter Date: Mon, 11 Nov 2024 11:24:27 +1300 Subject: [PATCH 7/8] Fix WC session aliasing issue session & state_session are no longer the same function; one calls the other --- kart/working_copy.py | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/kart/working_copy.py b/kart/working_copy.py index 41fd7f9c9..9ea2cefe0 100644 --- a/kart/working_copy.py +++ b/kart/working_copy.py @@ -668,14 +668,7 @@ def reset( repo_key_filter, ) - # Start a DB session that surrounds both the working copy modifications and the state table modifications - - # if the same DB session can be used for both, otherwise, let _do_reset_datasets handle it. 
- session_context = ( - self.session - if hasattr(self, "session") and self.session == self.state_session - else contextlib.nullcontext - ) - with session_context(): + with self.state_session() as sess: if ds_inserts or ds_updates or ds_deletes: self._do_reset_datasets( base_datasets=base_datasets, @@ -691,15 +684,12 @@ def reset( quiet=quiet, ) - with self.state_session() as sess: - if not track_changes_as_dirty: - self._update_state_table_tree(sess, target_tree_id) - self._update_state_table_spatial_filter_hash( - sess, self.repo.spatial_filter.hexhash - ) - self._update_state_table_non_checkout_datasets( - sess, non_checkout_datasets - ) + if not track_changes_as_dirty: + self._update_state_table_tree(sess, target_tree_id) + self._update_state_table_spatial_filter_hash( + sess, self.repo.spatial_filter.hexhash + ) + self._update_state_table_non_checkout_datasets(sess, non_checkout_datasets) def _do_reset_datasets( self, From ce49e47b75862d10dbe0aeaa60e8bd23aec9b656 Mon Sep 17 00:00:00 2001 From: Craig de Stigter Date: Mon, 11 Nov 2024 13:08:02 +1300 Subject: [PATCH 8/8] Fix hang in pointcloud conflict tests --- kart/cli_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kart/cli_util.py b/kart/cli_util.py index a5a28a4a6..dbbdb0058 100644 --- a/kart/cli_util.py +++ b/kart/cli_util.py @@ -179,7 +179,7 @@ def handle_parse_result(self, ctx, opts, args: list[str]): f"is mutually exclusive with {other.get_error_hint(ctx)}." ) else: - self.prompt = "" + self.prompt = None return super().handle_parse_result(ctx, opts, args)
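Note on the fix: this is based on reading click's Option.consume_value, so
treat the exact mechanics as an assumption rather than documented behaviour.
Click decides whether to prompt with a check roughly like:

    # paraphrased from click's Option.consume_value:
    if value is None and self.prompt is not None and not ctx.resilient_parsing:
        value = self.prompt_for_value(ctx)  # blocks waiting on stdin

An empty-string prompt still satisfies "is not None", so the pointcloud
conflict tests hung waiting for input; None disables prompting entirely.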