From 1451f74f178c2f0015a0b14a9345f471fb526049 Mon Sep 17 00:00:00 2001 From: Craig de Stigter Date: Fri, 8 Nov 2024 14:12:05 +1300 Subject: [PATCH 1/8] Install mypy and some stub packages in dev deps --- requirements/dev.in | 11 +++++++++++ requirements/dev.txt | 34 ++++++++++++++++++++++++++++------ 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/requirements/dev.in b/requirements/dev.in index 02dac4184..8d2b111be 100644 --- a/requirements/dev.in +++ b/requirements/dev.in @@ -14,3 +14,14 @@ ipython pexpect>4.3 appnope colorama + +mypy + # note: not maintained; not compatible with sqlalchemy 2.x + # so, probably drop this when we upgrade sqlalchemy + sqlalchemy-stubs==0.4 + # NOTE: we actually still have pygit2 1.12, but this is the first release of types-pygit2 + # TODO: upgrade to pygit2 1.16+ and use the builtin types rather than using this package. + types-pygit2==1.14.0.20240317 + + types-tqdm + types-Pygments==2.13.0 diff --git a/requirements/dev.txt b/requirements/dev.txt index 3061840b8..ee7fcf9c6 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -1,9 +1,5 @@ -# -# This file is autogenerated by pip-compile with Python 3.12 -# by the following command: -# -# "cmake --build build --target py-requirements" -# +# This file was autogenerated by uv via the following command: +# uv pip compile dev.in -o dev.txt appnope==0.1.3 # via # -r dev.in @@ -33,6 +29,12 @@ jedi==0.18.2 # via ipython matplotlib-inline==0.1.6 # via ipython +mypy==1.13.0 + # via + # -r dev.in + # sqlalchemy-stubs +mypy-extensions==1.0.0 + # via mypy parso==0.8.3 # via jedi pexpect==4.8.0 @@ -58,11 +60,31 @@ six==1.16.0 # -c requirements.txt # -c test.txt # asttokens +sqlalchemy-stubs==0.4 + # via -r dev.in stack-data==0.6.2 # via ipython traitlets==5.7.1 # via # ipython # matplotlib-inline +types-cffi==1.16.0.20240331 + # via types-pygit2 +types-docutils==0.21.0.20241005 + # via types-pygments +types-pygit2==1.14.0.20240317 + # via -r dev.in +types-pygments==2.13.0 + # via -r dev.in +types-setuptools==75.3.0.20241107 + # via + # types-cffi + # types-pygments +types-tqdm==4.66.0.20240417 + # via -r dev.in +typing-extensions==4.12.2 + # via + # mypy + # sqlalchemy-stubs wcwidth==0.2.5 # via prompt-toolkit From 470022f23e683210f8a486192ea7d6bbb80e405b Mon Sep 17 00:00:00 2001 From: Craig de Stigter Date: Fri, 8 Nov 2024 16:44:01 +1300 Subject: [PATCH 2/8] Add type hints and mypy config --- kart/__init__.py | 11 +- kart/annotations/db.py | 6 +- kart/base_dataset.py | 344 ++++++++++++++++++++++- kart/base_diff_writer.py | 70 +++-- kart/cli_util.py | 10 +- kart/completion.py | 9 +- kart/conflicts_util.py | 7 +- kart/conflicts_writer.py | 104 +++---- kart/dataset_mixins.py | 359 ------------------------- kart/dataset_util.py | 2 +- kart/diff.py | 2 +- kart/diff_structs.py | 11 +- kart/html_diff_writer.py | 5 - kart/json_diff_writers.py | 29 +- kart/key_filters.py | 18 +- kart/merge_util.py | 15 +- kart/meta_items.py | 5 + kart/raw_diff_delta.py | 34 +++ kart/repo.py | 3 +- kart/schema.py | 4 +- kart/show.py | 2 +- kart/spatial_filter/__init__.py | 6 + kart/spatial_filter/index.py | 49 ++-- kart/sqlalchemy/adapter/postgis.py | 1 - kart/sqlalchemy/adapter/sqlserver.py | 2 +- kart/sqlalchemy/sqlserver.py | 4 +- kart/structure.py | 8 +- kart/tabular/import_source.py | 12 +- kart/tabular/rich_table_dataset.py | 2 +- kart/tabular/v2.py | 3 +- kart/tabular/v3.py | 4 +- kart/tabular/v3_paths.py | 4 + kart/tabular/working_copy/base.py | 8 +- kart/tabular/working_copy/db_server.py | 6 +- 
kart/tabular/working_copy/gpkg.py | 3 - kart/tabular/working_copy/sqlserver.py | 2 +- kart/tile/importer.py | 2 +- kart/tile/tile_dataset.py | 10 +- kart/utils.py | 8 + mypy.ini | 34 +++ requirements/dev.in | 9 +- requirements/dev.txt | 69 +++-- 42 files changed, 699 insertions(+), 597 deletions(-) delete mode 100644 kart/dataset_mixins.py create mode 100644 kart/raw_diff_delta.py create mode 100644 mypy.ini diff --git a/kart/__init__.py b/kart/__init__.py index b6f471901..b360e1eec 100644 --- a/kart/__init__.py +++ b/kart/__init__.py @@ -12,7 +12,6 @@ import importlib import logging import os -import platform import sys L = logging.getLogger("kart.__init__") @@ -40,7 +39,7 @@ ) try: - import _kart_env + import _kart_env # type: ignore[import] L.debug("Found _kart_env configuration module") except ImportError: @@ -48,9 +47,9 @@ _kart_env = None is_frozen = getattr(sys, "frozen", None) and hasattr(sys, "_MEIPASS") -is_darwin = platform.system() == "Darwin" -is_linux = platform.system() == "Linux" -is_windows = platform.system() == "Windows" +is_darwin = sys.platform == "darwin" +is_linux = sys.platform == "linux" +is_windows = sys.platform == "win32" if is_darwin: libsuffix = "dylib" @@ -146,7 +145,7 @@ def _env_path(path): os.environ["PATH"] = ( os.pathsep.join(path_extras) + os.pathsep + os.environ.get("PATH", "") ) -if is_windows: +if sys.platform == "win32": os.add_dll_directory(prefix if is_frozen else os.path.join(prefix, "lib")) # FIXME: git2.dll is in the package directory, but isn't setup for ctypes to use _pygit2_spec = importlib.util.find_spec("pygit2") diff --git a/kart/annotations/db.py b/kart/annotations/db.py index 92d9beb53..a771a2fbe 100644 --- a/kart/annotations/db.py +++ b/kart/annotations/db.py @@ -2,7 +2,7 @@ import json import logging import threading - +from typing import TypeAlias import sqlalchemy from kart.sqlalchemy.sqlite import sqlite_engine @@ -13,7 +13,9 @@ from sqlalchemy.schema import CreateTable L = logging.getLogger(__name__) -Base = declarative_base() + +# TODO: remove this type ignore when we upgrade sqlalchemy to 2.0 and replace with DeclarativeBase +Base: TypeAlias = declarative_base() # type: ignore[valid-type] class KartAnnotation(Base): diff --git a/kart/base_dataset.py b/kart/base_dataset.py index 0875bcf01..73d0594cb 100644 --- a/kart/base_dataset.py +++ b/kart/base_dataset.py @@ -1,14 +1,19 @@ import functools import logging import sys +from collections.abc import Iterable +from typing import Callable, Optional, Protocol, TYPE_CHECKING, ClassVar import click import pygit2 from kart.core import all_blobs_in_tree, all_blobs_with_paths_in_tree -from kart.dataset_mixins import DatasetDiffMixin +from kart.diff_format import DiffFormat +from kart.diff_structs import DatasetDiff, DeltaDiff, Delta from kart.exceptions import InvalidOperation, UNSUPPORTED_VERSION, PATCH_DOES_NOT_APPLY +from kart.key_filters import DatasetKeyFilter, MetaKeyFilter, UserStringKeyFilter from kart.meta_items import MetaItemFileType, MetaItemVisibility +from kart.raw_diff_delta import RawDiffDelta class BaseDatasetMetaClass(type): @@ -33,7 +38,7 @@ def __new__(*args): return dataset_cls -class BaseDataset(DatasetDiffMixin, metaclass=BaseDatasetMetaClass): +class BaseDataset(metaclass=BaseDatasetMetaClass): """ Common interface for all datasets. @@ -52,24 +57,26 @@ class BaseDataset(DatasetDiffMixin, metaclass=BaseDatasetMetaClass): # Subclasses should override these fields to properly implement the dataset interface. 
- DATASET_TYPE = None # Example: "hologram" - VERSION = None # Example: 1 - DATASET_DIRNAME = None # Example: ".hologram-dataset.v1". + DATASET_TYPE: ClassVar[str] # Example: "hologram" + VERSION: ClassVar[int] # Example: 1 + DATASET_DIRNAME: ClassVar[str | None] = None # Example: ".hologram-dataset.v1". # (This should match the pattern DATASET_DIRNAME_PATTERN in kart.structure.) # The name / type of main items that make up this dataset - ie, not the meta items. - ITEM_TYPE = None + ITEM_TYPE: ClassVar[str | None] = None - WORKING_COPY_PART_TYPE = None # Example: working_copy.PartType.TABULAR + WORKING_COPY_PART_TYPE: ClassVar["PartType | None"] = ( + None # Example: working_copy.PartType.TABULAR + ) # Paths are generally relative to self.inner_tree, but table datasets put certain meta items in the outer-tree. # (This is an anti-pattern - when designing a new dataset type, don't put meta items in the outer-tree.) # Where meta-items are stored - blobs containing metadata about the structure or schema of the dataset. - META_PATH = "meta/" + META_PATH: ClassVar[str] = "meta/" # No meta-items are defined by default - subclasses have to define these themselves. - META_ITEMS = () + META_ITEMS: ClassVar[tuple["MetaItemDefinition", ...]] = () @classmethod def is_dataset_tree(cls, tree): @@ -119,6 +126,317 @@ def __init__(self, tree, path, repo, dirname=None): self.ensure_only_supported_capabilities() + def diff( + self, + other: Optional["BaseDataset"], + ds_filter: DatasetKeyFilter = DatasetKeyFilter.MATCH_ALL, + reverse: bool = False, + diff_format: DiffFormat = DiffFormat.FULL, + ): + """ + Generates a Diff from self -> other. + If reverse is true, generates a diff from other -> self. + """ + ds_diff = DatasetDiff() + meta_filter = ds_filter.get("meta", ds_filter.child_type()) + ds_diff["meta"] = self.diff_meta(other, meta_filter, reverse=reverse) + return ds_diff + + def diff_meta( + self, + other: Optional["BaseDataset"], + meta_filter: UserStringKeyFilter = MetaKeyFilter.MATCH_ALL, + reverse: bool = False, + ): + """ + Returns a diff from self -> other, but only for meta items. + If reverse is true, generates a diff from other -> self. + """ + old: BaseDataset | None + new: BaseDataset | None + if reverse: + old, new = other, self + else: + old, new = self, other + + meta_old = ( + {k: v for k, v in old.meta_items().items() if k in meta_filter} + if old + else {} + ) + meta_new = ( + {k: v for k, v in new.meta_items().items() if k in meta_filter} + if new + else {} + ) + return DeltaDiff.diff_dicts(meta_old, meta_new) + + def diff_to_working_copy( + self, + workdir_diff_cache: "WorkdirDiffCache", + ds_filter: DatasetKeyFilter = DatasetKeyFilter.MATCH_ALL, + *, + convert_to_dataset_format: bool | None = None, + ): + """ + Generates a diff from self to the working-copy. + It may be the case that only the dataset-revision used to write the working + copy can be used to do this (if we are tracking changes from that revision). + See diff_util.get_dataset_diff() to generate diffs more generally. + """ + # Subclasses to override. + pass + + def get_raw_diff_for_subtree( + self, + other: Optional["BaseDataset"], + subtree_name: str, + reverse: bool = False, + ): + """ + Get a pygit2.Diff of the diff between some subtree of this dataset, and the same subtree of another dataset + (typically actually the "same" dataset, but at a different revision). 
+        """
+        flags = pygit2.GIT_DIFF_SKIP_BINARY_CHECK
+        self_subtree = self.get_subtree(subtree_name)
+        other_subtree = other.get_subtree(subtree_name) if other else self._empty_tree
+
+        diff = self_subtree.diff_to_tree(other_subtree, flags=flags, swap=reverse)
+        self.L.debug(
+            "diff %s (%s -> %s / %s): %s changes",
+            subtree_name,
+            self_subtree.id,
+            other_subtree.id if other_subtree else None,
+            "R" if reverse else "F",
+            len(diff),
+        )
+        return diff
+
+    def diff_subtree(
+        self,
+        other: Optional["BaseDataset"],
+        subtree_name: str,
+        key_filter: UserStringKeyFilter = UserStringKeyFilter.MATCH_ALL,
+        *,
+        key_decoder_method: str,
+        value_decoder_method: str,
+        key_encoder_method: Optional[str] = None,
+        reverse: bool = False,
+    ):
+        """
+        A pattern for datasets to use for diffing some specific subtree. Works as follows:
+        1. Take some specific subtree of self and of another dataset
+           (ie self.inner_tree / "feature", other.inner_tree / "feature")
+        2. Use get_raw_diff_for_subtree to get a pygit2.Diff of the changes between those two trees.
+        3. Go through all the resulting (insert, update, delete) deltas
+        4. Fix up the paths to be relative to the dataset again (ie, prepend "feature/" onto them all)
+        5. Run some transform on each path to decide what to call each item (eg decode primary key)
+        6. Run some transform on each path to load the content of each item (eg, read and decode feature)
+
+        Args:
+        other - a dataset similar to self (ie the same dataset, but at a different commit).
+            This can be None, in which case there are no items in other and they don't need to be transformed.
+        subtree_name - the name of the subtree of the dataset to scan for diffs.
+        key_filter - deltas are only yielded if they involve at least one key that matches the key filter.
+        key_decoder_method, value_decoder_method - these must be names of methods that are present in both
+            self and other - self's methods are used to decode self's items, and other's methods for other's items.
+        key_encoder_method - optional. A method that is present in both self and other that allows us to go
+            straight to the keys the user is interested in (if they have requested specific keys in the key_filter).
+        reverse - normally yields deltas from self -> other, but if reverse is True, yields deltas from other -> self.
+        """
+
+        subtree_name = subtree_name.rstrip("/")
+
+        if not key_filter.match_all and key_encoder_method is not None:
+            # Handle the case where we are only interested in a few features.
+ deltas = self.get_raw_deltas_for_keys( + other, key_encoder_method, key_filter, reverse=reverse + ) + else: + raw_diff = self.get_raw_diff_for_subtree( + other, subtree_name, reverse=reverse + ) + # NOTE - we could potentially call diff.find_similar() to detect renames here + deltas = self.wrap_deltas_from_raw_diff( + raw_diff, lambda path: f"{subtree_name}/{path}" + ) + + def _no_dataset_error(method_name): + raise RuntimeError( + f"Can't call {method_name} to decode diff deltas: dataset is None" + ) + + def get_dataset_attr(dataset, method_name): + if dataset is not None: + return getattr(dataset, method_name) + # This shouldn't happen: + return lambda x: _no_dataset_error(method_name) + + old: BaseDataset | None + new: BaseDataset | None + if reverse: + old, new = other, self + else: + old, new = self, other + + yield from self.transform_raw_deltas( + deltas, + key_filter, + old_key_transform=get_dataset_attr(old, key_decoder_method), + old_value_transform=get_dataset_attr(old, value_decoder_method), + new_key_transform=get_dataset_attr(new, key_decoder_method), + new_value_transform=get_dataset_attr(new, value_decoder_method), + ) + + # We treat UNTRACKED like an ADD since we don't have a staging area - + # if the user has untracked files, we have to assume they want to add them. + # (This is not actually needed right now since we are not using this for working copy diffs). + _INSERT_TYPES = (pygit2.GIT_DELTA_ADDED, pygit2.GIT_DELTA_UNTRACKED) + _UPDATE_TYPES = (pygit2.GIT_DELTA_MODIFIED,) + _DELETE_TYPES = (pygit2.GIT_DELTA_DELETED,) + + _INSERT_UPDATE_DELETE = _INSERT_TYPES + _UPDATE_TYPES + _DELETE_TYPES + _INSERT_UPDATE = _INSERT_TYPES + _UPDATE_TYPES + _UPDATE_DELETE = _UPDATE_TYPES + _DELETE_TYPES + + def get_raw_deltas_for_keys( + self, + other: Optional["BaseDataset"], + key_encoder_method: str, + key_filter: UserStringKeyFilter, + reverse: bool = False, + ): + """ + Since we know which keys we are looking for, we can encode those keys and look up those blobs directly. + We output this as a series of deltas, just as we do when we run a normal diff, so that we can + take output from either code path and use it to generate a kart diff using transform_raw_deltas. + """ + + def _expand_keys(keys): + # If the user asks for mydataset:feature:123 they might mean str("123") or int(123) - which + # would be encoded differently. We look up both paths to see what we can find. + for key in keys: + yield key + if isinstance(key, str) and key.isdigit(): + yield int(key) + + encode_fn = getattr(self, key_encoder_method) + paths = set() + for key in _expand_keys(key_filter): + try: + paths.add(encode_fn(key, relative=True)) + except TypeError: + # The path encoder for this dataset can't encode that key, so it won't be there. 
+                continue
+
+        old: BaseDataset | None
+        new: BaseDataset | None
+        if reverse:
+            old, new = other, self
+        else:
+            old, new = self, other
+
+        def _get_blob(dataset, path):
+            if dataset is None or dataset.inner_tree is None:
+                return None
+            try:
+                return dataset.inner_tree / path
+            except KeyError:
+                return None
+
+        for path in paths:
+            old_blob = _get_blob(old, path)
+            new_blob = _get_blob(new, path)
+            if old_blob is None and new_blob is None:
+                continue
+            if (
+                old_blob is not None
+                and new_blob is not None
+                and old_blob.oid == new_blob.oid
+            ):
+                continue
+            yield RawDiffDelta.of(
+                path if old_blob else None, path if new_blob else None
+            )
+
+    def wrap_deltas_from_raw_diff(
+        self,
+        raw_diff: pygit2.Diff,
+        path_transform: Callable[[str], str],
+    ):
+        for delta in raw_diff.deltas:
+            old_path = path_transform(delta.old_file.path) if delta.old_file else None
+            new_path = path_transform(delta.new_file.path) if delta.new_file else None
+            yield RawDiffDelta(delta.status, delta.status_char(), old_path, new_path)
+
+    def transform_raw_deltas(
+        self,
+        deltas: Iterable["RawDiffDelta"],
+        key_filter: UserStringKeyFilter = UserStringKeyFilter.MATCH_ALL,
+        *,
+        old_key_transform: Callable[[str], str] = lambda x: x,
+        old_value_transform: Callable[[str], str] = lambda x: x,
+        new_key_transform: Callable[[str], str] = lambda x: x,
+        new_value_transform: Callable[[str], str] = lambda x: x,
+    ):
+        """
+        Given a list of RawDiffDeltas - inserts, updates, and deletes that happened at particular paths -
+        yields a list of Kart deltas, which look something like ((old_key, old_value), (new_key, new_value)).
+        A key could be a path, a meta-item name, or a primary key value.
+
+        key_filter - deltas are discarded if they don't involve any keys that match the key filter.
+        old/new_key_transform - converts the path into a key.
+        old/new_value_transform - converts the canonical-path into a value,
+            presumably first by loading the file contents at that path.
+
+        If any transform is not set, that transform defaults to returning its input unchanged.
+        """
+        for d in deltas:
+            self.L.debug("diff(): %s %s %s", d.status_char, d.old_path, d.new_path)
+
+            if d.status not in self._INSERT_UPDATE_DELETE:
+                # RENAMED, COPIED, IGNORED, TYPECHANGE, UNMODIFIED, UNREADABLE
+                # We don't encounter these status codes in the diffs we generate.
+                raise NotImplementedError(f"Delta status: {d.status_char}")
+
+            if d.status in self._UPDATE_DELETE:
+                old_key = old_key_transform(d.old_path)
+            else:
+                old_key = None
+
+            if d.status in self._INSERT_UPDATE:
+                new_key = new_key_transform(d.new_path)
+            else:
+                new_key = None
+
+            if old_key not in key_filter and new_key not in key_filter:
+                continue
+
+            if d.status in self._INSERT_TYPES:
+                self.L.debug("diff(): insert %s (%s)", d.new_path, new_key)
+            elif d.status in self._UPDATE_TYPES:
+                self.L.debug(
+                    "diff(): update %s %s -> %s %s",
+                    d.old_path,
+                    old_key,
+                    d.new_path,
+                    new_key,
+                )
+            elif d.status in self._DELETE_TYPES:
+                self.L.debug("diff(): delete %s %s", d.old_path, old_key)
+
+            if d.status in self._UPDATE_DELETE:
+                old_half_delta = old_key, old_value_transform(d.old_path)
+            else:
+                old_half_delta = None
+
+            if d.status in self._INSERT_UPDATE:
+                new_half_delta = new_key, new_value_transform(d.new_path)
+            else:
+                new_half_delta = None
+
+            yield Delta(old_half_delta, new_half_delta)
+
     def ensure_only_supported_capabilities(self):
         # TODO - loosen this restriction.
A dataset with capabilities that we don't support should (at worst) be treated # the same as any other unsupported dataset. @@ -138,7 +456,7 @@ def ensure_only_supported_capabilities(self): ) @functools.lru_cache() - def get_subtree(self, subtree_path): + def get_subtree(self, subtree_path) -> pygit2.Tree: if self.inner_tree is not None: try: return self.inner_tree / subtree_path @@ -472,3 +790,9 @@ def decode_path(self, path): else: # It generally shouldn't happen that we have files in the top-level. return ("", rel_path) + + +if TYPE_CHECKING: + from kart.working_copy import PartType + from kart.meta_items import MetaItemDefinition + from kart.workdir import WorkdirDiffCache diff --git a/kart/base_diff_writer.py b/kart/base_diff_writer.py index 63a2fdbdc..3f109389e 100644 --- a/kart/base_diff_writer.py +++ b/kart/base_diff_writer.py @@ -4,12 +4,19 @@ import re import sys from pathlib import Path +from typing import Generator import click from kart import diff_util from kart.diff_format import DiffFormat -from kart.diff_structs import FILES_KEY, WORKING_COPY_EDIT, BINARY_FILE, Delta +from kart.diff_structs import ( + FILES_KEY, + WORKING_COPY_EDIT, + BINARY_FILE, + Delta, + DatasetDiff, +) from kart.exceptions import CrsError, InvalidOperation from kart.key_filters import RepoKeyFilter from kart import list_of_conflicts @@ -736,8 +743,51 @@ def _check_for_linked_dataset_changes(self, ds_path, ds_diff): if dataset and dataset.get_meta_item("linked-storage.json") and ds_diff: self.linked_dataset_changes.add(ds_path) + def filtered_dataset_deltas_as_geojson( + self, ds_path: str, ds_diff: DatasetDiff + ) -> Generator[dict, None, None]: + from kart.tabular.feature_output import feature_as_geojson -class FeatureDeltaFetcher: + if "feature" not in ds_diff: + return + + old_transform, new_transform = self.get_geometry_transforms(ds_path, ds_diff) + + for key, delta in self.filtered_dataset_deltas(ds_path, ds_diff): + if delta.old: + change_type = "U-" if delta.new else "D" + yield feature_as_geojson( + delta.old_value, + delta.old_key, + ds_path, + change_type, + old_transform, + ) + if delta.new: + change_type = "U+" if delta.old else "I" + yield feature_as_geojson( + delta.new_value, + delta.new_key, + ds_path, + change_type, + new_transform, + ) + + +class BaseDeltaFetcher: + def _is_delta_value_ready(self, delta_key_value): + if delta_key_value is None: + return True + try: + delta_key_value.get_lazy_value() + return True + except KeyError as e: + if object_is_promised(e): + return False + raise + + +class FeatureDeltaFetcher(BaseDeltaFetcher): """ Given a diff Delta, either reports that it is available immediately, or kicks off a fetch so that it will be available soon, and adds it to the list of buffered deltas. This lets the diff writer above first output the deltas @@ -795,19 +845,8 @@ def finish_fetching_deltas(self): self._fetch_process.finish() yield from self.buffered_deltas - def _is_delta_value_ready(self, delta_key_value): - if delta_key_value is None: - return True - try: - delta_key_value.get_lazy_value() - return True - except KeyError as e: - if object_is_promised(e): - return False - raise - -class NullDeltaFetcher: +class NullDeltaFetcher(BaseDeltaFetcher): """ Given a diff Delta, checks to make sure that it is immediately available. If it is not, outputs an error message. """ @@ -834,6 +873,3 @@ def ensure_delta_is_ready_or_start_fetch(self, key, delta): def finish_fetching_deltas(self): # Nothing to do here. 
yield from () - - -NullDeltaFetcher._is_delta_value_ready = FeatureDeltaFetcher._is_delta_value_ready diff --git a/kart/cli_util.py b/kart/cli_util.py index a303bbf90..a5a28a4a6 100644 --- a/kart/cli_util.py +++ b/kart/cli_util.py @@ -114,7 +114,7 @@ def render_posix(command_path: str) -> None: p.communicate() -def render_windows(command_path: str) -> bytes: +def render_windows(command_path: str) -> None: from kart import prefix text_page = Path(prefix) / "help" / f'{command_path.replace(" ", "-")}' @@ -155,8 +155,8 @@ def login(ctx=None, username:str=None, password:str=None, token:str=None) -> Non print("...do what you like with the params you got...") """ - def __init__(self, *args, **kwargs): - self.exclusive_with: list = kwargs.pop("exclusive_with") + def __init__(self, *args, exclusive_with: list[str], **kwargs): + self.exclusive_with = exclusive_with assert self.exclusive_with, "'exclusive_with' parameter required" kwargs["help"] = ( @@ -167,7 +167,7 @@ def __init__(self, *args, **kwargs): ).strip() super().__init__(*args, **kwargs) - def handle_parse_result(self, ctx, opts, args): + def handle_parse_result(self, ctx, opts, args: list[str]): current_opt: bool = self.name in opts for other_name in self.exclusive_with: if other_name in opts: @@ -179,7 +179,7 @@ def handle_parse_result(self, ctx, opts, args): f"is mutually exclusive with {other.get_error_hint(ctx)}." ) else: - self.prompt = None + self.prompt = "" return super().handle_parse_result(ctx, opts, args) diff --git a/kart/completion.py b/kart/completion.py index 8a3f68ae9..d4e966415 100644 --- a/kart/completion.py +++ b/kart/completion.py @@ -12,10 +12,7 @@ from kart import subprocess_util as subprocess -try: - import shellingham -except ImportError: - shellingham = None +import shellingham def provide_default_shell(): @@ -190,12 +187,14 @@ def install_powershell(*, prog_name: str, complete_var: str, shell: str) -> Path def install_tab_completion( shell: Optional[str] = None, ) -> Tuple[str, Path]: - if shell is None and shellingham is not None: + if shell is None: try: shell, _ = shellingham.detect_shell() except shellingham.ShellDetectionFailure: shell = os.path.basename(provide_default_shell()) + assert shell + # Hardcode these variables - kart helper means that click can get mixed up between KART and KART_CLI, # but this means we always use KART in practise. kwargs = {"prog_name": "kart", "complete_var": "_KART_COMPLETE", "shell": shell} diff --git a/kart/conflicts_util.py b/kart/conflicts_util.py index 3dbd5ea6a..27d2556de 100644 --- a/kart/conflicts_util.py +++ b/kart/conflicts_util.py @@ -78,7 +78,7 @@ def _path_part_sort_key(path_part): return "N", path_part -def conflicts_json_as_text(value, path="", level=0) -> str: +def conflicts_json_as_text(value: str | int | dict | list, path="", level=0) -> str: """Converts the JSON output of list_conflicts to a string. 
The conflicts themselves should already be in the appropriate format - @@ -96,6 +96,8 @@ def conflicts_json_as_text(value, path="", level=0) -> str: elif isinstance(value, list): indent = " " * level return "".join(f"{indent}{path}{item}\n" for item in value) + else: + raise ValueError(f"Unexpected value type: {type(value)}") def item_to_text(key: str, value: dict, path: str, level: int) -> str: @@ -110,7 +112,7 @@ def item_to_text(key: str, value: dict, path: str, level: int) -> str: return f"{styled_key_text}\n{value_text}" -def get_key_text_color(key_text: str) -> str: +def get_key_text_color(key_text: str) -> str | None: """Takes a given path and outputs an appropriate style for it The format for the key_text is: @@ -125,3 +127,4 @@ def get_key_text_color(key_text: str) -> str: for key, color in style.items(): if key_text.endswith(key): return color + return None diff --git a/kart/conflicts_writer.py b/kart/conflicts_writer.py index 713d5e885..acc308847 100644 --- a/kart/conflicts_writer.py +++ b/kart/conflicts_writer.py @@ -3,8 +3,9 @@ import sys import click import pygit2 +from typing import ClassVar -from typing import Union, Type, List +from typing import Union, Type, List, Iterable from pathlib import Path from .conflicts_util import ( @@ -23,6 +24,7 @@ ensure_conflicts_ready, ) from .output_util import dump_json_output, resolve_output_path +from .repo import KartRepo from . import diff_util L = logging.getLogger("kart.conflicts_writer") @@ -33,25 +35,25 @@ class BaseConflictsWriter: A base class for writing conflicts output. """ + output_format: ClassVar[str] + def __init__( self, - repo: pygit2.Repository, + repo: KartRepo, user_key_filters: tuple = (), output_path: str = "-", summarise: int = 0, flat: bool = False, *, json_style: str = "pretty", - target_crs: CoordinateReferenceString = None, - merged_index: MergedIndex = None, - merge_context: MergeContext = None, + target_crs: CoordinateReferenceString | None = None, + merged_index: MergedIndex | None = None, + merge_context: MergeContext | None = None, ): self.repo = repo self.target_crs = target_crs self.flat = flat self.summarise = summarise - self.merge_context = merge_context - self.merged_index = merged_index self.repo_key_filter = RepoKeyFilter.build_from_user_patterns(user_key_filters) self.json_style = json_style self.output_path = self._check_output_path( @@ -65,12 +67,14 @@ def __init__( ) if merged_index is None: - self.merged_index = MergedIndex.read_from_repo(repo) + merged_index = MergedIndex.read_from_repo(repo) + self.merged_index = merged_index if merge_context is None: - self.merge_context = MergeContext.read_from_repo(repo) + merge_context = MergeContext.read_from_repo(repo) + self.merge_context = merge_context @classmethod - def _normalize_output_path(cls, output_path: str) -> Union[str, Path]: + def _normalize_output_path(cls, output_path: str | Path) -> str | Path: if not output_path or output_path == "-": return output_path if isinstance(output_path, str): @@ -82,23 +86,20 @@ def _normalize_output_path(cls, output_path: str) -> Union[str, Path]: @classmethod def get_conflicts_writer_class( cls, output_format: str - ) -> Union[Type[BaseConflictsWriter], None]: + ) -> Type[BaseConflictsWriter] | None: """Returns suitable subclass for desired output format""" - output_format_to_writer = { - "quiet": QuietConflictsWriter, - "json": JsonConflictsWriter, - "text": TextConflictsWriter, - "geojson": GeojsonConflictsWriter, - } - - cls.output_format = output_format - if not 
output_format_to_writer.get(output_format): - raise click.BadParameter( - f"Unrecognized output format: {output_format}", - param_hint="output_format", - ) - - return output_format_to_writer.get(output_format) + for klass in ( + QuietConflictsWriter, + JsonConflictsWriter, + TextConflictsWriter, + GeojsonConflictsWriter, + ): + if klass.output_format == output_format: + return klass + raise click.BadParameter( + f"Unrecognized output format: {output_format}", + param_hint="output_format", + ) def get_conflicts(self) -> List[RichConflict]: """Returns a list of rich conflicts""" @@ -112,13 +113,12 @@ def get_conflicts(self) -> List[RichConflict]: def _check_output_path( cls, repo: pygit2.Repository, - output_path: Union[str, Path], - output_format: str = None, - ) -> Union[str, Path]: + output_path: str | Path, + ) -> str | Path: """Make sure the given output_path is valid for this implementation (ie, are directories supported).""" - if output_format and isinstance(output_path, Path) and output_path.is_dir(): + if isinstance(output_path, Path) and output_path.is_dir(): raise click.BadParameter( - f"Directory is not valid for --output with -o {output_format}", + f"Directory is not valid for --output with -o {cls.output_format}", param_hint="--output", ) return output_path @@ -152,7 +152,7 @@ def list_conflicts(self) -> dict: output_dict = {} conflict_output = _CONFLICT_PLACEHOLDER - conflicts = self.get_conflicts() + conflicts: Iterable[RichConflict] = self.get_conflicts() if not self.repo_key_filter.match_all: conflicts = (c for c in conflicts if c.matches_filter(self.repo_key_filter)) @@ -183,6 +183,8 @@ def list_conflicts(self) -> dict: class QuietConflictsWriter(BaseConflictsWriter): + output_format = "quiet" + def write_conflicts(self): pass @@ -197,15 +199,16 @@ class JsonConflictsWriter(BaseConflictsWriter): {"kart.conflicts/v1": {"dataset-path": { "feature": { "id": { "ancestor/ours/theirs": [...]}}}}} """ + output_format = "json" + @classmethod def _check_output_path( cls, repo: pygit2.Repository, - output_path: Union[str, Path], - output_format: str = "json", - ) -> Union[str, Path]: + output_path: str | Path, + ) -> str | Path: """Make sure the given output_path is valid for this implementation (ie, are directories supported).""" - return super()._check_output_path(repo, output_path, output_format) + return super()._check_output_path(repo, output_path) def write_conflicts(self): output_obj = super().list_conflicts() @@ -226,6 +229,8 @@ class TextConflictsWriter(BaseConflictsWriter): the changes on the ancestor, theirs and ours branch. """ + output_format = "text" + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.fp = resolve_output_path(self.output_path) @@ -235,11 +240,10 @@ def __init__(self, *args, **kwargs): def _check_output_path( cls, repo: pygit2.Repository, - output_path: Union[str, Path], - output_format: str = "text", - ) -> Union[str, Path]: + output_path: str | Path, + ) -> str | Path: """Make sure the given output_path is valid for this implementation (ie, are directories supported).""" - return super()._check_output_path(repo, output_path, output_format) + return super()._check_output_path(repo, output_path) def write_conflicts(self): output_dict = super().list_conflicts() @@ -262,13 +266,19 @@ class GeojsonConflictsWriter(BaseConflictsWriter): Meta conflicts aren't output at all. 
""" + output_format = "geojson" + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.flat = True self.summarise = 0 @classmethod - def _check_output_path(cls, repo: pygit2.Repository, output_path: Path): + def _check_output_path( + cls, + repo: pygit2.Repository, + output_path: str | Path, + ) -> str | Path: """Make sure the given output_path is valid for this implementation (ie, are directories supported).""" if isinstance(output_path, Path): if output_path.is_file(): @@ -298,7 +308,7 @@ def write_conflicts(self) -> None: "Need to specify a directory via --output for GeoJSON with more than one dataset", param_hint="--output", ) - conflicts = self.get_conflicts() + conflicts: Iterable[RichConflict] = self.get_conflicts() if not self.repo_key_filter.match_all: conflicts = (c for c in conflicts if c.matches_filter(self.repo_key_filter)) @@ -308,7 +318,7 @@ def write_conflicts(self) -> None: geojson_conflicts = self.get_geojson_conflicts(conflicts) self.output_geojson_conflicts(geojson_conflicts) - def get_geojson_conflicts(self, conflicts: List[RichConflict]) -> dict: + def get_geojson_conflicts(self, conflicts: Iterable[RichConflict]) -> dict: """Returns geojson conflicts as a dict""" output_dict = {} for conflict in conflicts: @@ -328,7 +338,9 @@ def output_geojson_conflicts(self, json_obj: dict) -> None: conflicts = self.separate_geojson_conflicts_by_ds(json_obj) for ds_path, features in conflicts.items(): - if self.output_path == "-": + ds_output_path: Path | str + if isinstance(self.output_path, str): + assert self.output_path == "-" ds_output_path = "-" else: ds_output_filename = str(ds_path).replace("/", "__") + ".geojson" @@ -341,9 +353,9 @@ def output_geojson_conflicts(self, json_obj: dict) -> None: json_style=self.json_style, ) - def separate_geojson_conflicts_by_ds(self, json_obj: dict) -> dict: + def separate_geojson_conflicts_by_ds(self, json_obj: dict[str, dict]) -> dict: """Separates geojson conflicts by datasets""" - conflicts = dict() + conflicts: dict[str, list[dict]] = dict() for key, feature in json_obj.items(): if "meta" == key.split(":")[1]: continue diff --git a/kart/dataset_mixins.py b/kart/dataset_mixins.py deleted file mode 100644 index 8e9aad785..000000000 --- a/kart/dataset_mixins.py +++ /dev/null @@ -1,359 +0,0 @@ -from collections.abc import Iterable -from typing import Callable, Optional, TYPE_CHECKING - -import pygit2 - -from kart.diff_structs import DatasetDiff, DeltaDiff, Delta -from kart.diff_format import DiffFormat -from kart.key_filters import DatasetKeyFilter, MetaKeyFilter, UserStringKeyFilter - - -class DatasetDiffMixin: - """ - This mixin should be added to a dataset to add diffing functionality. - - self.diff_meta() should work "out of the box" as long as self.meta_items() is implemented. - self.diff_subtree() can be called with appropriate args to generate diffs of dataset contents, eg, features. - """ - - # Returns the meta-items diff for this dataset. - def diff( - self: "BaseDataset", - other: Optional["BaseDataset"], - ds_filter: DatasetKeyFilter = DatasetKeyFilter.MATCH_ALL, - reverse: bool = False, - diff_format: DiffFormat = DiffFormat.FULL, - ): - """ - Generates a Diff from self -> other. - If reverse is true, generates a diff from other -> self. 
- """ - ds_diff = DatasetDiff() - meta_filter = ds_filter.get("meta", ds_filter.child_type()) - ds_diff["meta"] = self.diff_meta(other, meta_filter, reverse=reverse) - return ds_diff - - def diff_meta( - self: "BaseDataset", - other: Optional["BaseDataset"], - meta_filter: UserStringKeyFilter = MetaKeyFilter.MATCH_ALL, - reverse: bool = False, - ): - """ - Returns a diff from self -> other, but only for meta items. - If reverse is true, generates a diff from other -> self. - """ - if reverse: - old, new = other, self - else: - old, new = self, other - - meta_old = ( - {k: v for k, v in old.meta_items().items() if k in meta_filter} - if old - else {} - ) - meta_new = ( - {k: v for k, v in new.meta_items().items() if k in meta_filter} - if new - else {} - ) - return DeltaDiff.diff_dicts(meta_old, meta_new) - - def diff_to_working_copy( - self: "BaseDataset", - workdir_diff_cache: "WorkdirDiffCache", - ds_filter: DatasetKeyFilter = DatasetKeyFilter.MATCH_ALL, - *, - convert_to_dataset_format: bool = None, - ): - """ - Generates a diff from self to the working-copy. - It may be the case that only the dataset-revision used to write the working - copy can be used to do this (if we are tracking changes from that revision). - See diff_util.get_dataset_diff() to generate diffs more generally. - """ - # Subclasses to override. - pass - - def get_raw_diff_for_subtree( - self: "BaseDataset", - other: Optional["BaseDataset"], - subtree_name: str, - reverse: bool = False, - ): - """ - Get a pygit2.Diff of the diff between some subtree of this dataset, and the same subtree of another dataset - (typically actually the "same" dataset, but at a different revision). - """ - flags = pygit2.GIT_DIFF_SKIP_BINARY_CHECK - self_subtree = self.get_subtree(subtree_name) - other_subtree = other.get_subtree(subtree_name) if other else self._empty_tree - - diff = self_subtree.diff_to_tree(other_subtree, flags=flags, swap=reverse) - self.L.debug( - "diff %s (%s -> %s / %s): %s changes", - subtree_name, - self_subtree.id, - other_subtree.id if other_subtree else None, - "R" if reverse else "F", - len(diff), - ) - return diff - - def diff_subtree( - self: "BaseDataset", - other: Optional["BaseDataset"], - subtree_name: str, - key_filter: UserStringKeyFilter = UserStringKeyFilter.MATCH_ALL, - *, - key_decoder_method: str, - value_decoder_method: str, - key_encoder_method: Optional[str] = None, - reverse: bool = False, - ): - """ - A pattern for datasets to use for diffing some specific subtree. Works as follows: - 1. Take some specific subtree of self and of a other - (ie self.inner_tree / "feature", other.inner_tree / "feature") - 2. Use get_raw_diff_from_subtree to get a pygit2.Diff of the changes between those two trees. - 3. Go through all the resulting (insert, update, delete) deltas - 4. Fix up the paths to be relative to the dataset again (ie, prepend "feature/" onto them all - 5. Run some transform on each path to decide what to call each item (eg decode primary key) - 6. Run some transform on each path to load the content of each item (eg, read and decode feature) - - Args: - other - a dataset similar to self (ie the same dataset, but at a different commit). - This can be None, in which case there are no items in other and they don't need to be transformed. - subtree_name - the name of the subtree of the dataset to scan for diffs. - key_filter - deltas are only yielded if they involve at least one key that matches the key filter. 
- key_decoder_method, value_decoder_method - these must be names of methods that are present in both - self and other - self's methods are used to decode self's items, and other's methods for other's items. - key_encoder_method - optional. A method that is present in both self and other that allows us to go - straight to the keys the user is interested in (if they have requested specific keys in the key_filter). - reverse - normally yields deltas from self -> other, but if reverse is True, yields deltas from other -> self. - """ - - subtree_name = subtree_name.rstrip("/") - - if not key_filter.match_all and key_encoder_method is not None: - # Handle the case where we are only interested in a few features. - deltas = self.get_raw_deltas_for_keys( - other, key_encoder_method, key_filter, reverse=reverse - ) - else: - raw_diff = self.get_raw_diff_for_subtree( - other, subtree_name, reverse=reverse - ) - # NOTE - we could potentially call diff.find_similar() to detect renames here - deltas = self.wrap_deltas_from_raw_diff( - raw_diff, lambda path: f"{subtree_name}/{path}" - ) - - def _no_dataset_error(method_name): - raise RuntimeError( - f"Can't call {method_name} to decode diff deltas: dataset is None" - ) - - def get_dataset_attr(dataset, method_name): - if dataset is not None: - return getattr(dataset, method_name) - # This shouldn't happen: - return lambda x: _no_dataset_error(method_name) - - if reverse: - old, new = other, self - else: - old, new = self, other - - yield from self.transform_raw_deltas( - deltas, - key_filter, - old_key_transform=get_dataset_attr(old, key_decoder_method), - old_value_transform=get_dataset_attr(old, value_decoder_method), - new_key_transform=get_dataset_attr(new, key_decoder_method), - new_value_transform=get_dataset_attr(new, value_decoder_method), - ) - - # We treat UNTRACKED like an ADD since we don't have a staging area - - # if the user has untracked files, we have to assume they want to add them. - # (This is not actually needed right now since we are not using this for working copy diffs). - _INSERT_TYPES = (pygit2.GIT_DELTA_ADDED, pygit2.GIT_DELTA_UNTRACKED) - _UPDATE_TYPES = (pygit2.GIT_DELTA_MODIFIED,) - _DELETE_TYPES = (pygit2.GIT_DELTA_DELETED,) - - _INSERT_UPDATE_DELETE = _INSERT_TYPES + _UPDATE_TYPES + _DELETE_TYPES - _INSERT_UPDATE = _INSERT_TYPES + _UPDATE_TYPES - _UPDATE_DELETE = _UPDATE_TYPES + _DELETE_TYPES - - def get_raw_deltas_for_keys( - self: "BaseDataset", - other: Optional["BaseDataset"], - key_encoder_method: str, - key_filter: UserStringKeyFilter, - reverse: bool = False, - ): - """ - Since we know which keys we are looking for, we can encode those keys and look up those blobs directly. - We output this as a series of deltas, just as we do when we run a normal diff, so that we can - take output from either code path and use it to generate a kart diff using transform_raw_deltas. - """ - - def _expand_keys(keys): - # If the user asks for mydataset:feature:123 they might mean str("123") or int(123) - which - # would be encoded differently. We look up both paths to see what we can find. - for key in keys: - yield key - if isinstance(key, str) and key.isdigit(): - yield int(key) - - encode_fn = getattr(self, key_encoder_method) - paths = set() - for key in _expand_keys(key_filter): - try: - paths.add(encode_fn(key, relative=True)) - except TypeError: - # The path encoder for this dataset can't encode that key, so it won't be there. 
- continue - - if reverse: - old, new = other, self - else: - old, new = self, other - - def _get_blob(dataset, path): - if dataset is None or dataset.inner_tree is None: - return None - try: - return dataset.inner_tree / path - except KeyError: - return None - - for path in paths: - old_blob = _get_blob(old, path) - new_blob = _get_blob(new, path) - if old_blob is None and new_blob is None: - continue - if ( - old_blob is not None - and new_blob is not None - and old_blob.oid == new_blob.oid - ): - continue - yield RawDiffDelta.of( - path if old_blob else None, path if new_blob else None - ) - - def wrap_deltas_from_raw_diff( - self: "BaseDataset", raw_diff: pygit2.Diff, path_transform: Callable[[str], str] - ): - for delta in raw_diff.deltas: - old_path = path_transform(delta.old_file.path) if delta.old_file else None - new_path = path_transform(delta.new_file.path) if delta.new_file else None - yield RawDiffDelta(delta.status, delta.status_char(), old_path, new_path) - - def transform_raw_deltas( - self: "BaseDataset", - deltas: Iterable["RawDiffDelta"], - key_filter: UserStringKeyFilter = UserStringKeyFilter.MATCH_ALL, - *, - old_key_transform: Callable[[str], str] = lambda x: x, - old_value_transform: Callable[[str], str] = lambda x: x, - new_key_transform: Callable[[str], str] = lambda x: x, - new_value_transform: Callable[[str], str] = lambda x: x, - ): - """ - Given a list of RawDiffDeltas - inserts, updates, and deletes that happened at particular paths - - yields a list of Kart deltas, which look something like ((old_key, old_value), (new_key, new_value)). - A key could be a path, a meta-item name, or a primary key value. - - key-filter - deltas are discarded if they don't involve any keys that matches the key filter. - old/new_key_transform - converts the path into a key. - old/new_value_transform - converts the canonical-path into a value, - presumably first by loading the file contents at that path. - - If any transform is not set, that transform defaults to returning the value it was input. - """ - for d in deltas: - self.L.debug("diff(): %s %s %s", d.status_char, d.old_path, d.new_path) - - if d.status not in self._INSERT_UPDATE_DELETE: - # RENAMED, COPIED, IGNORED, TYPECHANGE, UNMODIFIED, UNREADABLE - # We don't enounter these status codes in the diffs we generate. - raise NotImplementedError(f"Delta status: {d.status_char}") - - if d.status in self._UPDATE_DELETE: - old_key = old_key_transform(d.old_path) - else: - old_key = None - - if d.status in self._INSERT_UPDATE: - new_key = new_key_transform(d.new_path) - else: - new_key = None - - if old_key not in key_filter and new_key not in key_filter: - continue - - if d.status in self._INSERT_TYPES: - self.L.debug("diff(): insert %s (%s)", d.new_path, new_key) - elif d.status in self._UPDATE_TYPES: - self.L.debug( - "diff(): update %s %s -> %s %s", - d.old_path, - old_key, - d.new_path, - new_key, - ) - elif d.status in self._DELETE_TYPES: - self.L.debug("diff(): delete %s %s", d.old_path, old_key) - - if d.status in self._UPDATE_DELETE: - old_half_delta = old_key, old_value_transform(d.old_path) - else: - old_half_delta = None - - if d.status in self._INSERT_UPDATE: - new_half_delta = new_key, new_value_transform(d.new_path) - else: - new_half_delta = None - - yield Delta(old_half_delta, new_half_delta) - - -class RawDiffDelta: - """ - Just like pygit2.DiffDelta, this simply stores the fact that a particular git blob has changed. - Exactly how it is changed is not stored - just the status and the blob paths. 
- Contrast with diff_structs.Delta, which is higher level - it stores information about - a particular feature or meta-item that has changed, and exposes the values it has changed from and to. - - This is needed to fill the same purpose as pygit2.DiffDelta because pygit2.DiffDelta's can't be - created except by running a pygit2 diff - which we don't always want to do when generating diff deltas: - see get_raw_deltas_for_keys. - """ - - __slots__ = ["status", "status_char", "old_path", "new_path"] - - def __init__(self, status, status_char, old_path, new_path): - self.status = status - self.status_char = status_char - self.old_path = old_path - self.new_path = new_path - - @classmethod - def of(cls, old_path, new_path, reverse=False): - if reverse: - old_path, new_path = new_path, old_path - - if old_path is None: - return RawDiffDelta(pygit2.GIT_DELTA_ADDED, "A", old_path, new_path) - elif new_path is None: - return RawDiffDelta(pygit2.GIT_DELTA_DELETED, "D", old_path, new_path) - else: - return RawDiffDelta(pygit2.GIT_DELTA_MODIFIED, "M", old_path, new_path) - - -if TYPE_CHECKING: - # This is here to avoid circular imports - from kart.base_dataset import BaseDataset, WorkdirDiffCache diff --git a/kart/dataset_util.py b/kart/dataset_util.py index 4aeabe2c3..31e408a3f 100644 --- a/kart/dataset_util.py +++ b/kart/dataset_util.py @@ -81,7 +81,7 @@ def _validate_dataset_path(path: str) -> None: def validate_dataset_paths(paths: list[str]) -> None: - existing_paths_lower = {} + existing_paths_lower: dict[str, str] = {} for path in paths: _validate_dataset_path(path) path_lower = path.casefold() diff --git a/kart/diff.py b/kart/diff.py index 958352d1c..5f7a7a6c1 100644 --- a/kart/diff.py +++ b/kart/diff.py @@ -132,7 +132,7 @@ def feature_count_diff( ) @click.option( "--diff-format", - type=click.Choice(DiffFormat), + type=click.Choice(list(DiffFormat)), default=DiffFormat.FULL, help="Choose the diff format: \n'full' for full diff or 'no-data-changes' for metadata and a bool indicating the feature/tile tree changes.", ) diff --git a/kart/diff_structs.py b/kart/diff_structs.py index b1c0da75f..5ff7b89a1 100644 --- a/kart/diff_structs.py +++ b/kart/diff_structs.py @@ -145,7 +145,7 @@ def key(self): # This mostly works, but isn't perfect when renames are involved. return self.old_key if self.old_key is not None else self.new_key - def __add__(self, other): + def __add__(self, other: "Delta"): """Concatenate this delta with the subsequent delta, return the result as a single delta.""" # Note: this method assumes that the deltas being concatenated are related, # ie that self.new == other.old. Don't try to concatenate arbitrary deltas together. @@ -237,7 +237,7 @@ class RichDict(UserDict): or recursive_set, even if this involves creating extra dicts to contain the new value. """ - child_type = None + child_type: type | tuple[type, ...] | None = None def __init__(self, *args, **kwargs): if type(self) == RichDict: @@ -375,7 +375,7 @@ def __invert__(self): result[key] = ~value return result - def __add__(self, other, result=None): + def __add__(self, other: "Diff"): """Concatenate this Diff to the subsequent Diff, by concatenating all children with matching keys.""" # FIXME: this algorithm isn't perfect when renames are involved. 
@@ -383,8 +383,7 @@ def __add__(self, other, result=None): if type(self) != type(other): raise TypeError(f"Diff type mismatch: {type(self)} != {type(other)}") - if result is None: - result = self.empty_copy() + result = self.empty_copy() for key in self.keys() | other.keys(): lhs = self.get(key) @@ -399,7 +398,7 @@ def __add__(self, other, result=None): result[key] = lhs if lhs is not None else rhs return result - def __iadd__(self, other): + def __iadd__(self, other: "Diff"): """ Concatenate this Diff to the subsequent Diff, by concatenating all children with matching keys. Slightly faster than __add__, modifies self in place. diff --git a/kart/html_diff_writer.py b/kart/html_diff_writer.py index 856835d8e..545f47290 100644 --- a/kart/html_diff_writer.py +++ b/kart/html_diff_writer.py @@ -80,8 +80,3 @@ def substitute_into_template(cls, template, title, all_datasets_geojson): .replace(">", r"\x3e"), } ) - - -HtmlDiffWriter.filtered_dataset_deltas_as_geojson = ( - GeojsonDiffWriter.filtered_dataset_deltas_as_geojson -) diff --git a/kart/json_diff_writers.py b/kart/json_diff_writers.py index 661d9bfaa..48509fff1 100644 --- a/kart/json_diff_writers.py +++ b/kart/json_diff_writers.py @@ -3,6 +3,7 @@ import threading from datetime import datetime, timedelta, timezone from pathlib import Path +from typing import Generator import click @@ -476,31 +477,3 @@ def _warn_about_any_non_feature_diffs( f"Warning: {count} tile changes in {ds_path} aren't included in GeoJSON output", err=True, ) - - def filtered_dataset_deltas_as_geojson( - self, ds_path: str, ds_diff: DatasetDiff - ) -> Union[None, dict]: - if "feature" not in ds_diff: - return - - old_transform, new_transform = self.get_geometry_transforms(ds_path, ds_diff) - - for key, delta in self.filtered_dataset_deltas(ds_path, ds_diff): - if delta.old: - change_type = "U-" if delta.new else "D" - yield feature_as_geojson( - delta.old_value, - delta.old_key, - ds_path, - change_type, - old_transform, - ) - if delta.new: - change_type = "U+" if delta.old else "I" - yield feature_as_geojson( - delta.new_value, - delta.new_key, - ds_path, - change_type, - new_transform, - ) diff --git a/kart/key_filters.py b/kart/key_filters.py index 17c1b496a..08b45e98c 100644 --- a/kart/key_filters.py +++ b/kart/key_filters.py @@ -1,5 +1,6 @@ import fnmatch import re +from typing import ClassVar import click @@ -18,6 +19,8 @@ class UserStringKeyFilter(set): matches them against a set of strings the user has supplied. """ + MATCH_ALL: ClassVar["UserStringKeyFilter"] + def __init__(self, *args, match_all=False, **kwargs): super().__init__(*args, **kwargs) self.match_all = match_all @@ -80,6 +83,8 @@ class KeyFilterDict(RichDict): at any/all keys, and that child also matches all. """ + MATCH_ALL: ClassVar["KeyFilterDict"] + def __init__(self, *args, match_all=False, **kwargs): super().__init__(*args, **kwargs) self.match_all = match_all @@ -126,6 +131,8 @@ class DatasetKeyFilter(KeyFilterDict): for filtering meta items, features, tiles etc. """ + MATCH_ALL: ClassVar["DatasetKeyFilter"] + child_type = UserStringKeyFilter child_that_matches_all = UserStringKeyFilter(match_all=True) @@ -142,7 +149,8 @@ class RepoKeyFilter(KeyFilterDict): for filtering items in any or all datasets. 
""" - child_type = DatasetKeyFilter + MATCH_ALL: ClassVar["RepoKeyFilter"] + child_type: type = DatasetKeyFilter child_that_matches_all = DatasetKeyFilter(match_all=True) # https://github.com/koordinates/kart/blob/master/docs/DATASETS_v3.md#valid-dataset-names @@ -284,6 +292,9 @@ def __setitem__(self, key, value): super().__setitem__(key, value) +RepoKeyFilter.MATCH_ALL = RepoKeyFilter(match_all=True) + + class NegateKeyFilter: """ A key filter that contains whatever the given delegate does not contain, and vice versa. @@ -306,9 +317,6 @@ def __setitem__(self, key, value): raise NotImplementedError() -RepoKeyFilter.MATCH_ALL = RepoKeyFilter(match_all=True) - - class DeltaFilter(set): """ Filters parts of individual deltas - new or old values for inserts, updates, or deletes. @@ -318,6 +326,8 @@ class DeltaFilter(set): "++" is they key for new values of inserts """ + MATCH_ALL: ClassVar["DeltaFilter"] + def __init__(self, *args, match_all=False, **kwargs): super().__init__(*args, **kwargs) self.match_all = match_all diff --git a/kart/merge_util.py b/kart/merge_util.py index f2cf65992..83a73180e 100644 --- a/kart/merge_util.py +++ b/kart/merge_util.py @@ -2,6 +2,7 @@ import json import re from collections import namedtuple +from typing import ClassVar import click import pygit2 @@ -40,12 +41,11 @@ def write_merged_index_flags(repo): # Utility classes relevant to merges - used by merge command, conflicts command, resolve command. -# pygit2 always has this order - we use it too for consistency, -# and so we can meaningfully zip() our tuples with theirs -_ANCESTOR_OURS_THEIRS_ORDER = ("ancestor", "ours", "theirs") - - -class AncestorOursTheirs(namedtuple("AncestorOursTheirs", _ANCESTOR_OURS_THEIRS_ORDER)): +class AncestorOursTheirs( + # pygit2 always has this order - we use it too for consistency, + # and so we can meaningfully zip() our tuples with theirs + namedtuple("AncestorOursTheirs", ("ancestor", "ours", "theirs")) +): """ When merging two commits, we can end up with three versions of lots of things - mostly pygit2 IndexEntrys, but could also be paths, repositories, structures, datasets. @@ -53,7 +53,8 @@ class AncestorOursTheirs(namedtuple("AncestorOursTheirs", _ANCESTOR_OURS_THEIRS_ Like pygit2, we keep the 3 versions always in the same order - ancestor, ours, theirs. """ - NAMES = _ANCESTOR_OURS_THEIRS_ORDER + NAMES = ("ancestor", "ours", "theirs") + EMPTY: ClassVar["AncestorOursTheirs"] @staticmethod def partial(*, ancestor=None, ours=None, theirs=None): diff --git a/kart/meta_items.py b/kart/meta_items.py index b732fd301..042863014 100644 --- a/kart/meta_items.py +++ b/kart/meta_items.py @@ -2,6 +2,7 @@ import binascii import functools import re +from typing import ClassVar from kart import crs_util from kart.schema import Schema @@ -9,6 +10,8 @@ class TagsJsonFileType: + INSTANCE: ClassVar["TagsJsonFileType"] + # schema.json should be checked on read and write, by dropping any optional fields that are None. def decode_from_bytes(self, data): if data is None: @@ -34,6 +37,8 @@ def assert_list_of_strings(self, meta_item): class SchemaJsonFileType: + INSTANCE: ClassVar["SchemaJsonFileType"] + # schema.json should be normalised on read and write, by dropping any optional fields that are None. 
    def decode_from_bytes(self, data):
diff --git a/kart/raw_diff_delta.py b/kart/raw_diff_delta.py
new file mode 100644
index 000000000..230c8ec55
--- /dev/null
+++ b/kart/raw_diff_delta.py
@@ -0,0 +1,34 @@
+import pygit2
+
+
+class RawDiffDelta:
+    """
+    Just like pygit2.DiffDelta, this simply stores the fact that a particular git blob has changed.
+    Exactly how it is changed is not stored - just the status and the blob paths.
+    Contrast with diff_structs.Delta, which is higher level - it stores information about
+    a particular feature or meta-item that has changed, and exposes the values it has changed from and to.
+
+    This is needed to serve the same purpose as pygit2.DiffDelta because pygit2.DiffDeltas can't be
+    created except by running a pygit2 diff - which we don't always want to do when generating diff deltas:
+    see get_raw_deltas_for_keys.
+    """
+
+    __slots__ = ["status", "status_char", "old_path", "new_path"]
+
+    def __init__(self, status, status_char, old_path, new_path):
+        self.status = status
+        self.status_char = status_char
+        self.old_path = old_path
+        self.new_path = new_path
+
+    @classmethod
+    def of(cls, old_path, new_path, reverse=False):
+        if reverse:
+            old_path, new_path = new_path, old_path
+
+        if old_path is None:
+            return RawDiffDelta(pygit2.GIT_DELTA_ADDED, "A", old_path, new_path)
+        elif new_path is None:
+            return RawDiffDelta(pygit2.GIT_DELTA_DELETED, "D", old_path, new_path)
+        else:
+            return RawDiffDelta(pygit2.GIT_DELTA_MODIFIED, "M", old_path, new_path)
diff --git a/kart/repo.py b/kart/repo.py
index 1fda13203..6ec98f1d6 100644
--- a/kart/repo.py
+++ b/kart/repo.py
@@ -6,7 +6,7 @@
 from enum import Enum
 from functools import cached_property
 from pathlib import Path
-
+from typing import ClassVar
 import click
 import pygit2
@@ -61,6 +61,7 @@ class KartRepoFiles:
 class KartRepoState(Enum):
     NORMAL = "normal"
     MERGING = "merging"
+    ALL_STATES: ClassVar[tuple["KartRepoState", ...]]
 
     @classmethod
     def bad_state_message(cls, bad_state, allowed_states, command_extra):
diff --git a/kart/schema.py b/kart/schema.py
index 58fc4ffab..3e72f62ba 100644
--- a/kart/schema.py
+++ b/kart/schema.py
@@ -14,6 +14,7 @@
     msg_unpack,
     sha256,
 )
+from kart.utils import classproperty
 
 
 class Legend:
@@ -165,8 +166,7 @@ class ColumnSchema(dict):
         "length",
     )
 
-    @classmethod
-    @property
+    @classproperty
     @functools.lru_cache(maxsize=1)
     def sort_order_dict(cls):
         return {
diff --git a/kart/show.py b/kart/show.py
index 6bc4f9c43..1f071e949 100644
--- a/kart/show.py
+++ b/kart/show.py
@@ -79,7 +79,7 @@
 )
 @click.option(
     "--diff-format",
-    type=click.Choice(DiffFormat),
+    type=click.Choice(list(DiffFormat)),
     default=DiffFormat.FULL,
     help="Choose the diff format: \n'full' for full diff, 'none' for viewing commit metadata only, or 'no-data-changes' for metadata and a bool indicating the feature/tile tree changes.",
 )
diff --git a/kart/spatial_filter/__init__.py b/kart/spatial_filter/__init__.py
index 7c81b827b..f781aae5e 100644
--- a/kart/spatial_filter/__init__.py
+++ b/kart/spatial_filter/__init__.py
@@ -4,6 +4,7 @@
 import re
 import sys
 from enum import Enum, auto
+from typing import ClassVar
 
 import click
 import pygit2
@@ -454,6 +455,9 @@ class SpatialFilter:
     call SpatialFilter.transform_for_dataset or SpatialFilter.transform_for_crs
     """
 
+    _MATCH_ALL: ClassVar["SpatialFilter"]
+    MATCH_ALL: ClassVar["SpatialFilter"]
+
     @property
     def is_original(self):
         # Overridden by OriginalSpatialFilter.
@@ -635,6 +639,8 @@ class OriginalSpatialFilter(SpatialFilter):
     That is why only OriginalSpatialFilter supports transformation.
     """

+    _MATCH_ALL: ClassVar["OriginalSpatialFilter"]
+
     def __init__(self, crs_spec, geometry_spec, match_all=False):
         if match_all:
             super().__init__(None, None, match_all=True)
diff --git a/kart/spatial_filter/index.py b/kart/spatial_filter/index.py
index 3ad781ce6..8837d2cbd 100644
--- a/kart/spatial_filter/index.py
+++ b/kart/spatial_filter/index.py
@@ -25,28 +25,24 @@

 L = logging.getLogger("kart.spatial_filter.index")

+_bulk_warns: dict[str, int] = {}
+_bulk_warn_samples: dict[str, object] = {}

-def buffered_bulk_warn(self, message, sample):
+
+def buffered_bulk_warn(message, sample):
     """For logging lots of identical warnings, but only actually outputs once per time flush_bulk_warns is called."""
-    self.bulk_warns.setdefault(message, 0)
-    self.bulk_warns[message] = abs(self.bulk_warns[message]) + 1
-    self.bulk_warn_samples[message] = sample
+    _bulk_warns.setdefault(message, 0)
+    _bulk_warns[message] = abs(_bulk_warns[message]) + 1
+    _bulk_warn_samples[message] = sample


-def flush_bulk_warns(self):
+def flush_bulk_warns():
     """Output the number of occurrences of each type of buffered_bulk_warn message."""
-    for message, occurrences in self.bulk_warns.items():
+    for message, occurrences in _bulk_warns.items():
         if occurrences > 0:
-            sample = self.bulk_warn_samples[message]
-            self.warn(f"{message} ({occurrences} total occurences)\nSample: {sample}")
-            self.bulk_warns[message] = -occurrences
-
-
-L.buffered_bulk_warn = buffered_bulk_warn.__get__(L)
-L.flush_bulk_warns = flush_bulk_warns.__get__(L)
-L.bulk_warns = {}
-L.bulk_warn_samples = {}
+            sample = _bulk_warn_samples[message]
+            L.warning(f"{message} ({occurrences} total occurrences)\nSample: {sample}")
+            _bulk_warns[message] = -occurrences


 class CrsHelper:
@@ -353,7 +349,7 @@ def update_spatial_filter_index(
 ):
         if i and progress_every and i % progress_every == 0:
             click.echo(f"  {i:,d} features... @{time.monotonic()-t0:.1f}s")
-            L.flush_bulk_warns()
+            flush_bulk_warns()

         ds_path = path_match_result.group(1)
         transforms = crs_helper.transforms_for_dataset_at_commit(
@@ -378,7 +374,7 @@ def update_spatial_filter_index(
     )

     click.echo(f"  {i:,d} features... @{time.monotonic()-t0:.1f}s")
-    L.flush_bulk_warns()
+    flush_bulk_warns()

     # Update indexed commits.
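+    # Note: each param is a one-element tuple, since parameterised SQL
+    # execution expects a sequence of row-tuples.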
params = [(bytes.fromhex(commit_id),) for commit_id in all_independent_commits] @@ -498,22 +494,21 @@ def _debug_encoded_envelope(arg): NO_GEOMETRY_COLUMN = object() +_legend_to_col_id: dict[str, int] = {} + def get_geometry(repo, feature_blob): legend, fields = msg_unpack(memoryview(feature_blob)) - col_id = get_geometry.legend_to_col_id.get(legend) + col_id = _legend_to_col_id.get(legend) if col_id is None: col_id = _find_geometry_column(fields) if col_id is None: return None - get_geometry.legend_to_col_id[legend] = col_id + _legend_to_col_id[legend] = col_id return fields[col_id] if col_id is not NO_GEOMETRY_COLUMN else None -get_geometry.legend_to_col_id = {} - - -def _find_geometry_column(fields): +def _find_geometry_column(fields) -> int | object | None: result = NO_GEOMETRY_COLUMN for i, field in enumerate(fields): if isinstance(field, Geometry): @@ -619,22 +614,22 @@ def get_envelope_for_indexing(geom, transforms, feature_desc): envelope = transform_minmax_envelope(minmax_envelope, transform) except CannotIndex as e: if isinstance(e, CannotIndexDueToWrongCrs) and len(transforms) > 1: - L.buffered_bulk_warn( + buffered_bulk_warn( f"Skipped obviously bad transform {transform.desc}", feature_desc, ) continue - L.buffered_bulk_warn("Skipped indexing feature", feature_desc) + buffered_bulk_warn("Skipped indexing feature", feature_desc) return None result = union_of_envelopes(result, envelope) if result is None: - L.buffered_bulk_warn("Skipped indexing feature", feature_desc) + buffered_bulk_warn("Skipped indexing feature", feature_desc) return None if not _is_valid_envelope(result): - L.buffered_bulk_warn( + buffered_bulk_warn( "Couldn't index feature - resulting envelope not valid", feature_desc ) return None diff --git a/kart/sqlalchemy/adapter/postgis.py b/kart/sqlalchemy/adapter/postgis.py index 6612feac0..98612fe76 100644 --- a/kart/sqlalchemy/adapter/postgis.py +++ b/kart/sqlalchemy/adapter/postgis.py @@ -487,7 +487,6 @@ def python_postread(self, value): return str(value) if value is not None else None -@staticmethod def is_postgis_installed(session): # Run a query to see if PostGIS is installed. query = "SELECT * FROM pg_extension WHERE extname = 'postgis';" diff --git a/kart/sqlalchemy/adapter/sqlserver.py b/kart/sqlalchemy/adapter/sqlserver.py index b5f830e34..5fbdcd023 100644 --- a/kart/sqlalchemy/adapter/sqlserver.py +++ b/kart/sqlalchemy/adapter/sqlserver.py @@ -10,7 +10,7 @@ from kart.schema import ColumnSchema, Schema from kart.utils import ungenerator from sqlalchemy.ext.compiler import compiles -from sqlalchemy.sql import quoted_name +from sqlalchemy.sql import quoted_name # type: ignore[attr-defined] from sqlalchemy.sql.functions import Function diff --git a/kart/sqlalchemy/sqlserver.py b/kart/sqlalchemy/sqlserver.py index 28c2b319e..efdf13892 100644 --- a/kart/sqlalchemy/sqlserver.py +++ b/kart/sqlalchemy/sqlserver.py @@ -3,9 +3,9 @@ from urllib.parse import urlsplit, urlunsplit import sqlalchemy -from kart.exceptions import NO_DRIVER, NotFound -from sqlalchemy.dialects.mssql.base import MSDialect, MSIdentifierPreparer +from sqlalchemy.dialects.mssql.base import MSDialect, MSIdentifierPreparer # type: ignore[attr-defined] +from kart.exceptions import NO_DRIVER, NotFound from .base import BaseDb L = logging.getLogger("kart.sqlalchemy.sqlserver") diff --git a/kart/structure.py b/kart/structure.py index 1ae947de9..e8411317e 100644 --- a/kart/structure.py +++ b/kart/structure.py @@ -21,7 +21,11 @@ from . 
import list_of_conflicts from .pack_util import packfile_object_builder from .schema import Schema -from .tabular.version import extra_blobs_for_version, dataset_class_for_version +from .tabular.version import ( + extra_blobs_for_version, + dataset_class_for_version, + DEFAULT_NEW_REPO_VERSION, +) from .structs import CommitWithReference from .unsupported_dataset import UnsupportedDataset from .working_copy import ALL_PART_TYPES @@ -232,7 +236,7 @@ def create_tree_from_diff( if not self.tree: # This is the first commit to this branch - we may need to add extra blobs # to the tree to mark this data as being of a particular version. - extra_blobs = extra_blobs_for_version(self.version) + extra_blobs = extra_blobs_for_version(DEFAULT_NEW_REPO_VERSION) for path, blob in extra_blobs: object_builder.insert(path, blob) diff --git a/kart/tabular/import_source.py b/kart/tabular/import_source.py index 2194c1abe..fd451c82b 100644 --- a/kart/tabular/import_source.py +++ b/kart/tabular/import_source.py @@ -73,16 +73,16 @@ def dest_path(self): return self._dest_path return self.default_dest_path() + @dest_path.setter + def dest_path(self, dest_path): + self._dest_path = self._normalise_dataset_path(dest_path) + @classmethod def _normalise_dataset_path(cls, path): # we treat back-slash and forward-slash as equivalent at import time. # (but we only ever import forward-slashes) return path.strip("/").replace("\\", "/") - @dest_path.setter - def dest_path(self, dest_path): - self._dest_path = self._normalise_dataset_path(dest_path) - def default_dest_path(self): """ The default destination path where this dataset should be imported. @@ -249,7 +249,9 @@ def get_table(value): except ValueError: if value in table_list: return value - raise click.BadParameter(f"Please enter a number between 1 and {len(table_list)} or a valid table name") + raise click.BadParameter( + f"Please enter a number between 1 and {len(table_list)} or a valid table name" + ) t_default = table_list[0] if len(table_list) == 1 else None return click.prompt( diff --git a/kart/tabular/rich_table_dataset.py b/kart/tabular/rich_table_dataset.py index 1a14e8e88..12fae876d 100644 --- a/kart/tabular/rich_table_dataset.py +++ b/kart/tabular/rich_table_dataset.py @@ -130,7 +130,7 @@ def diff( other_subtree = other.get_subtree("feature") if other else self._empty_tree data_changes = self_subtree != other_subtree - ds_diff["data_changes"]: bool = data_changes + ds_diff["data_changes"] = data_changes # Else do a full diff. else: diff --git a/kart/tabular/v2.py b/kart/tabular/v2.py index aef424185..8cb6947d6 100644 --- a/kart/tabular/v2.py +++ b/kart/tabular/v2.py @@ -1,3 +1,4 @@ +from typing import ClassVar from kart.meta_items import MetaItemDefinition, MetaItemFileType from kart.tabular.v3 import TableV3 from kart.tabular.v3_paths import PathEncoder @@ -8,7 +9,7 @@ class TableV2(TableV3): DATASET_DIRNAME = ".sno-dataset" # Old name for V2 datasets. 
- META_ITEMS = TableV3.META_ITEMS + ( + META_ITEMS: ClassVar[tuple[MetaItemDefinition, ...]] = TableV3.META_ITEMS + ( MetaItemDefinition("metadata/dataset.json", MetaItemFileType.JSON), ) diff --git a/kart/tabular/v3.py b/kart/tabular/v3.py index 98e11978e..b290a51b6 100644 --- a/kart/tabular/v3.py +++ b/kart/tabular/v3.py @@ -1,7 +1,7 @@ import functools import os import re - +from typing import ClassVar import pygit2 from kart.core import all_blobs_in_tree @@ -86,7 +86,7 @@ class TableV3(RichTableDataset): MetaItemVisibility.INTERNAL_ONLY, ) - META_ITEMS = ( + META_ITEMS: ClassVar[tuple[MetaItemDefinition, ...]] = ( TITLE, DESCRIPTION, TAGS_JSON, diff --git a/kart/tabular/v3_paths.py b/kart/tabular/v3_paths.py index 1088b0c76..ff4839313 100644 --- a/kart/tabular/v3_paths.py +++ b/kart/tabular/v3_paths.py @@ -111,6 +111,10 @@ class PathEncoder: which Kart continues to support. """ + LEGACY_ENCODER: "PathEncoder" + INT_PK_ENCODER: "PathEncoder" + GENERAL_ENCODER: "PathEncoder" + PATH_STRUCTURE_ITEM = "path-structure.json" @staticmethod diff --git a/kart/tabular/working_copy/base.py b/kart/tabular/working_copy/base.py index 002ba87e3..2c4652038 100644 --- a/kart/tabular/working_copy/base.py +++ b/kart/tabular/working_copy/base.py @@ -43,7 +43,7 @@ class TableWorkingCopy(WorkingCopyPart): """ # Subclasses should override if they can support more meta-items eg description or metadata.xml - SUPPORTED_META_ITEMS = ( + SUPPORTED_META_ITEMS: tuple[meta_items.MetaItemDefinition, ...] = ( meta_items.TITLE, meta_items.SCHEMA_JSON, meta_items.CRS_DEFINITIONS, @@ -1455,10 +1455,8 @@ def soft_reset_after_commit( def _check_for_unsupported_ds_types(self, sess, target_datasets, schema="public"): pass - -# Alias state_session to session: -# a session the working-copy in general is also a good session for reading/writing the state table. -TableWorkingCopy.state_session = TableWorkingCopy.session + def state_session(self): + return self.session() @contextlib.contextmanager diff --git a/kart/tabular/working_copy/db_server.py b/kart/tabular/working_copy/db_server.py index 9f5ccd11f..448371940 100644 --- a/kart/tabular/working_copy/db_server.py +++ b/kart/tabular/working_copy/db_server.py @@ -4,10 +4,11 @@ from urllib.parse import urlsplit, urlunsplit import click +from sqlalchemy.exc import DBAPIError from kart.exceptions import DbConnectionError, InvalidOperation from kart.sqlalchemy import DbType, strip_password -from sqlalchemy.exc import DBAPIError +from kart.utils import classproperty from . 
import TableWorkingCopyStatus from .base import TableWorkingCopy @@ -16,8 +17,7 @@ class DatabaseServer_WorkingCopy(TableWorkingCopy): """Functionality common to working copies that connect to a database server.""" - @property - @classmethod + @classproperty def URI_SCHEME(cls): """The URI scheme to connect to this type of database, eg "postgresql".""" raise NotImplementedError() diff --git a/kart/tabular/working_copy/gpkg.py b/kart/tabular/working_copy/gpkg.py index 2fe75964d..9aaf7497b 100644 --- a/kart/tabular/working_copy/gpkg.py +++ b/kart/tabular/working_copy/gpkg.py @@ -696,6 +696,3 @@ def _update_gpkg_contents(self, sess, dataset, commit=None): {"last_change": gpkg_change_time, "table_name": dataset.table_name}, ).rowcount assert rc == 1, f"gpkg_contents update: expected 1Δ, got {rc}" - - -WorkingCopy_GPKG.state_session = WorkingCopy_GPKG.session diff --git a/kart/tabular/working_copy/sqlserver.py b/kart/tabular/working_copy/sqlserver.py index 05912098d..bf44070ef 100644 --- a/kart/tabular/working_copy/sqlserver.py +++ b/kart/tabular/working_copy/sqlserver.py @@ -6,7 +6,7 @@ from kart.sqlalchemy import separate_last_path_part, text_with_inlined_params from kart.sqlalchemy.adapter.sqlserver import KartAdapter_SqlServer from kart.schema import Schema -from sqlalchemy.dialects.mssql.base import MSIdentifierPreparer +from sqlalchemy.dialects.mssql.base import MSIdentifierPreparer # type: ignore[attr-defined] from sqlalchemy.orm import sessionmaker from .db_server import DatabaseServer_WorkingCopy diff --git a/kart/tile/importer.py b/kart/tile/importer.py index 2da614bba..e52fa4145 100644 --- a/kart/tile/importer.py +++ b/kart/tile/importer.py @@ -122,7 +122,7 @@ def __init__( # A dict of the form {sidecar-key-prefix: sidecar-filename-suffix} # For example: {"pam": ".aux.xml"} since we use the prefix pam to label keys from .aux.xml aka PAM files. - SIDECAR_FILES = {} + SIDECAR_FILES: dict[str, str] = {} @cached_property def ALLOWED_SCHEMES(self): diff --git a/kart/tile/tile_dataset.py b/kart/tile/tile_dataset.py index 1b456b1e9..7fce0d69e 100644 --- a/kart/tile/tile_dataset.py +++ b/kart/tile/tile_dataset.py @@ -1,6 +1,6 @@ import functools import os - +from typing import ClassVar from kart.base_dataset import BaseDataset from kart.core import all_blobs_with_parent_in_tree from kart.decorators import allow_classmethod @@ -61,7 +61,7 @@ class TileDataset(BaseDataset): LINKED_STORAGE_JSON = meta_items.LINKED_STORAGE_JSON # Subclasses may override to add extra meta-items. - META_ITEMS = ( + META_ITEMS: ClassVar[tuple["MetaItemDefinition", ...]] = ( TITLE, DESCRIPTION, TAGS_JSON, @@ -283,7 +283,7 @@ def extract_tile_metadata(cls, path, oid_and_size=None): def diff( self, - other, + other: BaseDataset | None, ds_filter=DatasetKeyFilter.MATCH_ALL, reverse=False, diff_format=DiffFormat.FULL, @@ -302,7 +302,7 @@ def diff( other_subtree = other.get_subtree("tile") if other else self._empty_tree data_changes = self_subtree != other_subtree - ds_diff["data_changes"]: bool = data_changes + ds_diff["data_changes"] = data_changes # Else do a full diff. 
        else:
@@ -378,7 +378,7 @@ def tile_metadata(self):
             "crs.wkt": self.get_meta_item("crs.wkt"),
         }

-    def is_cloud_optimized():
+    def is_cloud_optimized(self) -> bool:
         """Returns True if this dataset is constrained so that only cloud-optimized tiles can be added to it."""
         raise NotImplementedError()
diff --git a/kart/utils.py b/kart/utils.py
index 7ae478d97..f4c123b31 100644
--- a/kart/utils.py
+++ b/kart/utils.py
@@ -68,3 +68,13 @@ def get_num_available_cores():
     # sched_getaffinity isn't available on some platforms (macOS mostly I think)
     # Fallback to total machine CPUs
     return float(os.cpu_count())
+
+
+class classproperty:
+    """Like @property, but on the class: accessing the attribute calls the getter with the owning class."""
+
+    def __init__(self, getter):
+        self.fget = getter
+
+    def __get__(self, cls, owner):
+        return self.fget(owner)
diff --git a/mypy.ini b/mypy.ini
new file mode 100644
index 000000000..8e65dbf96
--- /dev/null
+++ b/mypy.ini
@@ -0,0 +1,34 @@
+[mypy]
+# When checking specific files (as pre-commit does), it's annoying if mypy also reports errors from every other file.
+# NOTE: this seems to be the default when run on Linux but not on macOS (undocumented, though).
+follow_imports = silent
+files = kart/
+python_version = 3.11
+
+[mypy-pysqlite3.*]
+ignore_missing_imports = true
+
+[mypy-osgeo.*]
+ignore_missing_imports = true
+
+[mypy-ipdb.*]
+ignore_missing_imports = true
+
+[mypy-shellingham.*]
+ignore_missing_imports = true
+
+[mypy-reflink.*]
+ignore_missing_imports = true
+
+[mypy-boto3.*]
+ignore_missing_imports = true
+
+[mypy-botocore.*]
+ignore_missing_imports = true
+
+[mypy-pyodbc.*]
+ignore_missing_imports = true
+
+[mypy-pygit2.*]
+# TODO: remove this when upgrading to pygit2 1.16+, which has builtin type hints
+ignore_missing_imports = true
diff --git a/requirements/dev.in b/requirements/dev.in
index 8d2b111be..e0d0617e4 100644
--- a/requirements/dev.in
+++ b/requirements/dev.in
@@ -19,9 +19,8 @@ mypy
   # note: not maintained; not compatible with sqlalchemy 2.x
   # so, probably drop this when we upgrade sqlalchemy
   sqlalchemy-stubs==0.4
-  # NOTE: we actually still have pygit2 1.12, but this is the first release of types-pygit2
-  # TODO: upgrade to pygit2 1.16+ and use the builtin types rather than using this package.
- types-pygit2==1.14.0.20240317 - - types-tqdm + msgpack-types + types-jsonschema + types-psycopg2 types-Pygments==2.13.0 + types-tqdm diff --git a/requirements/dev.txt b/requirements/dev.txt index ee7fcf9c6..db169472c 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -1,37 +1,50 @@ # This file was autogenerated by uv via the following command: -# uv pip compile dev.in -o dev.txt +# uv pip compile requirements/dev.in -o requirements/dev.txt appnope==0.1.3 # via - # -r dev.in + # -r requirements/dev.in # ipython asttokens==2.2.1 # via stack-data +attrs==22.1.0 + # via + # -c requirements/requirements.txt + # -c requirements/test.txt + # referencing backcall==0.2.0 # via ipython colorama==0.4.6 # via - # -c docs.txt - # -c test.txt - # -r dev.in + # -c requirements/docs.txt + # -c requirements/test.txt + # -r requirements/dev.in decorator==5.1.1 # via # ipdb # ipython executing==1.2.0 # via stack-data +idna==3.7 + # via + # -c requirements/docs.txt + # yarl ipdb==0.13.11 - # via -r dev.in + # via -r requirements/dev.in ipython==8.10.0 # via - # -r dev.in + # -r requirements/dev.in # ipdb jedi==0.18.2 # via ipython matplotlib-inline==0.1.6 # via ipython +msgpack-types==0.3.0 + # via -r requirements/dev.in +multidict==6.1.0 + # via yarl mypy==1.13.0 # via - # -r dev.in + # -r requirements/dev.in # sqlalchemy-stubs mypy-extensions==1.0.0 # via mypy @@ -39,52 +52,60 @@ parso==0.8.3 # via jedi pexpect==4.8.0 # via - # -r dev.in + # -r requirements/dev.in # ipython pickleshare==0.7.5 # via ipython prompt-toolkit==3.0.36 # via ipython +propcache==0.2.0 + # via yarl ptyprocess==0.7.0 # via pexpect pure-eval==0.2.2 # via stack-data pygments==2.13.0 # via - # -c docs.txt - # -c requirements.txt + # -c requirements/docs.txt + # -c requirements/requirements.txt # ipython +pyrsistent==0.19.2 + # via + # -c requirements/requirements.txt + # referencing +referencing==0.8.11 + # via types-jsonschema six==1.16.0 # via - # -c docs.txt - # -c requirements.txt - # -c test.txt + # -c requirements/docs.txt + # -c requirements/requirements.txt + # -c requirements/test.txt # asttokens sqlalchemy-stubs==0.4 - # via -r dev.in + # via -r requirements/dev.in stack-data==0.6.2 # via ipython traitlets==5.7.1 # via # ipython # matplotlib-inline -types-cffi==1.16.0.20240331 - # via types-pygit2 types-docutils==0.21.0.20241005 # via types-pygments -types-pygit2==1.14.0.20240317 - # via -r dev.in +types-jsonschema==4.23.0.20240813 + # via -r requirements/dev.in +types-psycopg2==2.9.21.20241019 + # via -r requirements/dev.in types-pygments==2.13.0 - # via -r dev.in + # via -r requirements/dev.in types-setuptools==75.3.0.20241107 - # via - # types-cffi - # types-pygments + # via types-pygments types-tqdm==4.66.0.20240417 - # via -r dev.in + # via -r requirements/dev.in typing-extensions==4.12.2 # via # mypy # sqlalchemy-stubs wcwidth==0.2.5 # via prompt-toolkit +yarl==1.17.1 + # via referencing From b0d2d58e386c9b2703cf0faa66ab08720cb0a0d7 Mon Sep 17 00:00:00 2001 From: Craig de Stigter Date: Sat, 9 Nov 2024 21:58:44 +1300 Subject: [PATCH 3/8] Bump click to avoid some spurious mypy errors https://github.com/pallets/click/issues/2558 --- requirements/dev.txt | 32 ++++++++++++++++++-------------- requirements/docs.txt | 2 +- requirements/requirements.in | 2 +- requirements/requirements.txt | 22 +++++++++++----------- requirements/test.txt | 10 ++++------ 5 files changed, 35 insertions(+), 33 deletions(-) diff --git a/requirements/dev.txt b/requirements/dev.txt index db169472c..ebdcb47d6 100644 --- 
a/requirements/dev.txt +++ b/requirements/dev.txt @@ -1,8 +1,12 @@ -# This file was autogenerated by uv via the following command: -# uv pip compile requirements/dev.in -o requirements/dev.txt +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# "cmake --build build --target py-requirements" +# appnope==0.1.3 # via - # -r requirements/dev.in + # -r dev.in # ipython asttokens==2.2.1 # via stack-data @@ -17,7 +21,7 @@ colorama==0.4.6 # via # -c requirements/docs.txt # -c requirements/test.txt - # -r requirements/dev.in + # -r dev.in decorator==5.1.1 # via # ipdb @@ -29,22 +33,22 @@ idna==3.7 # -c requirements/docs.txt # yarl ipdb==0.13.11 - # via -r requirements/dev.in + # via -r dev.in ipython==8.10.0 # via - # -r requirements/dev.in + # -r dev.in # ipdb jedi==0.18.2 # via ipython matplotlib-inline==0.1.6 # via ipython msgpack-types==0.3.0 - # via -r requirements/dev.in + # via -r dev.in multidict==6.1.0 # via yarl mypy==1.13.0 # via - # -r requirements/dev.in + # -r dev.in # sqlalchemy-stubs mypy-extensions==1.0.0 # via mypy @@ -52,7 +56,7 @@ parso==0.8.3 # via jedi pexpect==4.8.0 # via - # -r requirements/dev.in + # -r dev.in # ipython pickleshare==0.7.5 # via ipython @@ -82,7 +86,7 @@ six==1.16.0 # -c requirements/test.txt # asttokens sqlalchemy-stubs==0.4 - # via -r requirements/dev.in + # via -r dev.in stack-data==0.6.2 # via ipython traitlets==5.7.1 @@ -92,15 +96,15 @@ traitlets==5.7.1 types-docutils==0.21.0.20241005 # via types-pygments types-jsonschema==4.23.0.20240813 - # via -r requirements/dev.in + # via -r dev.in types-psycopg2==2.9.21.20241019 - # via -r requirements/dev.in + # via -r dev.in types-pygments==2.13.0 - # via -r requirements/dev.in + # via -r dev.in types-setuptools==75.3.0.20241107 # via types-pygments types-tqdm==4.66.0.20240417 - # via -r requirements/dev.in + # via -r dev.in typing-extensions==4.12.2 # via # mypy diff --git a/requirements/docs.txt b/requirements/docs.txt index 314d2f11f..fd9d67b8f 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.12 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # # "cmake --build build --target py-requirements" diff --git a/requirements/requirements.in b/requirements/requirements.in index 33ef03a02..c53f4162f 100644 --- a/requirements/requirements.in +++ b/requirements/requirements.in @@ -1,6 +1,6 @@ boto3>=1.29.1 certifi -click~=8.1 +click~=8.1.7 docutils<0.18 msgpack~=0.6.1 Pygments diff --git a/requirements/requirements.txt b/requirements/requirements.txt index ecaebc0f1..f94696e88 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.12 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # # "cmake --build build --target py-requirements" @@ -16,20 +16,20 @@ certifi==2022.12.7 # via -r requirements.in #cffi==1.16.0 # via - # -r vendor-wheels.txt + # -r requirements/vendor-wheels.txt # cryptography # pygit2 # reflink -click==8.1.4 +click==8.1.7 # via -r requirements.in -#cryptography==41.0.4 - # via -r vendor-wheels.txt +#cryptography==42.0.4 + # via -r requirements/vendor-wheels.txt docutils==0.17.1 # via # -r requirements.in # rst2txt #gdal==3.8.0 - # via -r vendor-wheels.txt + # via -r requirements/vendor-wheels.txt jmespath==1.0.1 # via # boto3 @@ -39,11 +39,11 @@ jsonschema==4.17.3 msgpack==0.6.2 # via -r 
requirements.in
 #psycopg2==2.9.9
-    # via -r vendor-wheels.txt
+    # via -r requirements/vendor-wheels.txt
 pycparser==2.21
     # via cffi
 #pygit2==1.12.1
-    # via -r vendor-wheels.txt
+    # via -r requirements/vendor-wheels.txt
 pygments==2.13.0
     # via
         # -r requirements.in
@@ -51,15 +51,15 @@ pygments==2.13.0
 pymysql==1.0.2
     # via -r requirements.in
 pyodbc==5.1.0
-    # via -r vendor-wheels.txt
+    # via -r requirements/vendor-wheels.txt
 pyrsistent==0.19.2
     # via jsonschema
 #pysqlite3==0.5.2
-    # via -r vendor-wheels.txt
+    # via -r requirements/vendor-wheels.txt
 python-dateutil==2.8.2
     # via botocore
 #reflink==0.2.2
-    # via -r vendor-wheels.txt
+    # via -r requirements/vendor-wheels.txt
 rst2txt==1.1.0
     # via -r requirements.in
 s3transfer==0.7.0
diff --git a/requirements/test.txt b/requirements/test.txt
index cc3dc47ca..d6b8d2a08 100644
--- a/requirements/test.txt
+++ b/requirements/test.txt
@@ -1,5 +1,5 @@
 #
-# This file is autogenerated by pip-compile with Python 3.12
+# This file is autogenerated by pip-compile with Python 3.11
 # by the following command:
 #
 #    "cmake --build build --target py-requirements"
 #
@@ -10,14 +10,12 @@ atomicwrites==1.4.1
     # via -r test.in
 attrs==22.1.0
     # via
-        # -c requirements.txt
+        # -c requirements/requirements.txt
         # pytest
 colorama==0.4.6
     # via -r test.in
 coverage[toml]==6.5.0
-    # via
-        # coverage
-        # pytest-cov
+    # via pytest-cov
 execnet==1.9.0
     # via pytest-xdist
 fields==5.0.0
@@ -65,7 +63,7 @@ pytest-xdist==3.1.0
     # via -r test.in
 six==1.16.0
     # via
-        # -c requirements.txt
+        # -c requirements/requirements.txt
         # html5lib
         # pytest-profiling
 termcolor==2.1.1
From b4c8b01d86b18d1d3c89dac364f049d845d06c8f Mon Sep 17 00:00:00 2001
From: Craig de Stigter
Date: Sat, 9 Nov 2024 19:34:15 +1300
Subject: [PATCH 4/8] Ignore mypy errors for click's shell_complete kwarg

This is somehow absent from click's inline type annotations.
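For example, mypy rejects perfectly valid call sites like the following (a
minimal sketch - the exact error wording varies with the mypy and click
versions):

    @click.argument("refish", shell_complete=ref_completer)
    # error: Unexpected keyword argument "shell_complete"  [call-arg]

So each call site is silenced with "# type: ignore[call-arg]" until the
upstream hints are fixed.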
Refs https://github.com/pallets/click/issues/2802 --- kart/apply.py | 2 +- kart/checkout.py | 22 ++++++++++++++++++---- kart/data.py | 7 ++++++- kart/diff.py | 2 +- kart/export.py | 2 +- kart/import_.py | 2 +- kart/init.py | 2 +- kart/log.py | 2 +- kart/point_cloud/import_.py | 2 +- kart/pull.py | 6 +++++- kart/raster/import_.py | 2 +- kart/resolve.py | 2 +- kart/show.py | 7 +++++-- kart/tabular/export.py | 2 +- kart/tabular/import_.py | 2 +- 15 files changed, 45 insertions(+), 19 deletions(-) diff --git a/kart/apply.py b/kart/apply.py index 2b962185a..ca9e877dc 100644 --- a/kart/apply.py +++ b/kart/apply.py @@ -370,7 +370,7 @@ def apply_patch( "--ref", default="HEAD", help="Which ref to apply the patch onto.", - shell_complete=ref_completer, + shell_complete=ref_completer, # type: ignore[call-arg] ) @click.option( "--amend", diff --git a/kart/checkout.py b/kart/checkout.py index aaaa73386..718845f58 100644 --- a/kart/checkout.py +++ b/kart/checkout.py @@ -62,7 +62,12 @@ multiple=True, help="Request that a particular dataset *not* be checked out (one which is currently configured to be checked out)", ) -@click.argument("refish", default=None, required=False, shell_complete=ref_completer) +@click.argument( + "refish", + default=None, + required=False, + shell_complete=ref_completer, # type: ignore[call-arg] +) def checkout( ctx, new_branch, @@ -320,7 +325,12 @@ def git_refetch(repo, promisor_remote, spec): help="If a local branch of given name doesn't exist, but a remote does, " "this option guesses that the user wants to create a local to track the remote", ) -@click.argument("refish", default=None, required=False, shell_complete=ref_completer) +@click.argument( + "refish", + default=None, + required=False, + shell_complete=ref_completer, # type: ignore[call-arg] +) def switch(ctx, create, force_create, discard_changes, do_guess, refish): """ Switch branches @@ -475,7 +485,7 @@ def _is_in_branches(branch_name, branches): "tag associated with it. " ), default="HEAD", - shell_complete=ref_completer, + shell_complete=ref_completer, # type: ignore[call-arg] ) @click.argument("filters", nargs=-1) def restore(ctx, source, filters): @@ -512,7 +522,11 @@ def restore(ctx, source, filters): is_flag=True, help="Discard local changes in working copy if necessary", ) -@click.argument("refish", default="HEAD", shell_complete=ref_completer) +@click.argument( + "refish", + default="HEAD", + shell_complete=ref_completer, # type: ignore[call-arg] +) def reset(ctx, discard_changes, refish): """ Reset the branch head to point to a particular commit. diff --git a/kart/data.py b/kart/data.py index 56b489f12..c6aa98052 100644 --- a/kart/data.py +++ b/kart/data.py @@ -33,7 +33,12 @@ def data(ctx, **kwargs): is_flag=True, help="When set, outputs the dataset type and version. 
(This may become the default in a later version of Kart)", ) -@click.argument("refish", required=False, default="HEAD", shell_complete=ref_completer) +@click.argument( + "refish", + required=False, + default="HEAD", + shell_complete=ref_completer, # type: ignore[call-arg] +) @click.pass_context def data_ls(ctx, output_format, with_dataset_types, refish): """List all of the datasets in the Kart repository""" diff --git a/kart/diff.py b/kart/diff.py index 5f7a7a6c1..3b6c86116 100644 --- a/kart/diff.py +++ b/kart/diff.py @@ -159,7 +159,7 @@ def feature_count_diff( metavar="[REVISIONS] [--] [FILTERS]", nargs=-1, type=click.UNPROCESSED, - shell_complete=ref_or_repo_path_completer, + shell_complete=ref_or_repo_path_completer, # type: ignore[call-arg] ) def diff( ctx, diff --git a/kart/export.py b/kart/export.py index 64b671559..90bfa118a 100644 --- a/kart/export.py +++ b/kart/export.py @@ -25,7 +25,7 @@ "args", nargs=-1, metavar="DATASET [EXPORT_TYPE:]DESTINATION", - shell_complete=repo_path_completer, + shell_complete=repo_path_completer, # type: ignore[call-arg] ) def export(ctx, args, **kwargs): """ diff --git a/kart/import_.py b/kart/import_.py index d8fdb4e80..bf4b23f1d 100644 --- a/kart/import_.py +++ b/kart/import_.py @@ -75,7 +75,7 @@ def list_import_formats(ctx): "args", nargs=-1, metavar="SOURCE [[SOURCES...] or [DATASETS...]]", - shell_complete=file_path_completer, + shell_complete=file_path_completer, # type: ignore[call-arg] ) def import_(ctx, args, **kwargs): """ diff --git a/kart/init.py b/kart/init.py index 7e177821f..b65dd5874 100644 --- a/kart/init.py +++ b/kart/init.py @@ -26,7 +26,7 @@ "--import", "import_from", help='Import a database (all tables): "FORMAT:PATH" eg. "GPKG:my.gpkg". Currently only tabular formats are supported.', - shell_complete=file_path_completer, + shell_complete=file_path_completer, # type: ignore[call-arg] ) @click.option( "--checkout/--no-checkout", diff --git a/kart/log.py b/kart/log.py index f363592f5..35642cd63 100644 --- a/kart/log.py +++ b/kart/log.py @@ -235,7 +235,7 @@ def convert_user_patterns_to_raw_paths(paths, repo, commits): "args", metavar="[REVISIONS] [--] [FILTERS]", nargs=-1, - shell_complete=ref_or_repo_path_completer, + shell_complete=ref_or_repo_path_completer, # type: ignore[call-arg] ) def log( ctx, diff --git a/kart/point_cloud/import_.py b/kart/point_cloud/import_.py index da6a7ceca..b2985b212 100644 --- a/kart/point_cloud/import_.py +++ b/kart/point_cloud/import_.py @@ -116,7 +116,7 @@ "args", nargs=-1, metavar="SOURCE [SOURCES...]", - shell_complete=file_path_completer, + shell_complete=file_path_completer, # type: ignore[call-arg] ) def point_cloud_import( ctx, diff --git a/kart/pull.py b/kart/pull.py index 99010867f..746d62885 100644 --- a/kart/pull.py +++ b/kart/pull.py @@ -45,7 +45,11 @@ ) @click.argument("repository", required=False, metavar="REMOTE") @click.argument( - "refspecs", nargs=-1, required=False, metavar="REFISH", shell_complete=ref_completer + "refspecs", + nargs=-1, + required=False, + metavar="REFISH", + shell_complete=ref_completer, # type: ignore[call-arg] ) @click.pass_context def pull(ctx, ff, ff_only, launch_editor, do_progress, repository, refspecs): diff --git a/kart/raster/import_.py b/kart/raster/import_.py index fd9fa9dcc..8b933485a 100644 --- a/kart/raster/import_.py +++ b/kart/raster/import_.py @@ -115,7 +115,7 @@ "args", nargs=-1, metavar="SOURCE [SOURCES...]", - shell_complete=file_path_completer, + shell_complete=file_path_completer, # type: ignore[call-arg] ) def raster_import( ctx, diff --git 
a/kart/resolve.py b/kart/resolve.py index f9f144d2d..403c3cf40 100644 --- a/kart/resolve.py +++ b/kart/resolve.py @@ -538,7 +538,7 @@ def _fix_conflict_label(conflict_label): "conflict_labels", nargs=-1, metavar="[CONFLICT_LABELS]", - shell_complete=conflict_completer, + shell_complete=conflict_completer, # type: ignore[call-arg] ) def resolve(ctx, with_version, file_path, renumber, conflict_labels): """Resolve a merge conflict, using one of the conflicting versions, or with a user-supplied resolution.""" diff --git a/kart/show.py b/kart/show.py index 1f071e949..89b4b247d 100644 --- a/kart/show.py +++ b/kart/show.py @@ -75,7 +75,7 @@ metavar="[REVISION] [--] [FILTERS]", nargs=-1, type=click.UNPROCESSED, - shell_complete=ref_or_repo_path_completer, + shell_complete=ref_or_repo_path_completer, # type: ignore[call-arg] ) @click.option( "--diff-format", @@ -184,7 +184,10 @@ def show( ) # NOTE: this is *required* for now. # A future version might create patches from working-copy changes. -@click.argument("refish", shell_complete=ref_completer) +@click.argument( + "refish", + shell_complete=ref_completer, # type: ignore[call-arg] +) def create_patch( ctx, *, diff --git a/kart/tabular/export.py b/kart/tabular/export.py index 3194dd8fc..5acdcd9c5 100644 --- a/kart/tabular/export.py +++ b/kart/tabular/export.py @@ -167,7 +167,7 @@ def get_driver(destination_spec): "args", nargs=-1, metavar="DATASET [EXPORT_TYPE:]DESTINATION", - shell_complete=repo_path_completer, + shell_complete=repo_path_completer, # type: ignore[call-arg] ) def table_export( ctx, diff --git a/kart/tabular/import_.py b/kart/tabular/import_.py index b9bcb6602..706e82500 100644 --- a/kart/tabular/import_.py +++ b/kart/tabular/import_.py @@ -179,7 +179,7 @@ def any_at_all(iterable): "args", nargs=-1, metavar="SOURCE ([SOURCES...] 
or [TABLES...])", - shell_complete=file_path_completer, + shell_complete=file_path_completer, # type: ignore[call-arg] ) def table_import( ctx, From a18573b2c1d28f454efbb5de054c087a2b738650 Mon Sep 17 00:00:00 2001 From: Craig de Stigter Date: Sat, 9 Nov 2024 21:00:42 +1300 Subject: [PATCH 5/8] Add mypy pre-commit hook Follows recommendations at https://github.com/python/mypy/issues/13916 --- .pre-commit-config.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 516e7180a..60b46c9dd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -28,3 +28,11 @@ repos: - id: cmake-lint args: - --suppress-decorations + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.13.0 + hooks: + - id: mypy + name: Check types with mypy + language: system + entry: build/venv/bin/mypy + pass_filenames: false \ No newline at end of file From f8c6bbd9cf7eb7bab7e9dd6bc873ab9017a23a5c Mon Sep 17 00:00:00 2001 From: Craig de Stigter Date: Sat, 9 Nov 2024 22:06:26 +1300 Subject: [PATCH 6/8] Github Actions type checking --- .github/workflows/build.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8c9e5aeb7..95c239a69 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -12,6 +12,19 @@ env: AWS_NO_SIGN_REQUEST: "1" jobs: + type-checking: + name: Run Type Checking + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: "3.11" + cache: 'pip' # caching pip dependencies + - run: pip install -r requirements/requirements.txt -r requirements/test.txt -r requirements/dev.txt + - name: Run mypy + run: mypy + # Increase rate limits for public registry amazon-ecr-auth: runs-on: ubuntu-latest From 9afda4c82fae75a3ce1658d14845e307971e1407 Mon Sep 17 00:00:00 2001 From: Craig de Stigter Date: Mon, 11 Nov 2024 11:24:27 +1300 Subject: [PATCH 7/8] Fix WC session aliasing issue session & state_session are no longer the same function; one calls the other --- kart/working_copy.py | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/kart/working_copy.py b/kart/working_copy.py index 41fd7f9c9..9ea2cefe0 100644 --- a/kart/working_copy.py +++ b/kart/working_copy.py @@ -668,14 +668,7 @@ def reset( repo_key_filter, ) - # Start a DB session that surrounds both the working copy modifications and the state table modifications - - # if the same DB session can be used for both, otherwise, let _do_reset_datasets handle it. 
- session_context = ( - self.session - if hasattr(self, "session") and self.session == self.state_session - else contextlib.nullcontext - ) - with session_context(): + with self.state_session() as sess: if ds_inserts or ds_updates or ds_deletes: self._do_reset_datasets( base_datasets=base_datasets, @@ -691,15 +684,12 @@ def reset( quiet=quiet, ) - with self.state_session() as sess: - if not track_changes_as_dirty: - self._update_state_table_tree(sess, target_tree_id) - self._update_state_table_spatial_filter_hash( - sess, self.repo.spatial_filter.hexhash - ) - self._update_state_table_non_checkout_datasets( - sess, non_checkout_datasets - ) + if not track_changes_as_dirty: + self._update_state_table_tree(sess, target_tree_id) + self._update_state_table_spatial_filter_hash( + sess, self.repo.spatial_filter.hexhash + ) + self._update_state_table_non_checkout_datasets(sess, non_checkout_datasets) def _do_reset_datasets( self, From ce49e47b75862d10dbe0aeaa60e8bd23aec9b656 Mon Sep 17 00:00:00 2001 From: Craig de Stigter Date: Mon, 11 Nov 2024 13:08:02 +1300 Subject: [PATCH 8/8] Fix hang in pointcloud conflict tests --- kart/cli_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kart/cli_util.py b/kart/cli_util.py index a5a28a4a6..dbbdb0058 100644 --- a/kart/cli_util.py +++ b/kart/cli_util.py @@ -179,7 +179,7 @@ def handle_parse_result(self, ctx, opts, args: list[str]): f"is mutually exclusive with {other.get_error_hint(ctx)}." ) else: - self.prompt = "" + self.prompt = None return super().handle_parse_result(ctx, opts, args)
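Note on the fix: this is based on reading click's Option.consume_value, so
treat the exact mechanics as an assumption rather than documented behaviour.
Click decides whether to prompt with a check roughly like:

    # paraphrased from click's Option.consume_value:
    if value is None and self.prompt is not None and not ctx.resilient_parsing:
        value = self.prompt_for_value(ctx)  # blocks waiting on stdin

An empty-string prompt still satisfies "is not None", so the pointcloud
conflict tests hung waiting for input; None disables prompting entirely.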