feat: wip #48

Open · wants to merge 4 commits into base: main
2 changes: 1 addition & 1 deletion .github/workflows/CI.yml
@@ -5,7 +5,7 @@ on:
   pull_request:
     branches: [ main ]
   release:
-    types: [created]
+    types: [created]
   workflow_dispatch:

 jobs:
3 changes: 2 additions & 1 deletion docs/conf.py
@@ -4,8 +4,9 @@
 # list see the documentation:
 # https://www.sphinx-doc.org/en/master/usage/configuration.html

-# -- Path setup --------------------------------------------------------------
+from __future__ import annotations

+# -- Path setup --------------------------------------------------------------
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
2 changes: 1 addition & 1 deletion docs/devcontainer.md
@@ -1,7 +1,7 @@
 # GitHub Codespace

 The project's Codespace configuration is located in ".devcontainer". It includes the "Dockerfile" for the development container.
-The project can be opened directly in a Codespace.
+The project can be opened directly in a Codespace.

 ## Running Unit Tests
2 changes: 1 addition & 1 deletion docs/index.rst
@@ -1,7 +1,7 @@
 .. ai-python docs documentation master file, created by
    sphinx-quickstart on Thu May 5 14:06:45 2022.
    You can adapt this file completely to your liking, but it should at least
-   contain the root `toctree` directive.
+   contain the root ``toctree`` directive.

 Welcome to ai-python docs's documentation!
 ==========================================
4 changes: 2 additions & 2 deletions docs/pre-commit-config.md
@@ -1,6 +1,6 @@
 # pre-commit-config.yaml

 Pre-commit is a Python package which can be used to create 'git' hooks which scan can prior to checkins.
-The included configuration focuses on python actions which will help to prevent users from commiting code which will fail during builds.
-In general, only formatting actions are automatiicaly performed. These include auto-formatting with 'black', or sorting dependacies with 'isort'.
+The included configuration focuses on python actions which will help to prevent users from committing code which will fail during builds.
+In general, only formatting actions are automatiicaly performed. These include auto-formatting with 'black', or sorting dependacies with 'isort'.
 Linting actions are left to the discretion of the user.
2 changes: 1 addition & 1 deletion docs/pylint.md
@@ -54,7 +54,7 @@ confidence=
 # can either give multiple identifiers separated by comma (,) or put this
 # option multiple times (only on the command line, not in the configuration
 # file where it should appear only once). You can also use "--disable=all" to
-# disable everything first and then reenable specific checks. For example, if
+# disable everything first and then re-enable specific checks. For example, if
 # you want to run only the similarities checker, you can use "--disable=all
 # --enable=similarities". If you want to run only the classes checker, but have
 # no Warning level messages displayed, use "--disable=all --enable=classes
2 changes: 1 addition & 1 deletion src/README.md
@@ -1 +1 @@
-This directoy stores each Python Package.
+This directory stores each Python Package.
25 changes: 15 additions & 10 deletions src/forbids/cli/init.py
@@ -14,7 +14,7 @@
 from .. import schema

 configs = {}
-
+lgr = logging.getLogger(__name__)

 def get_config(datatype):
     if datatype in ["anat", "func", "dwi", "swi", "fmap"]:
@@ -26,9 +26,10 @@ def get_config(datatype):
         raise ValueError("unknown data type")
     if modality not in configs:
         with files("forbids").joinpath(f"config/{modality}_tags.json") as cfg_pth:
-            logging.debug(f"loading config {cfg_pth}")
+            lgr.debug(f"loading config {cfg_pth}")
             with open(cfg_pth) as cfg_fd:
                 configs[modality] = json.load(cfg_fd)
                 configs[modality]["properties"]["__instrument__"] = "="
     return configs[modality]
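
The hunk above memoizes per-modality JSON configs read from package data via `importlib.resources`. A standalone sketch of the same pattern (package and file names mirror the diff; `as_file` replaces entering the `Traversable` directly, which only works through `pathlib.Path`'s pre-3.13 no-op context manager):

```python
import json
from importlib.resources import as_file, files

_configs: dict[str, dict] = {}

def load_config(modality: str) -> dict:
    # Load config/<modality>_tags.json from the installed package once, then cache it.
    if modality not in _configs:
        resource = files("forbids").joinpath(f"config/{modality}_tags.json")
        with as_file(resource) as cfg_pth:
            _configs[modality] = json.loads(cfg_pth.read_text())
    return _configs[modality]
```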


@@ -49,7 +50,7 @@ def initialize(
     excl_ents = ["subject", "run"] + (["session"] if uniform_sessions else [])

     for datatype in all_datatypes:
-        logging.info(f"processing {datatype}")
+        lgr.info(f"processing {datatype}")
         # list all unique sets of entities for this datatype
         # should results in 1+ set per series, unless scanner differences requires separate series
         # or results in different number of output series from the same sequence (eg. rec- acq-)
@@ -65,7 +66,6 @@
         for entity in schema.ALT_ENTITIES:
             if entity not in series_entities:
                 series_entities[entity] = bids.layout.Query.NONE
-        logging.info(series_entities)
         generate_series_model(
             bids_layout,
             uniform_instruments=uniform_instruments,
@@ -101,7 +101,7 @@ def generate_series_model(
     )

     instrument_query_tags = []
-    # try grouping from more global to finer, (eg. first manufacture, then scanner then scanner+coil, ...)
+    # try grouping from more global to finer, (eg. first manufacturer, then scanner, then scanner+coil, ...)
     for instrument_tag, _ in instrument_groups.items():
         # cumulate instrument tags for query
         instrument_query_tags.append(instrument_tag)
@@ -125,12 +125,17 @@
                 series_entities=non_null_entities,
                 factor_entities=("subject", "run") + ("session",) if uniform_sessions else tuple(),
             )
-        except ValidationError as e:
-            logging.warning(f"failed to group with {instrument_query_tags}")
-            logging.warning(e)
+        except ValidationError as error:
+            lgr.warning(f"failed to group with {instrument_query_tags}")
+            lgr.warning(
+                f"{error.__class__.__name__} "
+                f"{'.'.join(error.absolute_path)} : "
+                f"{error.message} found {error.instance if 'required' not in error.message else ''}"
+            )
             continue

         # one grouping scheme worked !
-        series_entities["subject"] = "ref"
+        non_null_entities["subject"] = "ref"

         # generate paths and folder
         schema_path = bids_layout.build_path(non_null_entities, absolute_paths=False)
@@ -150,5 +155,5 @@
         with open(schema_path_abs, "wt") as fd:
             json.dump(json_schema, fd, indent=2)

-        logging.info("Successfully generated schema")
+        lgr.info(f"Successfully generated schema with grouping {instrument_query_tags}")
         break
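
The `except ValidationError: continue` / `break` structure in `generate_series_model` implements a coarse-to-fine search: instrument tags are accumulated one at a time, and the first grouping whose schema validates wins. A minimal sketch of that control flow, with `try_grouping` as a hypothetical stand-in for the schema-generation call:

```python
from jsonschema.exceptions import ValidationError

def first_working_grouping(instrument_tags, try_grouping):
    query_tags = []
    # Widen the grouping one tag at a time (e.g. manufacturer, then scanner, then coil).
    for tag in instrument_tags:
        query_tags.append(tag)
        try:
            result = try_grouping(query_tags)
        except ValidationError:
            continue  # grouping too coarse: add the next tag and retry
        return result  # first grouping that validates wins
    return None  # no grouping worked
```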
21 changes: 17 additions & 4 deletions src/forbids/cli/run.py
@@ -7,14 +7,17 @@
 import bids

 from .init import initialize
-from .validation import validate
+from .validation import validate, ValidationError

 DEBUG = bool(os.environ.get("DEBUG", False))
 if DEBUG:
     logging.basicConfig(level=logging.DEBUG)
+    logging.root.setLevel(logging.DEBUG)
 else:
     logging.basicConfig(level=logging.INFO)
+    logging.root.setLevel(logging.INFO)

+lgr = logging.getLogger(__name__)

 def parse_args():

@@ -50,6 +53,8 @@
     args = parse_args()
     layout = bids.BIDSLayout(os.path.abspath(args.bids_path))

+    lgr.debug(f"running {args.command}")
+
     if args.command == "init":
         initialize(
             layout,
@@ -61,9 +66,17 @@
         no_error = True
         for error in validate(layout, subject=args.participant_label, session=args.session_label):
             no_error = False
-            print(
-                f"{f"{error.__class__}" + '.'.join(error.absolute_path)} : {error.message} found {error.instance if 'required' not in error.message else ''}"
-            )
+            if isinstance(error, ValidationError):
+                lgr.error(
+                    "\n".join([f"{ec.json_path} {ec.message} found {ec.instance if not "required" in ec.message else ""}"
[flake8] ./src/forbids/cli/run.py:71:90: E999 SyntaxError: f-string: expecting '}'
+                               for ec in error.context])
+                )
+            else:
+                lgr.error(
+                    f"{error.__class__.__name__} "
+                    f"{'.'.join(error.absolute_path)} : "
+                    f"{error.message} found {error.instance if 'required' not in error.message else ''}"
+                )
         exit(0 if no_error else 1)


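The flake8 E999 above comes from reusing double quotes inside a double-quoted f-string (`{... if not "required" in ec.message else ""}`), which only Python 3.12+ parses. A minimal fix, assuming `error.context` holds the suberrors of a `jsonschema` `ValidationError` (it is `None` for leaf errors), is to switch the inner quotes and invert the membership test:

```python
from jsonschema.exceptions import ValidationError

def format_suberrors(error: ValidationError) -> str:
    # Single quotes inside the f-string keep this parseable on Python < 3.12.
    return "\n".join(
        f"{ec.json_path} {ec.message} found {ec.instance if 'required' not in ec.message else ''}"
        for ec in (error.context or [])
    )
```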
90 changes: 62 additions & 28 deletions src/forbids/cli/validation.py
@@ -3,23 +3,30 @@
 import keyword
 import logging
 import os
+from typing import  Any, List, Optional, Iterator
[flake8] ./src/forbids/cli/validation.py:6:1: F401 'typing.Any' imported but unused
[flake8] ./src/forbids/cli/validation.py:6:1: F401 'typing.List' imported but unused
[flake8] ./src/forbids/cli/validation.py:6:1: F401 'typing.Optional' imported but unused
[flake8] ./src/forbids/cli/validation.py:6:1: F401 'typing.Iterator' imported but unused
[flake8] ./src/forbids/cli/validation.py:6:19: E271 multiple spaces after keyword

 import bids
 import jsonschema.validators
 from jsonschema.exceptions import ValidationError
+from jsonschema._typing import Validator
[flake8] ./src/forbids/cli/validation.py:11:1: F401 'jsonschema._typing.Validator' imported but unused

 from .. import schema

+lgr = logging.getLogger(__name__)

 class BIDSFileError(ValidationError):
     # class to represent error of BIDS file missing or unexpected
     pass
+"""
[flake8] ./src/forbids/cli/validation.py:20:1: E305 expected 2 blank lines after class or function definition, found 0
+    def __init__(self, message, path=None, missing=True):
+        self.path = path
+        self.missing = missing
+
+"""

 class BIDSExtraError(ValidationError):
     pass


-def validate(bids_layout: bids.BIDSLayout, **entities):
+def validate(bids_layout: bids.BIDSLayout, **entities: dict[str, str|list]):
[flake8] ./src/forbids/cli/validation.py:29:1: E303 too many blank lines (3)
[flake8] ./src/forbids/cli/validation.py:29:69: E227 missing whitespace around bitwise or shift operator
     # validates the data specified by entities using the schema present in the `.forbids` folder

     ref_layout = bids.BIDSLayout(os.path.join(bids_layout.root, schema.FORBIDS_SCHEMA_FOLDER), validate=False)
@@ -28,38 +35,65 @@
     ref_sidecars = ref_layout.get(session=[entities.get("session"), None], extension=".json")

-    all_sidecars = bids_layout.get(extension=".json", **entities)
-    print(entities, len(all_sidecars))
+    subjects = bids_layout.get_subject(subject = entities.pop('subject'))
[flake8] ./src/forbids/cli/validation.py:40:47: E251 unexpected spaces around keyword / parameter equals
[flake8] ./src/forbids/cli/validation.py:40:49: E251 unexpected spaces around keyword / parameter equals

+    is_multisession = len(bids_layout.get_session())
+    is_session_specific = len(ref_layout.get_session())
[flake8] ./src/forbids/cli/validation.py:43:5: F841 local variable 'is_session_specific' is assigned to but never used
+    if is_multisession:
+        lgr.info("The dataset is multi-session.")
+
+    all_sidecars = bids_layout.get(
+        extension=".json",
+        subject=subjects,
+        session=entities['session'],
+    )

     for ref_sidecar in ref_sidecars:
+        lgr.info(f"validating {ref_sidecar.relpath}")
         # load the schema
         sidecar_schema = ref_sidecar.get_dict()
         bidsfile_constraints = sidecar_schema.pop("bids", dict())
         query_entities = ref_sidecar.entities.copy()
-        query_entities["subject"] = entities.get("subject")
-        query_entities["session"] = entities.get("session", None)

         for entity in schema.ALT_ENTITIES:
             if entity not in query_entities:
                 query_entities[entity] = bids.layout.Query.NONE
-        sidecars_to_validate = bids_layout.get(**query_entities)
-        if not sidecars_to_validate and not bidsfile_constraints.get("optional", False):
-            yield BIDSFileError(f"{ref_sidecar} found no match")
-        num_sidecars = len(sidecars_to_validate)
-        min_runs = bidsfile_constraints.get("min_runs", 0)
-        max_runs = bidsfile_constraints.get("max_runs", 1e10)
-        if num_sidecars < min_runs:
-            yield BIDSFileError("Expected at least {min_runs} runs for {ref_sidecar}, found {num_sidecars}")
-        elif num_sidecars > max_runs:
-            yield BIDSFileError("Expected at most {max_runs} runs for {ref_sidecar}, found {num_sidecars}")

         validator = schema.get_validator(sidecar_schema)

-        for sidecar in sidecars_to_validate:
-            if sidecar in all_sidecars:
-                all_sidecars.remove(sidecar)
-            else:
-                logging.error("an error occurred")
-            logging.info(f"validating {sidecar.path}")
-            sidecar_data = schema.prepare_metadata(sidecar, bidsfile_constraints["instrument_tags"])
-            yield from validator.iter_errors(sidecar_data)
+        for subject in subjects:
+            query_entities["subject"] = subject
+            sessions = bids_layout.get_session(
+                subject = subject,
[flake8] ./src/forbids/cli/validation.py:66:24: E251 unexpected spaces around keyword / parameter equals
[flake8] ./src/forbids/cli/validation.py:66:26: E251 unexpected spaces around keyword / parameter equals
+                session = entities["session"]
[flake8] ./src/forbids/cli/validation.py:67:24: E251 unexpected spaces around keyword / parameter equals
[flake8] ./src/forbids/cli/validation.py:67:26: E251 unexpected spaces around keyword / parameter equals
+            )
+
+            for session in sessions:
+                query_entities["session"] = session
+
+                lgr.debug(query_entities)
+
+                sidecars_to_validate = bids_layout.get(**query_entities)
+
+                if not sidecars_to_validate and not bidsfile_constraints.get("optional", False):
+                    yield BIDSFileError(f"{ref_sidecar.relpath} found no match")
+                    continue # no point going further
[flake8] ./src/forbids/cli/validation.py:79:29: E261 at least two spaces before inline comment
+
+                num_sidecars = len(sidecars_to_validate)
+                min_runs = bidsfile_constraints.get("min_runs", 0)
+                max_runs = bidsfile_constraints.get("max_runs", 1e10)
+                if num_sidecars < min_runs:
+                    yield BIDSFileError(f"Expected at least {min_runs} runs for {ref_sidecar.relpath}, found {num_sidecars}")
[flake8] ./src/forbids/cli/validation.py:85:121: E501 line too long (125 > 120 characters)
+                elif num_sidecars > max_runs:
+                    yield BIDSFileError(f"Expected at most {max_runs} runs for {ref_sidecar.relpath}, found {num_sidecars}")
[flake8] ./src/forbids/cli/validation.py:87:121: E501 line too long (124 > 120 characters)
+
+                for sidecar in sidecars_to_validate:
+                    if sidecar in all_sidecars:
+                        all_sidecars.remove(sidecar)
+                    else:
+                        lgr.error("an error occurred")
+                    lgr.info(f"validating {sidecar.path}")
+                    sidecar_data = schema.prepare_metadata(sidecar, bidsfile_constraints["instrument_tags"])
+                    yield from validator.iter_errors(sidecar_data)
     for extra_sidecar in all_sidecars:
-        yield BIDSExtraError(f"Extra BIDS file{extra_sidecar.path}")
+        relpath = extra_sidecar.path
[flake8] ./src/forbids/cli/validation.py:98:9: F841 local variable 'relpath' is assigned to but never used
+        yield BIDSFileError(f"Unexpected BIDS file{extra_sidecar.relpath}")
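
The run-count constraint added here is easy to exercise in isolation. A sketch, assuming a `bids` constraint block shaped like the one the reference sidecar stores (`min_runs`/`max_runs` optional, defaulting to 0 and effectively unbounded):

```python
def check_run_counts(constraints: dict, num_sidecars: int, name: str) -> list[str]:
    # Return human-readable errors when a series has too few or too many runs.
    errors = []
    min_runs = constraints.get("min_runs", 0)
    max_runs = constraints.get("max_runs", 1e10)
    if num_sidecars < min_runs:
        errors.append(f"Expected at least {min_runs} runs for {name}, found {num_sidecars}")
    elif num_sidecars > max_runs:
        errors.append(f"Expected at most {max_runs} runs for {name}, found {num_sidecars}")
    return errors

# Example: a series allowed 1-2 runs, but 3 matching sidecars were found.
print(check_run_counts({"min_runs": 1, "max_runs": 2}, 3, "sub-ref/anat/sub-ref_T1w.json"))
```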
2 changes: 1 addition & 1 deletion src/forbids/config/mri_tags.json
@@ -80,4 +80,4 @@
       }
     }
   }
-}
+}
22 changes: 15 additions & 7 deletions src/forbids/schema.py
@@ -12,8 +12,9 @@
 from apischema import discriminator, schema
 from apischema.json_schema import deserialization_schema

-FORBIDS_SCHEMA_FOLDER = ".forbids"
+lgr = logging.getLogger(__name__)

+FORBIDS_SCHEMA_FOLDER = ".forbids"

 # entities that differentiate files from the same series
 # where it might be None for one of the files.
@@ -94,21 +95,23 @@ def sidecars2unionschema(

     schema_name = bids_layout.build_path(series_entities, absolute_paths=False)
     subschemas = []
+    mapping_keys = []
     for keys, sidecars in sidecars_groups.items():
         instrument_tags = [k[0] for k in keys]
         sidecars = list(sidecars)
         # generate sidecar from first examplar
         sc = sidecars[0]
-        logging.info(f"generating schema from {sc.path}")
+        lgr.info(f"generating schema from {sc.relpath}")
         metas = prepare_metadata(sc, instrument_tags)
+        mapping_keys.append(metas['__instrument__'])
         subschema_name = schema_name + "-".join([k.replace(".", "_") for t, k in keys])
         # while subschema_name in subschemas:
         #     subschema_name = subschema_name + "_copy"
         subschema = sidecar2schema(metas, config_props, subschema_name)
         # check if we can apply the schema from 1st sidecar to the others:
         validator = get_validator(deserialization_schema(subschema, additional_properties=True))
         for sidecar in sidecars[1:]:
-            logging.info(f"validating schema from {sidecar.path}")
+            lgr.info(f"validating schema from {sidecar.relpath}")
             # validate or raise
             validator.validate(prepare_metadata(sidecar, instrument_tags))

@@ -119,7 +122,11 @@
         return subschemas[0]

     UnionModel = Annotated[
-        Union[tuple(subschemas)], discriminator("__instrument__", {sc.__name__: sc for sc in subschemas})
+        Union[tuple(subschemas)],
+        discriminator(
+            "__instrument__",
+            # {k :sc.__name__ for k, sc in zip(mapping_keys, subschemas)}
+        )
     ]

     return UnionModel
@@ -132,14 +139,14 @@ def compare_schema(sc1: dataclass, sc2: dataclass) -> bool:
     sc2_props = sc2.__dataclass_fields__
     sc1_props_keys = set(sc1_props.keys())
     sc2_props_keys = set(sc2_props.keys())
-    logging.debug(f"XOR: {set(sc1_props_keys).symmetric_difference(sc2_props_keys)}")
+    lgr.debug(f"XOR: {set(sc1_props_keys).symmetric_difference(sc2_props_keys)}")
     for prop in sc1_props_keys.intersection(sc2_props_keys):
         if isinstance(sc1_props[prop], type):
             match = compare_schema(sc1_props[prop], sc2_props[prop])
         t1, t2 = sc1_props[prop].type, sc2_props[prop].type
         if t1 != t2:
             if not hasattr(t1, "__supertype__") or t1.__supertype__ != t2.__supertype__:
-                logging.debug(str((prop, sc1_props[prop].type, sc2_props[prop].type)))
+                lgr.debug(str((prop, sc1_props[prop].type, sc2_props[prop].type)))
                 match = False
     return match

@@ -153,5 +160,6 @@ def prepare_metadata(
     # rename conflictual keywords as the schema was created
     sidecar_data = {k + ("__" if k in keyword.kwlist else ""): v for k, v in sidecar.get_dict().items()}
     # create an aggregate tag of all schema-defined instrument tags
-    sidecar_data["__instrument__"] = [sidecar_data.get(instr_tag, None) for instr_tag in instrument_tags]
+    sidecar_data["__instrument__"] = '-'.join([sidecar_data.get(instr_tag, None) for instr_tag in instrument_tags])
+    print(sidecar_data["__instrument__"])
     return sidecar_data
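
One behavior change worth flagging in `prepare_metadata`: `'-'.join(...)` raises `TypeError` as soon as any instrument tag is missing from a sidecar, because `sidecar_data.get(instr_tag, None)` then feeds a `None` into the join. A defensive sketch (not the PR's code) that substitutes a placeholder instead:

```python
import keyword

def prepare_metadata_sketch(sidecar_data: dict, instrument_tags: list[str]) -> dict:
    # Rename keys that collide with Python keywords, mirroring the schema side.
    data = {k + ("__" if k in keyword.kwlist else ""): v for k, v in sidecar_data.items()}
    # Substitute a placeholder for missing tags so '-'.join never receives None.
    data["__instrument__"] = "-".join(str(data.get(tag, "NA")) for tag in instrument_tags)
    return data

# A sidecar missing "DeviceSerialNumber" still produces a usable discriminator key.
print(prepare_metadata_sketch({"Manufacturer": "Siemens"}, ["Manufacturer", "DeviceSerialNumber"]))
# {'Manufacturer': 'Siemens', '__instrument__': 'Siemens-NA'}
```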