feat: wip #48

Open · wants to merge 4 commits into base: main
2 changes: 1 addition & 1 deletion .github/workflows/CI.yml
@@ -5,7 +5,7 @@ on:
   pull_request:
     branches: [ main ]
   release:
-    types: [created]
+    types: [created]
   workflow_dispatch:

 jobs:
3 changes: 2 additions & 1 deletion docs/conf.py
@@ -4,8 +4,9 @@
 # list see the documentation:
 # https://www.sphinx-doc.org/en/master/usage/configuration.html

-# -- Path setup --------------------------------------------------------------
+from __future__ import annotations

+# -- Path setup --------------------------------------------------------------
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
2 changes: 1 addition & 1 deletion docs/devcontainer.md
@@ -1,7 +1,7 @@
 # GitHub Codespace

 The project's Codespace configuration is located in ".devcontainer". It includes the "Dockerfile" for the development container.
-The project can be opened directly in a Codespace.
+The project can be opened directly in a Codespace.

 ## Running Unit Tests
2 changes: 1 addition & 1 deletion docs/index.rst
@@ -1,7 +1,7 @@
 .. ai-python docs documentation master file, created by
    sphinx-quickstart on Thu May 5 14:06:45 2022.
    You can adapt this file completely to your liking, but it should at least
-   contain the root `toctree` directive.
+   contain the root ``toctree`` directive.

 Welcome to ai-python docs's documentation!
 ==========================================
4 changes: 2 additions & 2 deletions docs/pre-commit-config.md
@@ -1,6 +1,6 @@
 # pre-commit-config.yaml

 Pre-commit is a Python package which can be used to create 'git' hooks which scan can prior to checkins.
-The included configuration focuses on python actions which will help to prevent users from commiting code which will fail during builds.
-In general, only formatting actions are automatiicaly performed. These include auto-formatting with 'black', or sorting dependacies with 'isort'.
+The included configuration focuses on python actions which will help to prevent users from committing code which will fail during builds.
+In general, only formatting actions are automatiicaly performed. These include auto-formatting with 'black', or sorting dependacies with 'isort'.
 Linting actions are left to the discretion of the user.
2 changes: 1 addition & 1 deletion docs/pylint.md
@@ -54,7 +54,7 @@ confidence=
 # can either give multiple identifiers separated by comma (,) or put this
 # option multiple times (only on the command line, not in the configuration
 # file where it should appear only once). You can also use "--disable=all" to
-# disable everything first and then reenable specific checks. For example, if
+# disable everything first and then re-enable specific checks. For example, if
 # you want to run only the similarities checker, you can use "--disable=all
 # --enable=similarities". If you want to run only the classes checker, but have
 # no Warning level messages displayed, use "--disable=all --enable=classes
2 changes: 1 addition & 1 deletion src/README.md
@@ -1 +1 @@
-This directoy stores each Python Package.
+This directory stores each Python Package.
25 changes: 15 additions & 10 deletions src/forbids/cli/init.py
@@ -14,7 +14,7 @@
 from .. import schema

 configs = {}
-
+lgr = logging.getLogger(__name__)

 def get_config(datatype):
     if datatype in ["anat", "func", "dwi", "swi", "fmap"]:
@@ -26,9 +26,10 @@ def get_config(datatype):
         raise ValueError("unknown data type")
     if modality not in configs:
         with files("forbids").joinpath(f"config/{modality}_tags.json") as cfg_pth:
-            logging.debug(f"loading config {cfg_pth}")
+            lgr.debug(f"loading config {cfg_pth}")
             with open(cfg_pth) as cfg_fd:
                 configs[modality] = json.load(cfg_fd)
                 configs[modality]["properties"]["__instrument__"] = "="
     return configs[modality]
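
The hunk above memoizes per-modality JSON configs read from package data via `importlib.resources`. A standalone sketch of the same pattern (package and file names mirror the diff; `as_file` replaces entering the `Traversable` directly, which only works through `pathlib.Path`'s pre-3.13 no-op context manager):

```python
import json
from importlib.resources import as_file, files

_configs: dict[str, dict] = {}

def load_config(modality: str) -> dict:
    # Load config/<modality>_tags.json from the installed package once, then cache it.
    if modality not in _configs:
        resource = files("forbids").joinpath(f"config/{modality}_tags.json")
        with as_file(resource) as cfg_pth:
            _configs[modality] = json.loads(cfg_pth.read_text())
    return _configs[modality]
```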


@@ -49,7 +50,7 @@ def initialize(
     excl_ents = ["subject", "run"] + (["session"] if uniform_sessions else [])

     for datatype in all_datatypes:
-        logging.info(f"processing {datatype}")
+        lgr.info(f"processing {datatype}")
         # list all unique sets of entities for this datatype
         # should results in 1+ set per series, unless scanner differences requires separate series
         # or results in different number of output series from the same sequence (eg. rec- acq-)
@@ -65,7 +66,6 @@
         for entity in schema.ALT_ENTITIES:
             if entity not in series_entities:
                 series_entities[entity] = bids.layout.Query.NONE
-        logging.info(series_entities)
         generate_series_model(
             bids_layout,
             uniform_instruments=uniform_instruments,
@@ -101,7 +101,7 @@ def generate_series_model(
     )

     instrument_query_tags = []
-    # try grouping from more global to finer, (eg. first manufacture, then scanner then scanner+coil, ...)
+    # try grouping from more global to finer, (eg. first manufacturer, then scanner, then scanner+coil, ...)
     for instrument_tag, _ in instrument_groups.items():
         # cumulate instrument tags for query
         instrument_query_tags.append(instrument_tag)
@@ -125,12 +125,17 @@
                 series_entities=non_null_entities,
                 factor_entities=("subject", "run") + ("session",) if uniform_sessions else tuple(),
             )
-        except ValidationError as e:
-            logging.warning(f"failed to group with {instrument_query_tags}")
-            logging.warning(e)
+        except ValidationError as error:
+            lgr.warning(f"failed to group with {instrument_query_tags}")
+            lgr.warning(
+                f"{error.__class__.__name__} "
+                f"{'.'.join(error.absolute_path)} : "
+                f"{error.message} found {error.instance if 'required' not in error.message else ''}"
+            )
             continue

         # one grouping scheme worked !
-        series_entities["subject"] = "ref"
+        non_null_entities["subject"] = "ref"

         # generate paths and folder
         schema_path = bids_layout.build_path(non_null_entities, absolute_paths=False)
@@ -150,5 +155,5 @@
         with open(schema_path_abs, "wt") as fd:
             json.dump(json_schema, fd, indent=2)

-        logging.info("Successfully generated schema")
+        lgr.info(f"Successfully generated schema with grouping {instrument_query_tags}")
         break
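
The `except ValidationError: continue` / `break` structure in `generate_series_model` implements a coarse-to-fine search: instrument tags are accumulated one at a time, and the first grouping whose schema validates wins. A minimal sketch of that control flow, with `try_grouping` as a hypothetical stand-in for the schema-generation call:

```python
from jsonschema.exceptions import ValidationError

def first_working_grouping(instrument_tags, try_grouping):
    query_tags = []
    # Widen the grouping one tag at a time (e.g. manufacturer, then scanner, then coil).
    for tag in instrument_tags:
        query_tags.append(tag)
        try:
            result = try_grouping(query_tags)
        except ValidationError:
            continue  # grouping too coarse: add the next tag and retry
        return result  # first grouping that validates wins
    return None  # no grouping worked
```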
21 changes: 17 additions & 4 deletions src/forbids/cli/run.py
@@ -7,14 +7,17 @@
 import bids

 from .init import initialize
-from .validation import validate
+from .validation import validate, ValidationError

 DEBUG = bool(os.environ.get("DEBUG", False))
 if DEBUG:
     logging.basicConfig(level=logging.DEBUG)
+    logging.root.setLevel(logging.DEBUG)
 else:
     logging.basicConfig(level=logging.INFO)
+    logging.root.setLevel(logging.INFO)

+lgr = logging.getLogger(__name__)

 def parse_args():

@@ -50,6 +53,8 @@
     args = parse_args()
     layout = bids.BIDSLayout(os.path.abspath(args.bids_path))

+    lgr.debug(f"running {args.command}")
+
     if args.command == "init":
         initialize(
             layout,
@@ -61,9 +66,17 @@
         no_error = True
         for error in validate(layout, subject=args.participant_label, session=args.session_label):
             no_error = False
-            print(
-                f"{f"{error.__class__}" + '.'.join(error.absolute_path)} : {error.message} found {error.instance if 'required' not in error.message else ''}"
-            )
+            if isinstance(error, ValidationError):
+                lgr.error(
+                    "\n".join([f"{ec.json_path} {ec.message} found {ec.instance if not "required" in ec.message else ""}"
[flake8] ./src/forbids/cli/run.py:71:90: E999 SyntaxError: f-string: expecting '}'
+                               for ec in error.context])
+                )
+            else:
+                lgr.error(
+                    f"{error.__class__.__name__} "
+                    f"{'.'.join(error.absolute_path)} : "
+                    f"{error.message} found {error.instance if 'required' not in error.message else ''}"
+                )
         exit(0 if no_error else 1)


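The flake8 E999 above comes from reusing double quotes inside a double-quoted f-string (`{... if not "required" in ec.message else ""}`), which only Python 3.12+ parses. A minimal fix, assuming `error.context` holds the suberrors of a `jsonschema` `ValidationError` (it is `None` for leaf errors), is to switch the inner quotes and invert the membership test:

```python
from jsonschema.exceptions import ValidationError

def format_suberrors(error: ValidationError) -> str:
    # Single quotes inside the f-string keep this parseable on Python < 3.12.
    return "\n".join(
        f"{ec.json_path} {ec.message} found {ec.instance if 'required' not in ec.message else ''}"
        for ec in (error.context or [])
    )
```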
90 changes: 62 additions & 28 deletions src/forbids/cli/validation.py
@@ -3,23 +3,30 @@
 import keyword
 import logging
 import os
+from typing import  Any, List, Optional, Iterator
[flake8] ./src/forbids/cli/validation.py:6:1: F401 'typing.Any' imported but unused
[flake8] ./src/forbids/cli/validation.py:6:1: F401 'typing.List' imported but unused
[flake8] ./src/forbids/cli/validation.py:6:1: F401 'typing.Optional' imported but unused
[flake8] ./src/forbids/cli/validation.py:6:1: F401 'typing.Iterator' imported but unused
[flake8] ./src/forbids/cli/validation.py:6:19: E271 multiple spaces after keyword

 import bids
 import jsonschema.validators
 from jsonschema.exceptions import ValidationError
+from jsonschema._typing import Validator
[flake8] ./src/forbids/cli/validation.py:11:1: F401 'jsonschema._typing.Validator' imported but unused

 from .. import schema

+lgr = logging.getLogger(__name__)

 class BIDSFileError(ValidationError):
     # class to represent error of BIDS file missing or unexpected
     pass
+"""
[flake8] ./src/forbids/cli/validation.py:20:1: E305 expected 2 blank lines after class or function definition, found 0
+    def __init__(self, message, path=None, missing=True):
+        self.path = path
+        self.missing = missing
+
+"""

 class BIDSExtraError(ValidationError):
     pass


-def validate(bids_layout: bids.BIDSLayout, **entities):
+def validate(bids_layout: bids.BIDSLayout, **entities: dict[str, str|list]):
[flake8] ./src/forbids/cli/validation.py:29:1: E303 too many blank lines (3)
[flake8] ./src/forbids/cli/validation.py:29:69: E227 missing whitespace around bitwise or shift operator
     # validates the data specified by entities using the schema present in the `.forbids` folder

     ref_layout = bids.BIDSLayout(os.path.join(bids_layout.root, schema.FORBIDS_SCHEMA_FOLDER), validate=False)
@@ -28,38 +35,65 @@
     ref_sidecars = ref_layout.get(session=[entities.get("session"), None], extension=".json")

-    all_sidecars = bids_layout.get(extension=".json", **entities)
-    print(entities, len(all_sidecars))
+    subjects = bids_layout.get_subject(subject = entities.pop('subject'))
[flake8] ./src/forbids/cli/validation.py:40:47: E251 unexpected spaces around keyword / parameter equals
[flake8] ./src/forbids/cli/validation.py:40:49: E251 unexpected spaces around keyword / parameter equals

+    is_multisession = len(bids_layout.get_session())
+    is_session_specific = len(ref_layout.get_session())
[flake8] ./src/forbids/cli/validation.py:43:5: F841 local variable 'is_session_specific' is assigned to but never used
+    if is_multisession:
+        lgr.info("The dataset is multi-session.")
+
+    all_sidecars = bids_layout.get(
+        extension=".json",
+        subject=subjects,
+        session=entities['session'],
+    )

     for ref_sidecar in ref_sidecars:
+        lgr.info(f"validating {ref_sidecar.relpath}")
         # load the schema
         sidecar_schema = ref_sidecar.get_dict()
         bidsfile_constraints = sidecar_schema.pop("bids", dict())
         query_entities = ref_sidecar.entities.copy()
-        query_entities["subject"] = entities.get("subject")
-        query_entities["session"] = entities.get("session", None)

         for entity in schema.ALT_ENTITIES:
             if entity not in query_entities:
                 query_entities[entity] = bids.layout.Query.NONE
-        sidecars_to_validate = bids_layout.get(**query_entities)
-        if not sidecars_to_validate and not bidsfile_constraints.get("optional", False):
-            yield BIDSFileError(f"{ref_sidecar} found no match")
-        num_sidecars = len(sidecars_to_validate)
-        min_runs = bidsfile_constraints.get("min_runs", 0)
-        max_runs = bidsfile_constraints.get("max_runs", 1e10)
-        if num_sidecars < min_runs:
-            yield BIDSFileError("Expected at least {min_runs} runs for {ref_sidecar}, found {num_sidecars}")
-        elif num_sidecars > max_runs:
-            yield BIDSFileError("Expected at most {max_runs} runs for {ref_sidecar}, found {num_sidecars}")

         validator = schema.get_validator(sidecar_schema)

-        for sidecar in sidecars_to_validate:
-            if sidecar in all_sidecars:
-                all_sidecars.remove(sidecar)
-            else:
-                logging.error("an error occurred")
-            logging.info(f"validating {sidecar.path}")
-            sidecar_data = schema.prepare_metadata(sidecar, bidsfile_constraints["instrument_tags"])
-            yield from validator.iter_errors(sidecar_data)
+        for subject in subjects:
+            query_entities["subject"] = subject
+            sessions = bids_layout.get_session(
+                subject = subject,
[flake8] ./src/forbids/cli/validation.py:66:24: E251 unexpected spaces around keyword / parameter equals
[flake8] ./src/forbids/cli/validation.py:66:26: E251 unexpected spaces around keyword / parameter equals
+                session = entities["session"]
[flake8] ./src/forbids/cli/validation.py:67:24: E251 unexpected spaces around keyword / parameter equals
[flake8] ./src/forbids/cli/validation.py:67:26: E251 unexpected spaces around keyword / parameter equals
+            )
+
+            for session in sessions:
+                query_entities["session"] = session
+
+                lgr.debug(query_entities)
+
+                sidecars_to_validate = bids_layout.get(**query_entities)
+
+                if not sidecars_to_validate and not bidsfile_constraints.get("optional", False):
+                    yield BIDSFileError(f"{ref_sidecar.relpath} found no match")
+                    continue # no point going further
[flake8] ./src/forbids/cli/validation.py:79:29: E261 at least two spaces before inline comment
+
+                num_sidecars = len(sidecars_to_validate)
+                min_runs = bidsfile_constraints.get("min_runs", 0)
+                max_runs = bidsfile_constraints.get("max_runs", 1e10)
+                if num_sidecars < min_runs:
+                    yield BIDSFileError(f"Expected at least {min_runs} runs for {ref_sidecar.relpath}, found {num_sidecars}")
[flake8] ./src/forbids/cli/validation.py:85:121: E501 line too long (125 > 120 characters)
+                elif num_sidecars > max_runs:
+                    yield BIDSFileError(f"Expected at most {max_runs} runs for {ref_sidecar.relpath}, found {num_sidecars}")
[flake8] ./src/forbids/cli/validation.py:87:121: E501 line too long (124 > 120 characters)
+
+                for sidecar in sidecars_to_validate:
+                    if sidecar in all_sidecars:
+                        all_sidecars.remove(sidecar)
+                    else:
+                        lgr.error("an error occurred")
+                    lgr.info(f"validating {sidecar.path}")
+                    sidecar_data = schema.prepare_metadata(sidecar, bidsfile_constraints["instrument_tags"])
+                    yield from validator.iter_errors(sidecar_data)
     for extra_sidecar in all_sidecars:
-        yield BIDSExtraError(f"Extra BIDS file{extra_sidecar.path}")
+        relpath = extra_sidecar.path
[flake8] ./src/forbids/cli/validation.py:98:9: F841 local variable 'relpath' is assigned to but never used
+        yield BIDSFileError(f"Unexpected BIDS file{extra_sidecar.relpath}")
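
The run-count constraint added here is easy to exercise in isolation. A sketch, assuming a `bids` constraint block shaped like the one the reference sidecar stores (`min_runs`/`max_runs` optional, defaulting to 0 and effectively unbounded):

```python
def check_run_counts(constraints: dict, num_sidecars: int, name: str) -> list[str]:
    # Return human-readable errors when a series has too few or too many runs.
    errors = []
    min_runs = constraints.get("min_runs", 0)
    max_runs = constraints.get("max_runs", 1e10)
    if num_sidecars < min_runs:
        errors.append(f"Expected at least {min_runs} runs for {name}, found {num_sidecars}")
    elif num_sidecars > max_runs:
        errors.append(f"Expected at most {max_runs} runs for {name}, found {num_sidecars}")
    return errors

# Example: a series allowed 1-2 runs, but 3 matching sidecars were found.
print(check_run_counts({"min_runs": 1, "max_runs": 2}, 3, "sub-ref/anat/sub-ref_T1w.json"))
```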
2 changes: 1 addition & 1 deletion src/forbids/config/mri_tags.json
@@ -80,4 +80,4 @@
       }
     }
   }
-}
+}
22 changes: 15 additions & 7 deletions src/forbids/schema.py
@@ -12,8 +12,9 @@
 from apischema import discriminator, schema
 from apischema.json_schema import deserialization_schema

-FORBIDS_SCHEMA_FOLDER = ".forbids"
+lgr = logging.getLogger(__name__)

+FORBIDS_SCHEMA_FOLDER = ".forbids"

 # entities that differentiate files from the same series
 # where it might be None for one of the files.
@@ -94,21 +95,23 @@ def sidecars2unionschema(

     schema_name = bids_layout.build_path(series_entities, absolute_paths=False)
     subschemas = []
+    mapping_keys = []
     for keys, sidecars in sidecars_groups.items():
         instrument_tags = [k[0] for k in keys]
         sidecars = list(sidecars)
         # generate sidecar from first examplar
         sc = sidecars[0]
-        logging.info(f"generating schema from {sc.path}")
+        lgr.info(f"generating schema from {sc.relpath}")
         metas = prepare_metadata(sc, instrument_tags)
+        mapping_keys.append(metas['__instrument__'])
         subschema_name = schema_name + "-".join([k.replace(".", "_") for t, k in keys])
         # while subschema_name in subschemas:
         #     subschema_name = subschema_name + "_copy"
         subschema = sidecar2schema(metas, config_props, subschema_name)
         # check if we can apply the schema from 1st sidecar to the others:
         validator = get_validator(deserialization_schema(subschema, additional_properties=True))
         for sidecar in sidecars[1:]:
-            logging.info(f"validating schema from {sidecar.path}")
+            lgr.info(f"validating schema from {sidecar.relpath}")
             # validate or raise
             validator.validate(prepare_metadata(sidecar, instrument_tags))

@@ -119,7 +122,11 @@
         return subschemas[0]

     UnionModel = Annotated[
-        Union[tuple(subschemas)], discriminator("__instrument__", {sc.__name__: sc for sc in subschemas})
+        Union[tuple(subschemas)],
+        discriminator(
+            "__instrument__",
+            # {k :sc.__name__ for k, sc in zip(mapping_keys, subschemas)}
+        )
     ]

     return UnionModel
@@ -132,14 +139,14 @@ def compare_schema(sc1: dataclass, sc2: dataclass) -> bool:
     sc2_props = sc2.__dataclass_fields__
     sc1_props_keys = set(sc1_props.keys())
     sc2_props_keys = set(sc2_props.keys())
-    logging.debug(f"XOR: {set(sc1_props_keys).symmetric_difference(sc2_props_keys)}")
+    lgr.debug(f"XOR: {set(sc1_props_keys).symmetric_difference(sc2_props_keys)}")
     for prop in sc1_props_keys.intersection(sc2_props_keys):
         if isinstance(sc1_props[prop], type):
             match = compare_schema(sc1_props[prop], sc2_props[prop])
         t1, t2 = sc1_props[prop].type, sc2_props[prop].type
         if t1 != t2:
             if not hasattr(t1, "__supertype__") or t1.__supertype__ != t2.__supertype__:
-                logging.debug(str((prop, sc1_props[prop].type, sc2_props[prop].type)))
+                lgr.debug(str((prop, sc1_props[prop].type, sc2_props[prop].type)))
                 match = False
     return match

@@ -153,5 +160,6 @@ def prepare_metadata(
     # rename conflictual keywords as the schema was created
     sidecar_data = {k + ("__" if k in keyword.kwlist else ""): v for k, v in sidecar.get_dict().items()}
     # create an aggregate tag of all schema-defined instrument tags
-    sidecar_data["__instrument__"] = [sidecar_data.get(instr_tag, None) for instr_tag in instrument_tags]
+    sidecar_data["__instrument__"] = '-'.join([sidecar_data.get(instr_tag, None) for instr_tag in instrument_tags])
+    print(sidecar_data["__instrument__"])
     return sidecar_data
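
One behavior change worth flagging in `prepare_metadata`: `'-'.join(...)` raises `TypeError` as soon as any instrument tag is missing from a sidecar, because `sidecar_data.get(instr_tag, None)` then feeds a `None` into the join. A defensive sketch (not the PR's code) that substitutes a placeholder instead:

```python
import keyword

def prepare_metadata_sketch(sidecar_data: dict, instrument_tags: list[str]) -> dict:
    # Rename keys that collide with Python keywords, mirroring the schema side.
    data = {k + ("__" if k in keyword.kwlist else ""): v for k, v in sidecar_data.items()}
    # Substitute a placeholder for missing tags so '-'.join never receives None.
    data["__instrument__"] = "-".join(str(data.get(tag, "NA")) for tag in instrument_tags)
    return data

# A sidecar missing "DeviceSerialNumber" still produces a usable discriminator key.
print(prepare_metadata_sketch({"Manufacturer": "Siemens"}, ["Manufacturer", "DeviceSerialNumber"]))
# {'Manufacturer': 'Siemens', '__instrument__': 'Siemens-NA'}
```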