From 9a15ee5767d4eb1935b0e1a8c99d1c59fbc4e25c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A9d=C3=A9ric=20Hurier=20=28Fmind=29?= Date: Sun, 25 Feb 2024 22:00:54 +0100 Subject: [PATCH] V0.4.0 (#4) --- .github/ISSUE_TEMPLATE.md | 8 +- .github/PULL_REQUEST_TEMPLATE.md | 2 - .github/workflows/on-pull-request.yml | 2 +- .gitignore | 1 + .pre-commit-config.yaml | 2 +- LICENCE.txt | 396 +++++++++++++++++++++++++- README.md | 6 + mlops-python-package.code-workspace | 10 +- poetry.lock | 175 ++---------- pyproject.toml | 35 +-- src/bikes/__main__.py | 10 + src/bikes/configs.py | 1 - src/bikes/jobs.py | 20 +- src/bikes/metrics.py | 4 +- src/bikes/models.py | 9 +- src/bikes/registers.py | 18 +- src/bikes/schemas.py | 1 + src/bikes/scripts.py | 6 +- src/bikes/services.py | 8 +- src/bikes/splitters.py | 24 +- tasks/checks.py | 41 ++- tasks/cleans.py | 83 ++++-- tasks/containers.py | 10 +- tasks/dags.py | 2 - tasks/docs.py | 7 +- tasks/formats.py | 16 +- tasks/installs.py | 2 - tasks/packages.py | 2 - tests/conftest.py | 14 +- tests/test_configs.py | 5 +- tests/test_datasets.py | 3 - tests/test_jobs.py | 50 +++- tests/test_metrics.py | 3 - tests/test_models.py | 9 +- tests/test_registers.py | 19 +- tests/test_schemas.py | 2 - tests/test_scripts.py | 3 - tests/test_searchers.py | 12 +- tests/test_services.py | 25 +- tests/test_splitters.py | 14 +- 40 files changed, 722 insertions(+), 338 deletions(-) create mode 100644 src/bikes/__main__.py diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 610808e..5a06345 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -1,16 +1,14 @@ -## Expected Behavior - - ## Actual Behavior +## Expected Behavior -## Steps to Reproduce the Problem +## Steps to Reproduce 1. 1. 1. 
-## Specifications +## System Specifications - Version: - Platform: diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 0dd2e9d..4006d43 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -2,6 +2,4 @@ Fixes # ## Proposed Changes -- -- - \ No newline at end of file diff --git a/.github/workflows/on-pull-request.yml b/.github/workflows/on-pull-request.yml index dd69569..803004c 100644 --- a/.github/workflows/on-pull-request.yml +++ b/.github/workflows/on-pull-request.yml @@ -11,5 +11,5 @@ jobs: steps: - uses: actions/checkout@v4 - uses: ./.github/actions/setup - - run: poetry install --with dev,checks,formats + - run: poetry install --with dev,checks - run: poetry run invoke checks diff --git a/.gitignore b/.gitignore index a78d83e..06f385d 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ .cache/ .coverage* .mypy_cache/ +.ruff_cache/ .pytest_cache/ # Editor diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 292d244..8921eb0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,7 @@ default_language_version: repos: # commons - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v4.5.0 hooks: - id: check-added-large-files - id: check-case-conflict diff --git a/LICENCE.txt b/LICENCE.txt index 7d6b4e5..f987f3d 100644 --- a/LICENCE.txt +++ b/LICENCE.txt @@ -1 +1,395 @@ -https://creativecommons.org/licenses/by/2.0/ +Attribution 4.0 International + +======================================================================= + +Creative Commons Corporation ("Creative Commons") is not a law firm and +does not provide legal services or legal advice. Distribution of +Creative Commons public licenses does not create a lawyer-client or +other relationship. Creative Commons makes its licenses and related +information available on an "as-is" basis. 
Creative Commons gives no +warranties regarding its licenses, any material licensed under their +terms and conditions, or any related information. Creative Commons +disclaims all liability for damages resulting from their use to the +fullest extent possible. + +Using Creative Commons Public Licenses + +Creative Commons public licenses provide a standard set of terms and +conditions that creators and other rights holders may use to share +original works of authorship and other material subject to copyright +and certain other rights specified in the public license below. The +following considerations are for informational purposes only, are not +exhaustive, and do not form part of our licenses. + + Considerations for licensors: Our public licenses are + intended for use by those authorized to give the public + permission to use material in ways otherwise restricted by + copyright and certain other rights. Our licenses are + irrevocable. Licensors should read and understand the terms + and conditions of the license they choose before applying it. + Licensors should also secure all rights necessary before + applying our licenses so that the public can reuse the + material as expected. Licensors should clearly mark any + material not subject to the license. This includes other CC- + licensed material, or material used under an exception or + limitation to copyright. More considerations for licensors: + wiki.creativecommons.org/Considerations_for_licensors + + Considerations for the public: By using one of our public + licenses, a licensor grants the public permission to use the + licensed material under specified terms and conditions. If + the licensor's permission is not necessary for any reason--for + example, because of any applicable exception or limitation to + copyright--then that use is not regulated by the license. Our + licenses grant only permissions under copyright and certain + other rights that a licensor has authority to grant. 
Use of + the licensed material may still be restricted for other + reasons, including because others have copyright or other + rights in the material. A licensor may make special requests, + such as asking that all changes be marked or described. + Although not required by our licenses, you are encouraged to + respect those requests where reasonable. More considerations + for the public: + wiki.creativecommons.org/Considerations_for_licensees + +======================================================================= + +Creative Commons Attribution 4.0 International Public License + +By exercising the Licensed Rights (defined below), You accept and agree +to be bound by the terms and conditions of this Creative Commons +Attribution 4.0 International Public License ("Public License"). To the +extent this Public License may be interpreted as a contract, You are +granted the Licensed Rights in consideration of Your acceptance of +these terms and conditions, and the Licensor grants You such rights in +consideration of benefits the Licensor receives from making the +Licensed Material available under these terms and conditions. + + +Section 1 -- Definitions. + + a. Adapted Material means material subject to Copyright and Similar + Rights that is derived from or based upon the Licensed Material + and in which the Licensed Material is translated, altered, + arranged, transformed, or otherwise modified in a manner requiring + permission under the Copyright and Similar Rights held by the + Licensor. For purposes of this Public License, where the Licensed + Material is a musical work, performance, or sound recording, + Adapted Material is always produced where the Licensed Material is + synched in timed relation with a moving image. + + b. Adapter's License means the license You apply to Your Copyright + and Similar Rights in Your contributions to Adapted Material in + accordance with the terms and conditions of this Public License. + + c. 
Copyright and Similar Rights means copyright and/or similar rights + closely related to copyright including, without limitation, + performance, broadcast, sound recording, and Sui Generis Database + Rights, without regard to how the rights are labeled or + categorized. For purposes of this Public License, the rights + specified in Section 2(b)(1)-(2) are not Copyright and Similar + Rights. + + d. Effective Technological Measures means those measures that, in the + absence of proper authority, may not be circumvented under laws + fulfilling obligations under Article 11 of the WIPO Copyright + Treaty adopted on December 20, 1996, and/or similar international + agreements. + + e. Exceptions and Limitations means fair use, fair dealing, and/or + any other exception or limitation to Copyright and Similar Rights + that applies to Your use of the Licensed Material. + + f. Licensed Material means the artistic or literary work, database, + or other material to which the Licensor applied this Public + License. + + g. Licensed Rights means the rights granted to You subject to the + terms and conditions of this Public License, which are limited to + all Copyright and Similar Rights that apply to Your use of the + Licensed Material and that the Licensor has authority to license. + + h. Licensor means the individual(s) or entity(ies) granting rights + under this Public License. + + i. Share means to provide material to the public by any means or + process that requires permission under the Licensed Rights, such + as reproduction, public display, public performance, distribution, + dissemination, communication, or importation, and to make material + available to the public including in ways that members of the + public may access the material from a place and at a time + individually chosen by them. + + j. 
Sui Generis Database Rights means rights other than copyright + resulting from Directive 96/9/EC of the European Parliament and of + the Council of 11 March 1996 on the legal protection of databases, + as amended and/or succeeded, as well as other essentially + equivalent rights anywhere in the world. + + k. You means the individual or entity exercising the Licensed Rights + under this Public License. Your has a corresponding meaning. + + +Section 2 -- Scope. + + a. License grant. + + 1. Subject to the terms and conditions of this Public License, + the Licensor hereby grants You a worldwide, royalty-free, + non-sublicensable, non-exclusive, irrevocable license to + exercise the Licensed Rights in the Licensed Material to: + + a. reproduce and Share the Licensed Material, in whole or + in part; and + + b. produce, reproduce, and Share Adapted Material. + + 2. Exceptions and Limitations. For the avoidance of doubt, where + Exceptions and Limitations apply to Your use, this Public + License does not apply, and You do not need to comply with + its terms and conditions. + + 3. Term. The term of this Public License is specified in Section + 6(a). + + 4. Media and formats; technical modifications allowed. The + Licensor authorizes You to exercise the Licensed Rights in + all media and formats whether now known or hereafter created, + and to make technical modifications necessary to do so. The + Licensor waives and/or agrees not to assert any right or + authority to forbid You from making technical modifications + necessary to exercise the Licensed Rights, including + technical modifications necessary to circumvent Effective + Technological Measures. For purposes of this Public License, + simply making modifications authorized by this Section 2(a) + (4) never produces Adapted Material. + + 5. Downstream recipients. + + a. Offer from the Licensor -- Licensed Material. 
Every + recipient of the Licensed Material automatically + receives an offer from the Licensor to exercise the + Licensed Rights under the terms and conditions of this + Public License. + + b. No downstream restrictions. You may not offer or impose + any additional or different terms or conditions on, or + apply any Effective Technological Measures to, the + Licensed Material if doing so restricts exercise of the + Licensed Rights by any recipient of the Licensed + Material. + + 6. No endorsement. Nothing in this Public License constitutes or + may be construed as permission to assert or imply that You + are, or that Your use of the Licensed Material is, connected + with, or sponsored, endorsed, or granted official status by, + the Licensor or others designated to receive attribution as + provided in Section 3(a)(1)(A)(i). + + b. Other rights. + + 1. Moral rights, such as the right of integrity, are not + licensed under this Public License, nor are publicity, + privacy, and/or other similar personality rights; however, to + the extent possible, the Licensor waives and/or agrees not to + assert any such rights held by the Licensor to the limited + extent necessary to allow You to exercise the Licensed + Rights, but not otherwise. + + 2. Patent and trademark rights are not licensed under this + Public License. + + 3. To the extent possible, the Licensor waives any right to + collect royalties from You for the exercise of the Licensed + Rights, whether directly or through a collecting society + under any voluntary or waivable statutory or compulsory + licensing scheme. In all other cases the Licensor expressly + reserves any right to collect such royalties. + + +Section 3 -- License Conditions. + +Your exercise of the Licensed Rights is expressly made subject to the +following conditions. + + a. Attribution. + + 1. If You Share the Licensed Material (including in modified + form), You must: + + a. 
retain the following if it is supplied by the Licensor + with the Licensed Material: + + i. identification of the creator(s) of the Licensed + Material and any others designated to receive + attribution, in any reasonable manner requested by + the Licensor (including by pseudonym if + designated); + + ii. a copyright notice; + + iii. a notice that refers to this Public License; + + iv. a notice that refers to the disclaimer of + warranties; + + v. a URI or hyperlink to the Licensed Material to the + extent reasonably practicable; + + b. indicate if You modified the Licensed Material and + retain an indication of any previous modifications; and + + c. indicate the Licensed Material is licensed under this + Public License, and include the text of, or the URI or + hyperlink to, this Public License. + + 2. You may satisfy the conditions in Section 3(a)(1) in any + reasonable manner based on the medium, means, and context in + which You Share the Licensed Material. For example, it may be + reasonable to satisfy the conditions by providing a URI or + hyperlink to a resource that includes the required + information. + + 3. If requested by the Licensor, You must remove any of the + information required by Section 3(a)(1)(A) to the extent + reasonably practicable. + + 4. If You Share Adapted Material You produce, the Adapter's + License You apply must not prevent recipients of the Adapted + Material from complying with this Public License. + + +Section 4 -- Sui Generis Database Rights. + +Where the Licensed Rights include Sui Generis Database Rights that +apply to Your use of the Licensed Material: + + a. for the avoidance of doubt, Section 2(a)(1) grants You the right + to extract, reuse, reproduce, and Share all or a substantial + portion of the contents of the database; + + b. 
if You include all or a substantial portion of the database + contents in a database in which You have Sui Generis Database + Rights, then the database in which You have Sui Generis Database + Rights (but not its individual contents) is Adapted Material; and + + c. You must comply with the conditions in Section 3(a) if You Share + all or a substantial portion of the contents of the database. + +For the avoidance of doubt, this Section 4 supplements and does not +replace Your obligations under this Public License where the Licensed +Rights include other Copyright and Similar Rights. + + +Section 5 -- Disclaimer of Warranties and Limitation of Liability. + + a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE + EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS + AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF + ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, + IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, + WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR + PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, + ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT + KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT + ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. + + b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE + TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, + NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, + INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, + COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR + USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN + ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR + DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR + IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. + + c. 
The disclaimer of warranties and limitation of liability provided + above shall be interpreted in a manner that, to the extent + possible, most closely approximates an absolute disclaimer and + waiver of all liability. + + +Section 6 -- Term and Termination. + + a. This Public License applies for the term of the Copyright and + Similar Rights licensed here. However, if You fail to comply with + this Public License, then Your rights under this Public License + terminate automatically. + + b. Where Your right to use the Licensed Material has terminated under + Section 6(a), it reinstates: + + 1. automatically as of the date the violation is cured, provided + it is cured within 30 days of Your discovery of the + violation; or + + 2. upon express reinstatement by the Licensor. + + For the avoidance of doubt, this Section 6(b) does not affect any + right the Licensor may have to seek remedies for Your violations + of this Public License. + + c. For the avoidance of doubt, the Licensor may also offer the + Licensed Material under separate terms or conditions or stop + distributing the Licensed Material at any time; however, doing so + will not terminate this Public License. + + d. Sections 1, 5, 6, 7, and 8 survive termination of this Public + License. + + +Section 7 -- Other Terms and Conditions. + + a. The Licensor shall not be bound by any additional or different + terms or conditions communicated by You unless expressly agreed. + + b. Any arrangements, understandings, or agreements regarding the + Licensed Material not stated herein are separate from and + independent of the terms and conditions of this Public License. + + +Section 8 -- Interpretation. + + a. For the avoidance of doubt, this Public License does not, and + shall not be interpreted to, reduce, limit, restrict, or impose + conditions on any use of the Licensed Material that could lawfully + be made without permission under this Public License. + + b. 
To the extent possible, if any provision of this Public License is + deemed unenforceable, it shall be automatically reformed to the + minimum extent necessary to make it enforceable. If the provision + cannot be reformed, it shall be severed from this Public License + without affecting the enforceability of the remaining terms and + conditions. + + c. No term or condition of this Public License will be waived and no + failure to comply consented to unless expressly agreed to by the + Licensor. + + d. Nothing in this Public License constitutes or may be interpreted + as a limitation upon, or waiver of, any privileges and immunities + that apply to the Licensor or You, including from the legal + processes of any jurisdiction or authority. + + +======================================================================= + +Creative Commons is not a party to its public licenses. +Notwithstanding, Creative Commons may elect to apply one of its public +licenses to material it publishes and in those instances will be +considered the “Licensor.” The text of the Creative Commons public +licenses is dedicated to the public domain under the CC0 Public Domain +Dedication. Except for the limited purpose of indicating that material +is shared under a Creative Commons public license or as otherwise +permitted by the Creative Commons policies published at +creativecommons.org/policies, Creative Commons does not authorize the +use of the trademark "Creative Commons" or any other trademark or logo +of Creative Commons without its prior written consent including, +without limitation, in connection with any unauthorized modifications +to any of its public licenses or any other arrangements, +understandings, or agreements concerning use of licensed material. For +the avoidance of doubt, this paragraph does not form part of the public +licenses. + +Creative Commons may be contacted at creativecommons.org. 
\ No newline at end of file diff --git a/README.md b/README.md index 9eb5ac8..8966402 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,11 @@ # MLOps Python Package +[![on-release-published.yml](https://github.com/fmind/mlops-python-package/actions/workflows/on-release-published.yml/badge.svg)](https://github.com/fmind/mlops-python-package/actions/workflows/on-release-published.yml) +[![Release](https://img.shields.io/github/v/release/fmind/mlops-python-package)](https://github.com/fmind/mlops-python-package/releases) + +[![License](https://img.shields.io/github/license/fmind/mlops-python-package)](https://github.com/fmind/mlops-python-package/blob/main/LICENCE.txt) +[![Documentation](https://img.shields.io/badge/documentation-available-brightgreen.svg)](https://fmind.github.io/mlops-python-package/) + This repository contains a Python package implementation designed to support MLOps initiatives. The package uses several [tools](#tools) and [tips](#tips) to make your MLOps experience as flexible, robust, productive as possible. 
diff --git a/mlops-python-package.code-workspace b/mlops-python-package.code-workspace index f551ecd..a031fad 100644 --- a/mlops-python-package.code-workspace +++ b/mlops-python-package.code-workspace @@ -15,19 +15,15 @@ "editor.codeActionsOnSave": { "source.organizeImports": "explicit" }, - "editor.defaultFormatter": "ms-python.black-formatter" + "editor.defaultFormatter": "charliermarsh.ruff", }, }, "extensions": { "recommendations": [ + "charliermarsh.ruff", "dchanco.vsc-invoke", - "ms-python.black-formatter", - "ms-python.isort", - "ms-python.mypy-type-checker", - "ms-python.pylint", "ms-python.python", "ms-python.vscode-pylance", - "redhat.vscode-yaml", ] } -} \ No newline at end of file +} diff --git a/poetry.lock b/poetry.lock index e137d27..2c555e4 100644 --- a/poetry.lock +++ b/poetry.lock @@ -32,17 +32,6 @@ files = [ {file = "appnope-0.1.4.tar.gz", hash = "sha256:1de3860566df9caf38f01f86f65e0e13e379af54f9e4bee1e66b48f2efffd1ee"}, ] -[[package]] -name = "astroid" -version = "3.0.3" -description = "An abstract syntax tree for Python with inference support." -optional = false -python-versions = ">=3.8.0" -files = [ - {file = "astroid-3.0.3-py3-none-any.whl", hash = "sha256:92fcf218b89f449cdf9f7b39a269f8d5d617b27be68434912e11e79203963a17"}, - {file = "astroid-3.0.3.tar.gz", hash = "sha256:4148645659b08b70d72460ed1921158027a9e53ae8b7234149b1400eddacbb93"}, -] - [[package]] name = "asttokens" version = "2.4.1" @@ -80,50 +69,6 @@ tests = ["attrs[tests-no-zope]", "zope-interface"] tests-mypy = ["mypy (>=1.6)", "pytest-mypy-plugins"] tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist[psutil]"] -[[package]] -name = "black" -version = "24.2.0" -description = "The uncompromising code formatter." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "black-24.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6981eae48b3b33399c8757036c7f5d48a535b962a7c2310d19361edeef64ce29"}, - {file = "black-24.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d533d5e3259720fdbc1b37444491b024003e012c5173f7d06825a77508085430"}, - {file = "black-24.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:61a0391772490ddfb8a693c067df1ef5227257e72b0e4108482b8d41b5aee13f"}, - {file = "black-24.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:992e451b04667116680cb88f63449267c13e1ad134f30087dec8527242e9862a"}, - {file = "black-24.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:163baf4ef40e6897a2a9b83890e59141cc8c2a98f2dda5080dc15c00ee1e62cd"}, - {file = "black-24.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e37c99f89929af50ffaf912454b3e3b47fd64109659026b678c091a4cd450fb2"}, - {file = "black-24.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f9de21bafcba9683853f6c96c2d515e364aee631b178eaa5145fc1c61a3cc92"}, - {file = "black-24.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:9db528bccb9e8e20c08e716b3b09c6bdd64da0dd129b11e160bf082d4642ac23"}, - {file = "black-24.2.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d84f29eb3ee44859052073b7636533ec995bd0f64e2fb43aeceefc70090e752b"}, - {file = "black-24.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1e08fb9a15c914b81dd734ddd7fb10513016e5ce7e6704bdd5e1251ceee51ac9"}, - {file = "black-24.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:810d445ae6069ce64030c78ff6127cd9cd178a9ac3361435708b907d8a04c693"}, - {file = "black-24.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:ba15742a13de85e9b8f3239c8f807723991fbfae24bad92d34a2b12e81904982"}, - {file = "black-24.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7e53a8c630f71db01b28cd9602a1ada68c937cbf2c333e6ed041390d6968faf4"}, - {file = 
"black-24.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:93601c2deb321b4bad8f95df408e3fb3943d85012dddb6121336b8e24a0d1218"}, - {file = "black-24.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0057f800de6acc4407fe75bb147b0c2b5cbb7c3ed110d3e5999cd01184d53b0"}, - {file = "black-24.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:faf2ee02e6612577ba0181f4347bcbcf591eb122f7841ae5ba233d12c39dcb4d"}, - {file = "black-24.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:057c3dc602eaa6fdc451069bd027a1b2635028b575a6c3acfd63193ced20d9c8"}, - {file = "black-24.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:08654d0797e65f2423f850fc8e16a0ce50925f9337fb4a4a176a7aa4026e63f8"}, - {file = "black-24.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca610d29415ee1a30a3f30fab7a8f4144e9d34c89a235d81292a1edb2b55f540"}, - {file = "black-24.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:4dd76e9468d5536abd40ffbc7a247f83b2324f0c050556d9c371c2b9a9a95e31"}, - {file = "black-24.2.0-py3-none-any.whl", hash = "sha256:e8a6ae970537e67830776488bca52000eaa37fa63b9988e8c487458d9cd5ace6"}, - {file = "black-24.2.0.tar.gz", hash = "sha256:bce4f25c27c3435e4dace4815bcb2008b87e167e3bf4ee47ccdc5ce906eb4894"}, -] - -[package.dependencies] -click = ">=8.0.0" -mypy-extensions = ">=0.4.3" -packaging = ">=22.0" -pathspec = ">=0.9.0" -platformdirs = ">=2" - -[package.extras] -colorama = ["colorama (>=0.4.3)"] -d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"] -jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] -uvloop = ["uvloop (>=0.15.2)"] - [[package]] name = "certifi" version = "2024.2.2" @@ -485,21 +430,6 @@ files = [ {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, ] -[[package]] -name = "dill" -version = "0.3.8" -description = "serialize all of Python" -optional = false -python-versions = ">=3.8" -files = [ - {file = "dill-0.3.8-py3-none-any.whl", hash = 
"sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7"}, - {file = "dill-0.3.8.tar.gz", hash = "sha256:3ebe3c479ad625c4553aca177444d89b486b1d84982eeacded644afc0cf797ca"}, -] - -[package.extras] -graph = ["objgraph (>=1.7.2)"] -profile = ["gprof2dot (>=2022.7.29)"] - [[package]] name = "distlib" version = "0.3.8" @@ -745,20 +675,6 @@ qtconsole = ["qtconsole"] test = ["pickleshare", "pytest (<8)", "pytest-asyncio (<0.22)", "testpath"] test-extra = ["curio", "ipython[test]", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.23)", "pandas", "trio"] -[[package]] -name = "isort" -version = "5.13.2" -description = "A Python utility / library to sort Python imports." -optional = false -python-versions = ">=3.8.0" -files = [ - {file = "isort-5.13.2-py3-none-any.whl", hash = "sha256:8ca5e72a8d85860d5a3fa69b8745237f2939afe12dbf656afbcb47fe72d947a6"}, - {file = "isort-5.13.2.tar.gz", hash = "sha256:48fdfcb9face5d58a4f6dde2e72a1fb8dcaf8ab26f95ab49fab84c2ddefb0109"}, -] - -[package.extras] -colors = ["colorama (>=0.4.6)"] - [[package]] name = "jedi" version = "0.19.1" @@ -984,17 +900,6 @@ files = [ [package.dependencies] traitlets = "*" -[[package]] -name = "mccabe" -version = "0.7.0" -description = "McCabe checker, plugin for flake8" -optional = false -python-versions = ">=3.6" -files = [ - {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, - {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, -] - [[package]] name = "mlflow-skinny" version = "2.10.2" @@ -1348,17 +1253,6 @@ files = [ qa = ["flake8 (==3.8.3)", "mypy (==0.782)"] testing = ["docopt", "pytest (<6.0.0)"] -[[package]] -name = "pathspec" -version = "0.12.1" -description = "Utility library for gitignore style pattern matching of file paths." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, - {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, -] - [[package]] name = "pdoc" version = "13.1.1" @@ -1745,39 +1639,15 @@ files = [ plugins = ["importlib-metadata"] windows-terminal = ["colorama (>=0.4.6)"] -[[package]] -name = "pylint" -version = "3.0.4" -description = "python code static checker" -optional = false -python-versions = ">=3.8.0" -files = [ - {file = "pylint-3.0.4-py3-none-any.whl", hash = "sha256:59ab3532506f32affefeb50d5057a221bb351f5a1383fa36c424c2c6c05e7005"}, - {file = "pylint-3.0.4.tar.gz", hash = "sha256:d73b70b3fff8f3fbdcb49a209b9c7d71d8090c138d61d576d1895e152cb392b3"}, -] - -[package.dependencies] -astroid = ">=3.0.1,<=3.1.0-dev0" -colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""} -dill = {version = ">=0.3.7", markers = "python_version >= \"3.12\""} -isort = ">=4.2.5,<5.13.0 || >5.13.0,<6" -mccabe = ">=0.6,<0.8" -platformdirs = ">=2.2.0" -tomlkit = ">=0.10.1" - -[package.extras] -spelling = ["pyenchant (>=3.2,<4.0)"] -testutils = ["gitpython (>3)"] - [[package]] name = "pytest" -version = "8.0.1" +version = "8.0.2" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.8" files = [ - {file = "pytest-8.0.1-py3-none-any.whl", hash = "sha256:3e4f16fe1c0a9dc9d9389161c127c3edc5d810c38d6793042fb81d9f48a59fca"}, - {file = "pytest-8.0.1.tar.gz", hash = "sha256:267f6563751877d772019b13aacbe4e860d73fe8f651f28112e9ac37de7513ae"}, + {file = "pytest-8.0.2-py3-none-any.whl", hash = "sha256:edfaaef32ce5172d5466b5127b42e0d6d35ebbe4453f0e3505d96afd93f6b096"}, + {file = "pytest-8.0.2.tar.gz", hash = "sha256:d4051d623a2e0b7e51960ba963193b09ce6daeb9759a451844a21e4ddedfc1bd"}, ] [package.dependencies] @@ -2198,6 +2068,32 @@ files = [ {file = 
"rpds_py-0.18.0.tar.gz", hash = "sha256:42821446ee7a76f5d9f71f9e33a4fb2ffd724bb3e7f93386150b61a43115788d"}, ] +[[package]] +name = "ruff" +version = "0.2.2" +description = "An extremely fast Python linter and code formatter, written in Rust." +optional = false +python-versions = ">=3.7" +files = [ + {file = "ruff-0.2.2-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:0a9efb032855ffb3c21f6405751d5e147b0c6b631e3ca3f6b20f917572b97eb6"}, + {file = "ruff-0.2.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:d450b7fbff85913f866a5384d8912710936e2b96da74541c82c1b458472ddb39"}, + {file = "ruff-0.2.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ecd46e3106850a5c26aee114e562c329f9a1fbe9e4821b008c4404f64ff9ce73"}, + {file = "ruff-0.2.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5e22676a5b875bd72acd3d11d5fa9075d3a5f53b877fe7b4793e4673499318ba"}, + {file = "ruff-0.2.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1695700d1e25a99d28f7a1636d85bafcc5030bba9d0578c0781ba1790dbcf51c"}, + {file = "ruff-0.2.2-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:b0c232af3d0bd8f521806223723456ffebf8e323bd1e4e82b0befb20ba18388e"}, + {file = "ruff-0.2.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f63d96494eeec2fc70d909393bcd76c69f35334cdbd9e20d089fb3f0640216ca"}, + {file = "ruff-0.2.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6a61ea0ff048e06de273b2e45bd72629f470f5da8f71daf09fe481278b175001"}, + {file = "ruff-0.2.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e1439c8f407e4f356470e54cdecdca1bd5439a0673792dbe34a2b0a551a2fe3"}, + {file = "ruff-0.2.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:940de32dc8853eba0f67f7198b3e79bc6ba95c2edbfdfac2144c8235114d6726"}, + {file = "ruff-0.2.2-py3-none-musllinux_1_2_armv7l.whl", hash = 
"sha256:0c126da55c38dd917621552ab430213bdb3273bb10ddb67bc4b761989210eb6e"}, + {file = "ruff-0.2.2-py3-none-musllinux_1_2_i686.whl", hash = "sha256:3b65494f7e4bed2e74110dac1f0d17dc8e1f42faaa784e7c58a98e335ec83d7e"}, + {file = "ruff-0.2.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:1ec49be4fe6ddac0503833f3ed8930528e26d1e60ad35c2446da372d16651ce9"}, + {file = "ruff-0.2.2-py3-none-win32.whl", hash = "sha256:d920499b576f6c68295bc04e7b17b6544d9d05f196bb3aac4358792ef6f34325"}, + {file = "ruff-0.2.2-py3-none-win_amd64.whl", hash = "sha256:cc9a91ae137d687f43a44c900e5d95e9617cb37d4c989e462980ba27039d239d"}, + {file = "ruff-0.2.2-py3-none-win_arm64.whl", hash = "sha256:c9d15fc41e6054bfc7200478720570078f0b41c9ae4f010bcc16bd6f4d1aacdd"}, + {file = "ruff-0.2.2.tar.gz", hash = "sha256:e62ed7f36b3068a30ba39193a14274cd706bc486fad521276458022f7bccb31d"}, +] + [[package]] name = "scikit-learn" version = "1.4.1.post1" @@ -2380,17 +2276,6 @@ files = [ {file = "threadpoolctl-3.3.0.tar.gz", hash = "sha256:5dac632b4fa2d43f42130267929af3ba01399ef4bd1882918e92dbc30365d30c"}, ] -[[package]] -name = "tomlkit" -version = "0.12.3" -description = "Style preserving TOML library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "tomlkit-0.12.3-py3-none-any.whl", hash = "sha256:b0a645a9156dc7cb5d3a1f0d4bab66db287fcb8e0430bdd4664a095ea16414ba"}, - {file = "tomlkit-0.12.3.tar.gz", hash = "sha256:75baf5012d06501f07bee5bf8e801b9f343e7aac5a92581f20f80ce632e6b5a4"}, -] - [[package]] name = "tornado" version = "6.4" @@ -2648,4 +2533,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.12" -content-hash = "acc5b17bf55180e68fa88749c6aaecec3dfba5c15d37138fa484371d37b774b0" +content-hash = "61b7d973e3f8a95f783d23af1f8e52f81e5ca832fc8ed62b07c9635eba9987e8" diff --git a/pyproject.toml b/pyproject.toml index 9360c66..d3c43ab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ [tool.poetry] 
name = "bikes" -version = "0.3.0" +version = "0.4.0" description = "Predict the number of bikes available." repository = "https://github.com/fmind/mlops-python-package" authors = ["Médéric HURIER "] @@ -44,15 +44,11 @@ pdoc = "^13.1.1" [tool.poetry.group.checks.dependencies] coverage = "^7.4.1" mypy = "^1.8.0" -pylint = "^3.0.3" pytest = "^8.0.0" pytest-cov = "^4.1.0" pytest-xdist = "^3.5.0" pandera = { extras = ["mypy"], version = "^0.18.0" } - -[tool.poetry.group.formats.dependencies] -black = "^24.1.1" -isort = "^5.13.2" +ruff = "^0.2.2" [tool.poetry.group.notebooks.dependencies] ipykernel = "^6.29.0" @@ -60,26 +56,31 @@ nbformat = "^5.9.2" # CONFIGURATIONS -[tool.black] -line-length = 120 - [tool.coverage.run] branch = true source = ["src"] -[tool.isort] -line_length = 120 -profile = "black" - [tool.mypy] check_untyped_defs = true ignore_missing_imports = true plugins = ["pandera.mypy", "pydantic.mypy"] -[tool.pylint.MASTER] -disable = ['too-few-public-methods', 'too-many-arguments', 'too-many-locals'] -max-line-length = 120 -extension-pkg-whitelist = "pydantic" +[tool.ruff] +fix = true +line-length = 100 +target-version = "py312" + +[tool.ruff.format] +docstring-code-format = true + +[tool.ruff.lint] +select = ["D"] + +[tool.ruff.lint.pydocstyle] +convention = "google" + +[tool.ruff.lint.per-file-ignores] +"tests/*.py" = ["D100", "D103"] # SYSTEMS diff --git a/src/bikes/__main__.py b/src/bikes/__main__.py new file mode 100644 index 0000000..a50876a --- /dev/null +++ b/src/bikes/__main__.py @@ -0,0 +1,10 @@ +"""Entry point of the package.""" + +# %% IMPORTS + +from bikes import scripts + +# %% MAIN + +if __name__ == "__main__": + scripts.main() diff --git a/src/bikes/configs.py b/src/bikes/configs.py index 695fa7f..d5d9184 100644 --- a/src/bikes/configs.py +++ b/src/bikes/configs.py @@ -24,7 +24,6 @@ def parse_file(path: str) -> Config: Config: representation of the config file. 
""" any_path = AnyPath(path) - # pylint: disable=no-member text = any_path.read_text() # type: ignore config = OmegaConf.create(text) return config diff --git a/src/bikes/jobs.py b/src/bikes/jobs.py index f65fac5..e310f7e 100644 --- a/src/bikes/jobs.py +++ b/src/bikes/jobs.py @@ -131,7 +131,11 @@ def run(self) -> Locals: # searcher logger.info("Execute searcher: {}", self.searcher) results, best_score, best_params = self.searcher.search( - model=self.model, metric=self.metric, cv=self.splitter, inputs=inputs, targets=targets + model=self.model, + metric=self.metric, + cv=self.splitter, + inputs=inputs, + targets=targets, ) logger.info("- # Results: {}", len(results)) logger.info("- Best Score: {}", best_score) @@ -143,7 +147,7 @@ def run(self) -> Locals: class TrainingJob(Job): - """Train and register a single AI/ML model + """Train and register a single AI/ML model. Attributes: run_name (str): name of the MLflow experiment run. @@ -207,8 +211,12 @@ def run(self) -> Locals: logger.info("- Targets train shape: {}", targets_train.shape) logger.info("- Targets test shape: {}", targets_test.shape) # - asserts - assert len(inputs_train) == len(targets_train), "Inputs and targets train should have the same length!" - assert len(inputs_test) == len(targets_test), "Inputs and targets test should have the same length!" + assert len(inputs_train) == len( + targets_train + ), "Inputs and targets train should have the same length!" + assert len(inputs_test) == len( + targets_test + ), "Inputs and targets test should have the same length!" # model logger.info("Fit model: {}", self.model) self.model.fit(inputs=inputs_train, targets=targets_train) @@ -216,7 +224,9 @@ def run(self) -> Locals: logger.info("Predict outputs: {}", len(inputs_test)) outputs_test = self.model.predict(inputs=inputs_test) logger.info("- Outputs test shape: {}", outputs_test.shape) - assert len(inputs_test) == len(outputs_test), "Inputs and outputs test should have the same length!" 
+ assert len(inputs_test) == len( + outputs_test + ), "Inputs and outputs test should have the same length!" # scorers for i, scorer in enumerate(self.scorers, start=1): logger.info("{}. Run scorer: {}", i, scorer) diff --git a/src/bikes/metrics.py b/src/bikes/metrics.py index 8e6dfe1..c7706e3 100644 --- a/src/bikes/metrics.py +++ b/src/bikes/metrics.py @@ -39,7 +39,9 @@ def score(self, targets: schemas.Targets, outputs: schemas.Outputs) -> float: float: metric result. """ - def scorer(self, model: models.Model, inputs: schemas.Inputs, targets: schemas.Targets) -> float: + def scorer( + self, model: models.Model, inputs: schemas.Inputs, targets: schemas.Targets + ) -> float: """Score the model outputs against the targets. Args: diff --git a/src/bikes/models.py b/src/bikes/models.py index be3dc87..afad913 100644 --- a/src/bikes/models.py +++ b/src/bikes/models.py @@ -28,7 +28,6 @@ class Model(abc.ABC, pdt.BaseModel, strict=True): KIND: str - # pylint: disable=unused-argument def get_params(self, deep: bool = True) -> Params: """Get the model params. @@ -117,7 +116,9 @@ class BaselineSklearnModel(Model): @T.override def fit(self, inputs: schemas.Inputs, targets: schemas.Targets) -> "BaselineSklearnModel": # subcomponents - categoricals_transformer = preprocessing.OneHotEncoder(sparse_output=False, handle_unknown="ignore") + categoricals_transformer = preprocessing.OneHotEncoder( + sparse_output=False, handle_unknown="ignore" + ) # components transformer = compose.ColumnTransformer( [ @@ -143,7 +144,9 @@ def fit(self, inputs: schemas.Inputs, targets: schemas.Targets) -> "BaselineSkle def predict(self, inputs: schemas.Inputs) -> schemas.Outputs: assert self._pipeline is not None, "Model should be fitted first!" prediction = self._pipeline.predict(inputs) # return an np.ndarray, not a dataframe! 
- outputs = schemas.Outputs({schemas.OutputsSchema.prediction: prediction}, index=inputs.index) + outputs = schemas.Outputs( + {schemas.OutputsSchema.prediction: prediction}, index=inputs.index + ) return outputs diff --git a/src/bikes/registers.py b/src/bikes/registers.py index 5b1fca1..cb7674e 100644 --- a/src/bikes/registers.py +++ b/src/bikes/registers.py @@ -34,8 +34,9 @@ def __init__(self, model: models.Model): """ self.model = model - # pylint: disable=arguments-differ, unused-argument - def predict(self, context: mlflow.pyfunc.PythonModelContext, inputs: schemas.Inputs) -> schemas.Outputs: + def predict( + self, context: mlflow.pyfunc.PythonModelContext, inputs: schemas.Inputs + ) -> schemas.Outputs: """Generate predictions from a custom model. Args: @@ -105,7 +106,9 @@ class Saver(abc.ABC, pdt.BaseModel, strict=True): path: str = "model" @abc.abstractmethod - def save(self, model: models.Model, signature: Signature, input_example: schemas.Inputs) -> Info: + def save( + self, model: models.Model, signature: Signature, input_example: schemas.Inputs + ) -> Info: """Save a model in the model registry. 
Args: @@ -126,11 +129,16 @@ class CustomSaver(Saver): KIND: T.Literal["CustomSaver"] = "CustomSaver" - def save(self, model: models.Model, signature: Signature, input_example: schemas.Inputs) -> Info: + def save( + self, model: models.Model, signature: Signature, input_example: schemas.Inputs + ) -> Info: """Save a custom model to the MLflow Model Registry.""" custom = CustomAdapter(model=model) # adapt model return mlflow.pyfunc.log_model( - artifact_path=self.path, python_model=custom, signature=signature, input_example=input_example + artifact_path=self.path, + python_model=custom, + signature=signature, + input_example=input_example, ) diff --git a/src/bikes/schemas.py b/src/bikes/schemas.py index 856d85f..62193b5 100644 --- a/src/bikes/schemas.py +++ b/src/bikes/schemas.py @@ -33,6 +33,7 @@ def check(cls, data: pd.DataFrame, **kwargs): Args: data (pd.DataFrame): dataframe to check. + kwargs: additional arguments to validate(). Returns: pd.DataFrame: validated dataframe with schema. diff --git a/src/bikes/scripts.py b/src/bikes/scripts.py index 5ae1476..0858d82 100644 --- a/src/bikes/scripts.py +++ b/src/bikes/scripts.py @@ -1,4 +1,4 @@ -"""Entry point of the program.""" +"""Command-line interface for the program.""" # %% IMPORTS @@ -17,7 +17,7 @@ class Settings(pdts.BaseSettings, strict=True): """Settings for the program. Attributes: - job (jobs.JobKind): job associated with the settings. + job (jobs.JobKind): job associated with settings. 
""" job: jobs.JobKind = pdt.Field(..., discriminator="KIND") @@ -25,7 +25,7 @@ class Settings(pdts.BaseSettings, strict=True): # %% PARSERS -parser = argparse.ArgumentParser(description="Run a single job from external settings.") +parser = argparse.ArgumentParser(prog="bikes", description="Run an ML job from configs.") parser.add_argument("configs", nargs="+", help="Config files for the job (local or remote).") parser.add_argument("-e", "--extras", nargs="+", default=[], help="Config strings for the job.") parser.add_argument("-s", "--schema", action="store_true", help="Print settings schema and exit.") diff --git a/src/bikes/services.py b/src/bikes/services.py index 4493c1e..2df347e 100644 --- a/src/bikes/services.py +++ b/src/bikes/services.py @@ -134,7 +134,9 @@ def client(self) -> MlflowClient: """Get an instance of MLflow client.""" return MlflowClient(tracking_uri=self.tracking_uri, registry_uri=self.registry_uri) - def register(self, run_id: str, path: str, alias: str) -> mlflow.entities.model_registry.ModelVersion: + def register( + self, run_id: str, path: str, alias: str + ) -> mlflow.entities.model_registry.ModelVersion: """Register a model to mlflow registry. 
Args: @@ -148,5 +150,7 @@ def register(self, run_id: str, path: str, alias: str) -> mlflow.entities.model_ client = self.client() model_uri = f"runs:/{run_id}/{path}" version = mlflow.register_model(model_uri=model_uri, name=self.registry_name) - client.set_registered_model_alias(name=self.registry_name, alias=alias, version=version.version) + client.set_registered_model_alias( + name=self.registry_name, alias=alias, version=version.version + ) return version diff --git a/src/bikes/splitters.py b/src/bikes/splitters.py index 4c03774..b0d82d0 100644 --- a/src/bikes/splitters.py +++ b/src/bikes/splitters.py @@ -32,7 +32,9 @@ class Splitter(abc.ABC, pdt.BaseModel, strict=True): KIND: str @abc.abstractmethod - def split(self, inputs: schemas.Inputs, targets: schemas.Targets, groups: list | None = None) -> Splits: + def split( + self, inputs: schemas.Inputs, targets: schemas.Targets, groups: list | None = None + ) -> Splits: """Split a dataframe into subsets. Args: @@ -45,7 +47,9 @@ def split(self, inputs: schemas.Inputs, targets: schemas.Targets, groups: list | """ @abc.abstractmethod - def get_n_splits(self, inputs: schemas.Inputs, targets: schemas.Targets, groups: list | None = None) -> int: + def get_n_splits( + self, inputs: schemas.Inputs, targets: schemas.Targets, groups: list | None = None + ) -> int: """Get the number of splits generated. 
Args: @@ -74,7 +78,9 @@ class TrainTestSplitter(Splitter): random_state: int = 42 @T.override - def split(self, inputs: schemas.Inputs, targets: schemas.Targets, groups: list | None = None) -> Splits: + def split( + self, inputs: schemas.Inputs, targets: schemas.Targets, groups: list | None = None + ) -> Splits: index = np.arange(len(inputs)) # return integer position train_index, test_index = model_selection.train_test_split( index, shuffle=self.shuffle, test_size=self.test_size, random_state=self.random_state @@ -82,7 +88,9 @@ def split(self, inputs: schemas.Inputs, targets: schemas.Targets, groups: list | yield train_index, test_index @T.override - def get_n_splits(self, inputs: schemas.Inputs, targets: schemas.Targets, groups: list | None = None) -> int: + def get_n_splits( + self, inputs: schemas.Inputs, targets: schemas.Targets, groups: list | None = None + ) -> int: return 1 @@ -102,12 +110,16 @@ class TimeSeriesSplitter(Splitter): test_size: int | float = 24 * 30 * 2 # 2 months @T.override - def split(self, inputs: schemas.Inputs, targets: schemas.Targets, groups: list | None = None) -> Splits: + def split( + self, inputs: schemas.Inputs, targets: schemas.Targets, groups: list | None = None + ) -> Splits: splitter = model_selection.TimeSeriesSplit(n_splits=self.n_splits, test_size=self.test_size) yield from splitter.split(inputs) @T.override - def get_n_splits(self, inputs: schemas.Inputs, targets: schemas.Targets, groups: list | None = None) -> int: + def get_n_splits( + self, inputs: schemas.Inputs, targets: schemas.Targets, groups: list | None = None + ) -> int: return self.n_splits diff --git a/tasks/checks.py b/tasks/checks.py index 7dff32f..84430dd 100644 --- a/tasks/checks.py +++ b/tasks/checks.py @@ -1,7 +1,5 @@ """Check tasks for pyinvoke.""" -# pylint: disable=redefined-builtin - # %% IMPORTS from invoke import task @@ -16,31 +14,27 @@ @task -def code(ctx: Context) -> None: - """Check the codes with pylint.""" - ctx.run("poetry run pylint src/ 
tasks/ tests/") +def poetry(ctx: Context) -> None: + """Check poetry config files.""" + ctx.run("poetry check --lock") @task -def coverage(ctx: Context) -> None: - """Check the coverage with coverage.""" - ctx.run( - f"poetry run pytest --numprocesses={PYTEST_N_PROCESSES}" - f" --cov=src/ --cov-fail-under={COVERAGE_FAIL_UNDER} tests/" - ) +def format(ctx: Context) -> None: + """Check the formats with ruff.""" + ctx.run("poetry run ruff format --check src/ tasks/ tests/") @task -def format(ctx: Context) -> None: - """Check the formats with isort and black.""" - ctx.run("poetry run isort --check src/ tasks/ tests/") - ctx.run("poetry run black --check src/ tasks/ tests/") +def type(ctx: Context) -> None: + """Check the types with mypy.""" + ctx.run("poetry run mypy src/ tasks/ tests/") @task -def poetry(ctx: Context) -> None: - """Check poetry config files.""" - ctx.run("poetry check") +def code(ctx: Context) -> None: + """Check the codes with ruff.""" + ctx.run("poetry run ruff check src/ tasks/ tests/") @task @@ -50,11 +44,14 @@ def test(ctx: Context) -> None: @task -def type(ctx: Context) -> None: - """Check the types with mypy.""" - ctx.run("poetry run mypy src/ tasks/ tests/") +def coverage(ctx: Context) -> None: + """Check the coverage with coverage.""" + ctx.run( + f"poetry run pytest --numprocesses={PYTEST_N_PROCESSES}" + f" --cov=src/ --cov-fail-under={COVERAGE_FAIL_UNDER} tests/" + ) -@task(pre=[type, code, coverage, format, poetry], default=True) +@task(pre=[poetry, format, type, code, coverage], default=True) def all(_: Context) -> None: """Run all check tasks.""" diff --git a/tasks/cleans.py b/tasks/cleans.py index 446fe2c..d455d0f 100644 --- a/tasks/cleans.py +++ b/tasks/cleans.py @@ -1,7 +1,5 @@ """Clean tasks for pyinvoke.""" -# pylint: disable=redefined-builtin - # %% IMPORTS from invoke import task @@ -9,13 +7,36 @@ # %% TASKS +# %% - Tools + + +@task +def mypy(ctx: Context) -> None: + """Clean the mypy tool.""" + ctx.run("rm -rf .mypy_cache/") + + 
+@task +def ruff(ctx: Context) -> None: + """Clean the ruff tool.""" + ctx.run("ruff clean") + + +@task +def pytest(ctx: Context) -> None: + """Clean the pytest tool.""" + ctx.run("rm -rf .pytest_cache/") + @task def coverage(ctx: Context) -> None: - """Clean coverage files.""" + """Clean coverage tool.""" ctx.run("rm -f .coverage*") +# %% - Folders + + @task def dist(ctx: Context) -> None: """Clean the dist folder.""" @@ -29,16 +50,9 @@ def docs(ctx: Context) -> None: @task -def install(ctx: Context) -> None: - """Clean the install.""" - ctx.run("rm -rf .venv/") - ctx.run("rm -f poetry.lock") - - -@task -def mypy(ctx: Context) -> None: - """Clean the mypy folder.""" - ctx.run("rm -rf .mypy_cache/") +def cache(ctx: Context) -> None: + """Clean the cache folder.""" + ctx.run("rm -rf .cache/") @task @@ -47,24 +61,51 @@ def outputs(ctx: Context) -> None: ctx.run("rm -rf outputs/*") +# %% - Sources + + @task -def pytest(ctx: Context) -> None: - """Clean the pytest folder.""" - ctx.run("rm -rf .pytest_cache/") +def venv(ctx: Context) -> None: + """Clean the venv folder.""" + ctx.run("rm -rf .venv/") + + +@task +def poetry(ctx: Context) -> None: + """Clean poetry lock file.""" + ctx.run("rm -f poetry.lock") @task def python(ctx: Context) -> None: - """Clean python files and folders.""" + """Clean python caches and bytecodes.""" ctx.run("find . -type f -name '*.py[co]' -delete") ctx.run(r"find . 
-type d -name __pycache__ -exec rm -r {} \+")


-@task(pre=[coverage, dist, docs, mypy, pytest, python], default=True)
+# %% - Combines
+
+
+@task(pre=[mypy, ruff, pytest, coverage])
+def tools(_: Context) -> None:
+    """Run all tools tasks."""
+
+
+@task(pre=[dist, docs, cache, outputs])
+def folders(_: Context) -> None:
+    """Run all folders tasks."""
+
+
+@task(pre=[venv, poetry, python])
+def sources(_: Context) -> None:
+    """Run all sources tasks."""
+
+
+@task(pre=[tools, folders], default=True)
 def all(_: Context) -> None:
-    """Run all clean tasks."""
+    """Run all tools and folders tasks."""


-@task(pre=[all, outputs, install])
+@task(pre=[all, sources])
 def reset(_: Context) -> None:
-    """Reset the project state."""
+    """Run all tools, folders, and sources tasks."""
diff --git a/tasks/containers.py b/tasks/containers.py
index 012d6a4..4b65154 100644
--- a/tasks/containers.py
+++ b/tasks/containers.py
@@ -1,7 +1,5 @@
 """Docker tasks for pyinvoke."""

-# pylint: disable=redefined-builtin
-
 # %% IMPORTS

 from invoke import task
@@ -9,6 +7,10 @@
 from . 
import packages +# %% CONFIGS + +IMAGE_TAG = "latest" + # %% TASKS @@ -21,13 +23,13 @@ def compose(ctx: Context) -> None: @task(pre=[packages.build]) def build(ctx: Context) -> None: """Build the container image.""" - ctx.run(f"docker build -t {ctx.project.name}:latest .") + ctx.run(f"docker build -t {ctx.project.name}:{IMAGE_TAG} .") @task def run(ctx: Context) -> None: """Run the container image.""" - ctx.run(f"docker run --rm {ctx.project.name}:latest") + ctx.run(f"docker run --rm {ctx.project.name}:{IMAGE_TAG}") @task(pre=[build], default=True) diff --git a/tasks/dags.py b/tasks/dags.py index 45468ed..16217e1 100644 --- a/tasks/dags.py +++ b/tasks/dags.py @@ -1,7 +1,5 @@ """DAG tasks for pyinvoke.""" -# pylint: disable=redefined-builtin - # %% IMPORTS from invoke import call, task diff --git a/tasks/docs.py b/tasks/docs.py index 91ad2be..d18d656 100644 --- a/tasks/docs.py +++ b/tasks/docs.py @@ -1,7 +1,5 @@ """Docs tasks for pyinvoke.""" -# pylint: disable=redefined-builtin - # %% IMPORTS from invoke import task @@ -12,6 +10,7 @@ # %% CONFIGS DOC_FORMAT = "google" +OUTPUT_DIR = "docs/" # %% TASKS @@ -19,7 +18,9 @@ @task def api(ctx: Context) -> None: """Document the API with pdoc.""" - ctx.run(f"poetry run pdoc --docformat={DOC_FORMAT} --output-directory=docs/ src/{ctx.project.name}") + ctx.run( + f"poetry run pdoc --docformat={DOC_FORMAT} --output-directory={OUTPUT_DIR} src/{ctx.project.name}" + ) @task diff --git a/tasks/formats.py b/tasks/formats.py index 0a29dbc..71dd031 100644 --- a/tasks/formats.py +++ b/tasks/formats.py @@ -1,7 +1,5 @@ """Format tasks for pyinvoke.""" -# pylint: disable=redefined-builtin - # %% IMPORTS from invoke import task @@ -11,17 +9,11 @@ @task -def imports(ctx: Context) -> None: - """Format code imports with isort.""" - ctx.run("poetry run isort src/ tasks/ tests/") - - -@task -def sources(ctx: Context) -> None: - """Format code sources with black.""" - ctx.run("poetry run black src/ tasks/ tests/") +def code(ctx: Context) -> 
None: + """Format code with ruff.""" + ctx.run("poetry run ruff format src/ tasks/ tests/") -@task(pre=[imports, sources], default=True) +@task(pre=[code], default=True) def all(_: Context) -> None: """Run all format tasks.""" diff --git a/tasks/installs.py b/tasks/installs.py index d8c3026..f8f78bc 100644 --- a/tasks/installs.py +++ b/tasks/installs.py @@ -1,7 +1,5 @@ """Install tasks for pyinvoke.""" -# pylint: disable=redefined-builtin - # %% IMPORTS from invoke import task diff --git a/tasks/packages.py b/tasks/packages.py index 19b85ff..fe6ddce 100644 --- a/tasks/packages.py +++ b/tasks/packages.py @@ -1,7 +1,5 @@ """Package tasks for pyinvoke.""" -# pylint: disable=redefined-builtin - # %% IMPORTS from invoke import task diff --git a/tests/conftest.py b/tests/conftest.py index 86653cb..3639bb9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,7 +1,5 @@ """Configuration for the tests.""" -# pylint: disable=redefined-outer-name - # %% IMPORTS import os @@ -168,7 +166,9 @@ def time_series_splitter() -> splitters.TimeSeriesSplitter: @pytest.fixture(scope="session") def train_test_split( - train_test_splitter: splitters.TrainTestSplitter, inputs: schemas.Inputs, targets: schemas.Targets + train_test_splitter: splitters.TrainTestSplitter, + inputs: schemas.Inputs, + targets: schemas.Targets, ) -> splitters.TrainTest: """Return the train and test indexes for the inputs dataframe.""" return next(train_test_splitter.split(inputs=inputs, targets=targets)) @@ -294,7 +294,9 @@ def test_data_resolver() -> str: """Get data_path.""" return data_path - omegaconf.OmegaConf.register_new_resolver("test_data_path", test_data_resolver, use_cache=True, replace=True) + omegaconf.OmegaConf.register_new_resolver( + "test_data_path", test_data_resolver, use_cache=True, replace=True + ) return data_path @@ -331,5 +333,7 @@ def default_mlflow_model_version( """Return an MLflow version for the default model.""" with mlflow.start_run(run_name="Default") as run: 
default_saver.save(model=default_model, signature=default_signature, input_example=inputs) - version = mlflow_service.register(run_id=run.info.run_id, path=default_saver.path, alias=default_alias) + version = mlflow_service.register( + run_id=run.info.run_id, path=default_saver.path, alias=default_alias + ) return version diff --git a/tests/test_configs.py b/tests/test_configs.py index 0ef2c7b..7e85251 100644 --- a/tests/test_configs.py +++ b/tests/test_configs.py @@ -1,12 +1,9 @@ -# pylint: disable=missing-docstring - # %% IMPORTS import os -from omegaconf import OmegaConf - from bikes import configs +from omegaconf import OmegaConf # %% LOADERS diff --git a/tests/test_datasets.py b/tests/test_datasets.py index 42b927a..14c8d86 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -1,11 +1,8 @@ -# pylint: disable=missing-docstring - # %% IMPORTS import os import pytest - from bikes import datasets, schemas # %% READERS diff --git a/tests/test_jobs.py b/tests/test_jobs.py index 349f44a..b61f13e 100644 --- a/tests/test_jobs.py +++ b/tests/test_jobs.py @@ -1,5 +1,3 @@ -# pylint: disable=missing-docstring - # %% IMPORTS import os @@ -58,14 +56,20 @@ def test_tuning_job( # - search assert len(out["results"]) == n_trials, "Results should one row per candidate!" assert float("-inf") <= out["best_score"] <= float("+inf"), "Best score should be a float!" - assert out["best_params"].keys() == default_searcher.param_grid.keys(), "Best params should have the same keys!" + assert ( + out["best_params"].keys() == default_searcher.param_grid.keys() + ), "Best params should have the same keys!" # - write assert os.path.exists(tmp_results_writer.path), "Results should be saved to the given path!" # - mlflow tracking mlflow_experiment = mlflow_client.get_experiment_by_name(name=mlflow_service.experiment_name) - assert mlflow_experiment.name == mlflow_service.experiment_name, "Mlflow experiment name should be the same!" 
+ assert ( + mlflow_experiment.name == mlflow_service.experiment_name + ), "Mlflow experiment name should be the same!" mlflow_runs = mlflow_client.search_runs(experiment_ids=mlflow_experiment.experiment_id) - assert len(mlflow_runs) == n_trials + 1, "There should be as many Mlflow runs as trials + 1 for parent!" + assert ( + len(mlflow_runs) == n_trials + 1 + ), "There should be as many Mlflow runs as trials + 1 for parent!" def test_training_job( @@ -128,7 +132,9 @@ def test_training_job( assert out["targets"].ndim == 2, "Target should be a dataframe!" # - split assert ( - len(out["train_index"]) + len(out["test_index"]) == len(out["inputs"]) == len(out["targets"]) + len(out["train_index"]) + len(out["test_index"]) + == len(out["inputs"]) + == len(out["targets"]) ), "Train and test indexes should have the same length as inputs! and targets!" assert ( len(out["inputs_train"]) == len(out["targets_train"]) == len(out["train_index"]) @@ -137,11 +143,17 @@ def test_training_job( len(out["inputs_test"]) == len(out["targets_test"]) == len(out["test_index"]) ), "Inputs and targets test should have the same length as test index!" # - outputs - assert len(out["outputs_test"]) == len(out["inputs_test"]), "Outputs should have the same length as inputs!" - assert out["outputs_test"].shape == out["targets_test"].shape, "Outputs should have the same shape as targets!" + assert len(out["outputs_test"]) == len( + out["inputs_test"] + ), "Outputs should have the same length as inputs!" + assert ( + out["outputs_test"].shape == out["targets_test"].shape + ), "Outputs should have the same shape as targets!" # - score assert out["i"] == len(scorers), "i should have the same length as scorers!" - assert float("-inf") <= out["score"] <= float("+inf"), "Score should be between a numeric value!" + assert ( + float("-inf") <= out["score"] <= float("+inf") + ), "Score should be between a numeric value!" 
# - signature assert out["signature"].inputs is not None, "Signature inputs should not be None!" assert out["signature"].outputs is not None, "Signature outputs should not be None!" @@ -154,12 +166,18 @@ def test_training_job( assert out["version"].run_id == out["run"].info.run_id, "Version run id should be the same!" # - mlflow tracking mlflow_experiment = mlflow_client.get_experiment_by_name(name=mlflow_service.experiment_name) - assert mlflow_experiment.name == mlflow_service.experiment_name, "MLflow Experiment name should be the same!" + assert ( + mlflow_experiment.name == mlflow_service.experiment_name + ), "MLflow Experiment name should be the same!" mlflow_runs = mlflow_client.search_runs(experiment_ids=mlflow_experiment.experiment_id) assert len(mlflow_runs) == 1, "There should be a single MLflow run for the training!" # - mlflow registry - model_version = mlflow_client.get_model_version(name=mlflow_service.registry_name, version=out["version"].version) - assert model_version.run_id == out["run"].info.run_id, "MLFlow model version run id should be the same!" + model_version = mlflow_client.get_model_version( + name=mlflow_service.registry_name, version=out["version"].version + ) + assert ( + model_version.run_id == out["run"].info.run_id + ), "MLFlow model version run id should be the same!" def test_inference_job( @@ -200,7 +218,11 @@ def test_inference_job( assert registry_alias in out["uri"], "URI should contain the registry alias!" # - model assert out["model"].metadata.signature is not None, "Model should have a valid signature!" - assert out["model"].metadata.flavors.get("python_function"), "Model should have a pyfunc flavor!" - assert out["model"].metadata.run_id == default_mlflow_model_version.run_id, "Model run id should be the same!" + assert out["model"].metadata.flavors.get( + "python_function" + ), "Model should have a pyfunc flavor!" 
+ assert ( + out["model"].metadata.run_id == default_mlflow_model_version.run_id + ), "Model run id should be the same!" # - outputs assert os.path.exists(tmp_outputs_writer.path), "Outputs should be saved to the given path!" diff --git a/tests/test_metrics.py b/tests/test_metrics.py index d782343..e6fe617 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -1,9 +1,6 @@ -# pylint: disable=missing-docstring - # %% IMPORTS import pytest - from bikes import metrics, models, schemas # %% METRICS diff --git a/tests/test_models.py b/tests/test_models.py index 9bd8fd0..dd5fdaf 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -1,5 +1,3 @@ -# pylint: disable=missing-docstring - # %% IMPORTS import typing as T @@ -32,11 +30,14 @@ def predict(self, inputs): params_set_params = model.set_params(b=20).get_params() # then assert params_init == {"a": 10, "b": 2}, "Model should get the right params at init!" - assert params_set_params == {"a": 10, "b": 20}, "Model should set the right params after set_params!" + assert params_set_params == { + "a": 10, + "b": 20, + }, "Model should set the right params after set_params!" 
def test_baseline_sklearn_model( - train_test_sets: tuple[schemas.Inputs, schemas.Targets, schemas.Inputs, schemas.Targets] + train_test_sets: tuple[schemas.Inputs, schemas.Targets, schemas.Inputs, schemas.Targets], ): # given params = {"max_depth": 3, "n_estimators": 5, "random_state": 0} diff --git a/tests/test_registers.py b/tests/test_registers.py index ad2e58f..eae3bb5 100644 --- a/tests/test_registers.py +++ b/tests/test_registers.py @@ -1,11 +1,8 @@ """Test the registers module.""" -# pylint: disable=missing-docstring - # %% IMPORTS import mlflow - from bikes import models, registers, schemas, services # %% ADAPTERS @@ -40,7 +37,9 @@ def test_infer_signer(inputs: schemas.Inputs, outputs: schemas.Outputs): # %% SAVERS -def test_custom_saver(inputs: schemas.Inputs, default_model: models.Model, default_signature: registers.Signature): +def test_custom_saver( + inputs: schemas.Inputs, default_model: models.Model, default_signature: registers.Signature +): # given path = "custom" saver = registers.CustomSaver(path=path) @@ -58,7 +57,9 @@ def test_custom_saver(inputs: schemas.Inputs, default_model: models.Model, defau def test_custom_loader( - inputs: schemas.Inputs, mlflow_service: services.MLflowService, default_mlflow_model_version: registers.Version + inputs: schemas.Inputs, + mlflow_service: services.MLflowService, + default_mlflow_model_version: registers.Version, ): # given name = mlflow_service.registry_name @@ -71,7 +72,11 @@ def test_custom_loader( # then # # - model assert model.metadata.signature is not None, "The model should have a valid signature!" - assert model.metadata.run_id == default_mlflow_model_version.run_id, "The model run id should be the same!" - assert model.metadata.flavors.get("python_function") is not None, "The model should have a python_function flavor!" + assert ( + model.metadata.run_id == default_mlflow_model_version.run_id + ), "The model run id should be the same!" 
+ assert ( + model.metadata.flavors.get("python_function") is not None + ), "The model should have a python_function flavor!" # - output assert schemas.OutputsSchema.check(outputs) is not None, "Outputs should be valid!" diff --git a/tests/test_schemas.py b/tests/test_schemas.py index 4cbd588..f26ef83 100644 --- a/tests/test_schemas.py +++ b/tests/test_schemas.py @@ -1,5 +1,3 @@ -# pylint: disable=missing-docstring - # %% IMPORTS from bikes import datasets, schemas diff --git a/tests/test_scripts.py b/tests/test_scripts.py index 3516dbc..80e408e 100644 --- a/tests/test_scripts.py +++ b/tests/test_scripts.py @@ -1,12 +1,9 @@ -# pylint: disable=missing-docstring - # %% IMPORTS import json import os import pytest - from bikes import scripts # %% SCRIPTS diff --git a/tests/test_searchers.py b/tests/test_searchers.py index 91718e8..d0821a0 100644 --- a/tests/test_searchers.py +++ b/tests/test_searchers.py @@ -1,5 +1,3 @@ -# pylint: disable=missing-docstring - # %% IMPORTS from bikes import metrics, models, schemas, searchers, splitters @@ -19,9 +17,15 @@ def test_grid_cv_searcher( searcher = searchers.GridCVSearcher(param_grid=param_grid) # when result, best_score, best_params = searcher.search( - model=default_model, metric=default_metric, cv=time_series_splitter, inputs=inputs, targets=targets + model=default_model, + metric=default_metric, + cv=time_series_splitter, + inputs=inputs, + targets=targets, ) # then assert set(best_params) == set(param_grid), "Best params should have the same keys as grid!" assert float("-inf") <= best_score <= float("+inf"), "Best score should be a floating number!" - assert len(result) == sum(map(len, param_grid.values())), "Results should have one row per candidate!" + assert len(result) == sum( + map(len, param_grid.values()) + ), "Results should have one row per candidate!" 
diff --git a/tests/test_services.py b/tests/test_services.py
index 0c14ae0..56ce8e3 100644
--- a/tests/test_services.py
+++ b/tests/test_services.py
@@ -1,11 +1,8 @@
-# pylint: disable=missing-docstring
-
 # %% IMPORTS
 
 import mlflow
-from loguru import logger
-
 from bikes import services
+from loguru import logger
 
 # %% SERVICES
 
@@ -33,12 +30,18 @@ def test_mlflow_service(mlflow_service: services.MLflowService):
         service.stop()  # no effect
     # then
     # - mlflow
-    assert mlflow_service.tracking_uri == mlflow.get_tracking_uri(), "Tracking URI should be the same!"
-    assert mlflow_service.registry_uri == mlflow.get_registry_uri(), "Registry URI should be the same!"
-    assert mlflow.get_experiment_by_name(mlflow_service.experiment_name), "Experiment should be setup!"
-    # - client
-    assert mlflow_service.tracking_uri == client.tracking_uri, "Tracking URI should be the same!"
     assert (
-        mlflow_service.registry_uri == client._registry_uri  # pylint: disable=protected-access
+        mlflow_service.tracking_uri == mlflow.get_tracking_uri()
     ), "Tracking URI should be the same!"
-    assert client.get_experiment_by_name(mlflow_service.experiment_name), "Experiment should be setup!"
+    assert (
+        mlflow_service.registry_uri == mlflow.get_registry_uri()
+    ), "Registry URI should be the same!"
+    assert mlflow.get_experiment_by_name(
+        mlflow_service.experiment_name
+    ), "Experiment should be setup!"
+    # - client
+    assert mlflow_service.tracking_uri == client.tracking_uri, "Tracking URI should be the same!"
+    assert mlflow_service.registry_uri == client._registry_uri, "Registry URI should be the same!"
+    assert client.get_experiment_by_name(
+        mlflow_service.experiment_name
+    ), "Experiment should be setup!"
diff --git a/tests/test_splitters.py b/tests/test_splitters.py index 7acbe71..611f379 100644 --- a/tests/test_splitters.py +++ b/tests/test_splitters.py @@ -1,5 +1,3 @@ -# pylint: disable=missing-docstring - # %% IMPORTS from bikes import schemas, splitters @@ -12,7 +10,9 @@ def test_train_test_splitter(inputs: schemas.Inputs, targets: schemas.Targets): shuffle = True test_size = 50 random_state = 0 - splitter = splitters.TrainTestSplitter(shuffle=shuffle, test_size=test_size, random_state=random_state) + splitter = splitters.TrainTestSplitter( + shuffle=shuffle, test_size=test_size, random_state=random_state + ) # when n_splits = splitter.get_n_splits(inputs=inputs, targets=targets) splits = list(splitter.split(inputs=inputs, targets=targets)) @@ -20,7 +20,9 @@ def test_train_test_splitter(inputs: schemas.Inputs, targets: schemas.Targets): # then assert n_splits == len(splits) == 1, "Splitter should return 1 split!" assert len(test_index) == test_size, "Test index should have the given test size!" - assert len(train_index) == len(inputs) - test_size, "Train index should have the remaining size!" + assert ( + len(train_index) == len(inputs) - test_size + ), "Train index should have the remaining size!" assert not inputs.iloc[test_index].empty, "Test index should be a subset of the inputs!" assert not inputs.iloc[train_index].empty, "Train index should be a subset of the inputs!" @@ -41,6 +43,8 @@ def test_time_series_splitter(inputs: schemas.Inputs, targets: schemas.Targets): assert len(train_index) == ( len(inputs) - test_size * (n_splits - i) ), "Train index should have the cumulative remaining size!" - assert train_index.max() < test_index.min(), "Train index should always be lower than test index!" + assert ( + train_index.max() < test_index.min() + ), "Train index should always be lower than test index!" assert not inputs.iloc[train_index].empty, "Train index should be a subset of the inputs!" 
assert not inputs.iloc[test_index].empty, "Test index should be a subset of the inputs!"