Merge branch 'main' into dependabot/pip/pylint-3.3.1

hf-kklein authored Oct 7, 2024
2 parents 2828a55 + cda36d5 commit 188c569
Showing 26 changed files with 11,135 additions and 10,131 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/checks.yaml
@@ -9,7 +9,7 @@ jobs:
matrix:
os: [ ubuntu-latest ]
python-version: [ "3.12" ]
tox-env: [ "test", "lint", "formatcheck", "typecheck", "test_packaging" ]
tox-env: [ "test", "lint", "formatcheck", "typecheck", "test_packaging", "dev" ]
name: ${{ matrix.tox-env }}
runs-on: ${{ matrix.os }}
steps:
8 changes: 6 additions & 2 deletions .github/workflows/conventional-commit-check.yml
@@ -4,12 +4,16 @@ on:
pull_request:
types: [opened, synchronize, reopened, edited]

permissions:
pull-requests: write

jobs:
validate-pr-title:
runs-on: ubuntu-latest
steps:
- name: PR Conventional Commit Validation
uses: ytanikin/PRConventionalCommits@1.2.0
uses: ytanikin/PRConventionalCommits@1.3.0
with:
task_types: '["feat","fix","docs","style","refactor","perf","test","build","ci","chore","revert"]'
add_label: 'false'
add_label: 'true'
custom_labels: '{"feat": "feature", "fix": "fix", "docs": "documentation", "test": "test", "ci": "CI/CD", "refactor": "refactor", "perf": "performance", "chore": "chore", "revert": "revert", "wip": "WIP"}'
10 changes: 7 additions & 3 deletions .github/workflows/python-publish.yml
@@ -37,7 +37,11 @@ jobs:
build-n-publish:
name: Build and publish Python 🐍 distributions 📦 to PyPI and TestPyPI
runs-on: ubuntu-latest
runs-on: ${{ matrix.os }}
strategy:
matrix:
python-version: [ "3.12" ]
os: [ ubuntu-latest ]
# Specifying a GitHub environment, which is strongly recommended by PyPI: https://docs.pypi.org/trusted-publishers/adding-a-publisher/
# you have to create an environment in your repository settings and add the environment name here
environment: release
@@ -47,14 +51,14 @@
needs: test
steps:
- uses: actions/checkout@v4
- name: Set up Python
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r dev_requirements/requirements-test_packaging.txt
pip install './.[test_packaging]'
- name: Build wheel and source distributions
run: |
python -m build
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -58,7 +58,7 @@ test = [
"freezegun==1.5.1",
"pytest-datafiles==3.0.0",
"pytest==8.3.3",
"syrupy==4.7.1",
"syrupy==4.7.2",
]
typecheck = [
"mypy==1.11.2",
@@ -79,7 +79,7 @@ formatting = [
"isort==5.13.2"
]
test_packaging = [
"build==1.2.2",
"build==1.2.2.post1",
"twine==5.1.1"
]

6 changes: 3 additions & 3 deletions requirements.txt
@@ -14,23 +14,23 @@ colorama==0.4.6
# colorlog
colorlog==6.8.2
# via kohlrahbi (pyproject.toml)
efoli==1.1.0
efoli==1.2.0
# via kohlrahbi (pyproject.toml)
et-xmlfile==1.1.0
# via openpyxl
lxml==5.3.0
# via python-docx
more-itertools==10.5.0
# via kohlrahbi (pyproject.toml)
numpy==2.1.1
numpy==2.1.2
# via pandas
openpyxl==3.1.5
# via kohlrahbi (pyproject.toml)
pandas==2.2.3
# via kohlrahbi (pyproject.toml)
pydantic==2.9.2
# via kohlrahbi (pyproject.toml)
pydantic-core==2.24.0
pydantic-core==2.23.4
# via pydantic
python-dateutil==2.9.0.post0
# via pandas
2 changes: 2 additions & 0 deletions src/kohlrahbi/__init__.py
@@ -7,6 +7,7 @@
from kohlrahbi.ahb.command import ahb
from kohlrahbi.changehistory.command import changehistory
from kohlrahbi.conditions.command import conditions
from kohlrahbi.qualitymap.command import qualitymap
from kohlrahbi.version import version


@@ -20,6 +21,7 @@ def cli() -> None:
cli.add_command(ahb)
cli.add_command(changehistory)
cli.add_command(conditions)
cli.add_command(qualitymap)

if __name__ == "__main__":
# the parameter arguments get provided over the CLI
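
For orientation: judging by how it is registered alongside ahb, changehistory and conditions above, the new qualitymap command exported from kohlrahbi.qualitymap.command is presumably an ordinary click command. The following is a minimal sketch of such a command; the option names and the body are illustrative assumptions, not the actual implementation.

    from pathlib import Path

    import click

    @click.command()
    @click.option(
        "--input-path",
        type=click.Path(exists=True, path_type=Path),
        help="Directory with the .docx files to scan (illustrative option).",
    )
    @click.option(
        "--output-path",
        type=click.Path(path_type=Path),
        help="Where to write the extracted quality map (illustrative option).",
    )
    def qualitymap(input_path: Path, output_path: Path) -> None:
        """Scrape the quality map table from the UTILMD Strom AHB (sketch only)."""
        click.echo(f"Scanning {input_path}, writing results to {output_path}")

With cli.add_command(qualitymap) in place, the command becomes reachable as a subcommand of the kohlrahbi CLI group (assuming the package's console-script entry point is named kohlrahbi).
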
110 changes: 107 additions & 3 deletions src/kohlrahbi/ahbtable/ahbsubtable.py
@@ -2,14 +2,18 @@
This module contains the AhbSubTable class.
"""

from typing import Generator
from typing import Generator, Union

import numpy as np
import pandas as pd
from docx.table import Table as DocxTable
from docx.table import _Cell, _Row
from docx.text.paragraph import Paragraph
from numpy.typing import NDArray
from pydantic import BaseModel, ConfigDict

from kohlrahbi.ahbtable.ahbtablerow import AhbTableRow
from kohlrahbi.docxtablecells.bodycell import INDEX_OF_CODES_AND_QUALIFIER_COLUMN, KNOW_SUFFIXES
from kohlrahbi.enums import RowType
from kohlrahbi.row_type_checker import get_row_type
from kohlrahbi.seed import Seed
@@ -30,6 +34,7 @@ class AhbSubTable(BaseModel):
def _parse_docx_table(
table_meta_data: Seed, ahb_table_dataframe: pd.DataFrame, docx_table: DocxTable
) -> pd.DataFrame:
"""Parse the docx table and add the information to the dataframe."""
for row in docx_table.rows:
sanitized_cells = list(AhbSubTable.iter_visible_cells(row=row))

@@ -58,16 +63,45 @@ def _parse_docx_table(

if ahb_table_row_dataframe is not None:
ahb_table_dataframe = pd.concat([ahb_table_dataframe, ahb_table_row_dataframe], ignore_index=True)
# this case covers the page break situation
else:
# this case covers the page break situation

# check for conditions_text
contains_condition_texts = any(paragraph.text != "" for paragraph in bedingung_cell.paragraphs)
# conditions are always at the top of a dataelement
# add condition texts
if contains_condition_texts:
AhbSubTable.combine_condition_text(ahb_table_dataframe, bedingung_cell)

# add new row regularly
ahb_table_row = AhbTableRow(
seed=table_meta_data,
edifact_struktur_cell=edifact_struktur_cell,
middle_cell=middle_cell,
bedingung_cell=bedingung_cell,
)
ahb_table_row_dataframe = ahb_table_row.parse(row_type=current_row_type)

ahb_table_row.parse(row_type=table_meta_data.last_two_row_types[1])
# look at first line to determine if it is broken
first_paragraph = middle_cell.paragraphs[0]

if ahb_table_row_dataframe is not None:
if AhbSubTable.is_broken_line(
table=ahb_table_dataframe,
table_meta_data=table_meta_data,
paragraph=first_paragraph,
):
AhbSubTable.add_broken_line(ahb_table_dataframe, ahb_table_row_dataframe)
# we have a broken line
ahb_table_dataframe = pd.concat(
[ahb_table_dataframe, ahb_table_row_dataframe.iloc[1:]],
ignore_index=True,
)
else:
ahb_table_dataframe = pd.concat(
[ahb_table_dataframe, ahb_table_row_dataframe],
ignore_index=True,
)

# An AhbSubTable can span over two pages.
# But after every page break, even if we're still in the same subtable,
@@ -131,3 +165,73 @@ def iter_visible_cells(row: _Row) -> Generator[_Cell, None, None]:
table_row = row._tr # pylint:disable=protected-access
for table_column in table_row.tc_lst:
yield _Cell(table_column, row.table)

@staticmethod
def add_text_to_last_row(ahb_table_dataframe: pd.DataFrame, row_index: int, column_index: int, text: str) -> None:
"""Add a text to the last row of the dataframe."""
starts_with_known_suffix = any(text.startswith(suffix + " ") for suffix in KNOW_SUFFIXES)
if len(text) > 0:
if len(ahb_table_dataframe.iat[row_index, column_index]) > 0 and not starts_with_known_suffix:
text = " " + text
ahb_table_dataframe.iat[row_index, column_index] += text

@staticmethod
def add_broken_line(ahb_table_dataframe: pd.DataFrame, broken_line: pd.DataFrame) -> None:
"""Add a broken line to the dataframe."""
for col_index in range(INDEX_OF_CODES_AND_QUALIFIER_COLUMN, len(ahb_table_dataframe.columns)):
AhbSubTable.add_text_to_last_row(
ahb_table_dataframe, ahb_table_dataframe.index.max(), col_index, str(broken_line.iat[0, col_index])
)

@staticmethod
def combine_condition_text(ahb_table_dataframe: pd.DataFrame, bedingung_cell: _Cell) -> None:
"""Add the condition text to the dataframe."""
conditions_text = " " + " ".join(
paragraph.text for paragraph in bedingung_cell.paragraphs if paragraph.text != ""
)
last_valid_row = ahb_table_dataframe["Bedingung"].last_valid_index()
conditions_text = ahb_table_dataframe.at[last_valid_row, "Bedingung"] + conditions_text
# remove existing text
ahb_table_dataframe.at[last_valid_row, "Bedingung"] = ""
# remove remaining text to avoid misplacements
for paragraph in bedingung_cell.paragraphs:
paragraph.text = ""
bedingung_cell.paragraphs[-1].text = conditions_text

@staticmethod
def is_broken_line(
table: pd.DataFrame,
table_meta_data: Seed,
paragraph: Paragraph,
) -> bool:
"""
Check for broken lines in the middle cell.
"""
tabsplit_text = paragraph.text.split("\t")

loc: Union[int, slice, NDArray[np.bool_]] = table.columns.get_loc("Beschreibung")

# Ensure loc is an int
if isinstance(loc, int):
beschreibung_index: int = loc
else:
raise ValueError("The location of the column 'Beschreibung' is not an integer.")

is_empty_middle_line = all(text == "" for text in tabsplit_text)
is_broken_code_qualifier = (
paragraph.paragraph_format.left_indent is not None
and paragraph.paragraph_format.left_indent != table_meta_data.middle_cell_left_indent_position
and table.iat[-1, beschreibung_index] != ""
and table.iloc[-1, beschreibung_index + 1 :].ne("").any()
)
if is_broken_code_qualifier and len(tabsplit_text) == 1:
# only broken code / qualifier
assert (
table.iat[-1, beschreibung_index] != "" and table.iloc[-1, beschreibung_index + 1 :].ne("").any()
), "no condition expected in broken line"
there_are_conditions = (
len(tabsplit_text) > 1
and paragraph.paragraph_format.left_indent != table_meta_data.middle_cell_left_indent_position
)

return is_empty_middle_line or there_are_conditions or is_broken_code_qualifier
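
To make the new page-break handling easier to follow, here is a small, self-contained pandas sketch of the two merge strategies that _parse_docx_table now distinguishes: folding a "broken line" continuation into the last existing row versus concatenating a regular new row. Column names and values are toy data, and the helper is simplified (the real add_text_to_last_row also respects KNOW_SUFFIXES and starts at INDEX_OF_CODES_AND_QUALIFIER_COLUMN).

    import pandas as pd

    # Toy stand-in for the accumulated AHB dataframe (the real one has more columns).
    ahb_df = pd.DataFrame({"Codes": ["UTILMD"], "Beschreibung": ["Netzanmeldung"], "Bedingung": [""]})
    # A row parsed right after a page break whose first line continues the previous row.
    parsed = pd.DataFrame({"Codes": ["D11"], "Beschreibung": ["Fortsetzung"], "Bedingung": ["[1] Wenn ..."]})

    def add_text_to_last_row(df: pd.DataFrame, row_index: int, column_index: int, text: str) -> None:
        """Append text to an existing cell, inserting a separating space when needed (simplified)."""
        if text and len(df.iat[row_index, column_index]) > 0:
            text = " " + text
        df.iat[row_index, column_index] += text

    # Broken line: merge the first parsed row into the last existing row ...
    for col_index in range(len(ahb_df.columns)):
        add_text_to_last_row(ahb_df, ahb_df.index.max(), col_index, str(parsed.iat[0, col_index]))
    # ... and concatenate only the remaining rows as genuinely new rows.
    ahb_df = pd.concat([ahb_df, parsed.iloc[1:]], ignore_index=True)
    print(ahb_df)
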
5 changes: 1 addition & 4 deletions src/kohlrahbi/ahbtable/ahbtablerow.py
@@ -27,10 +27,7 @@ class AhbTableRow(BaseModel):

model_config = ConfigDict(arbitrary_types_allowed=True)

def parse(
self,
row_type: RowType,
) -> Optional[pd.DataFrame]:
def parse(self, row_type: RowType) -> Optional[pd.DataFrame]:
"""
Writes the current row of the current table into the DataFrame depending on the type of the row.
If the row is a header row, it will be skipped and None will be returned.
3 changes: 1 addition & 2 deletions src/kohlrahbi/conditions/__init__.py
@@ -10,7 +10,7 @@
from kohlrahbi.ahb import get_pruefi_to_file_mapping
from kohlrahbi.ahbtable.ahbcondtions import AhbConditions
from kohlrahbi.ahbtable.ahbpackagetable import AhbPackageTable
from kohlrahbi.conditions.allgemeine_festlegungen import time_conditions, time_packages
from kohlrahbi.conditions.allgemeine_festlegungen import time_conditions
from kohlrahbi.logger import logger
from kohlrahbi.read_functions import get_all_conditions_from_doc

@@ -57,6 +57,5 @@ def scrape_conditions(
collected_packages.include_package_dict(packages.package_dict)
collected_conditions.include_condition_dict(cond_table.conditions_dict)
collected_conditions.include_condition_dict({edifact_format: time_conditions})
collected_packages.include_package_dict({edifact_format: time_packages})
collected_conditions.dump_as_json(output_path)
collected_packages.dump_as_json(output_path)
9 changes: 4 additions & 5 deletions src/kohlrahbi/conditions/allgemeine_festlegungen.py
@@ -2,11 +2,10 @@
"""
Contains conditions for times in allgemeine Festlegungen.
"""
time_packages = {
"UB1": "([931] ∧ [932] [490]) ⊻ ([931] ∧ [933] [491])",
"UB2": "([931] ∧ [934] [490]) ⊻ ([931] ∧ [935] [491])",
"UB3": "([931] ∧ [932] [492] ∧ [490]) ⊻ ([931] ∧ [933] [492] ∧ [491]) ⊻ ([931] ∧ [934] [493] ∧ [490]) ⊻ ([931] ∧ [935] [493] ∧ [491])",
}
# We decided against adding the time_packages to the regular packages.
# The time-packages are resolved by a special transformer in AHBicht:
# https://github.com/Hochfrequenz/ahbicht/blob/c51c81d2be098dd79ff52b754979892396207fe2/src/ahbicht/expressions/expression_resolver.py#L149

time_conditions = {
"490": "wenn Wert in diesem DE, an der Stelle CCYYMMDDHHMM ein Zeitpunkt aus dem angegeben Zeitraum der Tabelle Kapitel 3.5 „Übersicht gesetzliche deutsche Sommerzeit (MESZ)“ der Spalten:\n\t„Sommerzeit (MESZ) von“ Darstellung in UTC und\n\t„Sommerzeit (MESZ) bis“ Darstellung in UTC ist.",
"491": "wenn Wert in diesem DE, an der Stelle CCYYMMDDHHMM ein Zeitpunkt aus dem angegeben Zeitraum der Tabelle Kapitel 3.6 „Übersicht gesetzliche deutsche Zeit (MEZ)“ der Spalten: \n\t„Winterzeit (MEZ) von“ Darstellung in UTC und\n\t„Winterzeit (MEZ) bis“ Darstellung in UTC ist.",
13 changes: 13 additions & 0 deletions src/kohlrahbi/docxfilefinder.py
@@ -215,3 +215,16 @@ def get_all_docx_files_which_contain_change_histories(self) -> list[Path]:
self.remove_temporary_files()

return self.paths_to_docx_files

def get_docx_files_which_contain_quality_map(self) -> list[Path]:
"""
This function returns a list of docx files which contain a quality map.
"""

self.filter_for_latest_ahb_docx_files()
self.remove_temporary_files()

indicator_string = "UTILMDAHBStrom"
self.paths_to_docx_files = [path for path in self.paths_to_docx_files if indicator_string in path.name]

return self.paths_to_docx_files
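
A quick, self-contained illustration of what the new filter keeps; the file names below are made up, in practice paths_to_docx_files is populated from the edi_energy_mirror checkout.

    from pathlib import Path

    # Stand-ins for self.paths_to_docx_files after filtering for the latest AHB files.
    paths_to_docx_files = [
        Path("UTILMDAHBStrom-informatorischeLesefassung_20241001.docx"),
        Path("UTILMDAHBGas-informatorischeLesefassung_20241001.docx"),
        Path("REQOTEQUOTESAHB-informatorischeLesefassung_20241001.docx"),
    ]
    indicator_string = "UTILMDAHBStrom"
    quality_map_files = [path for path in paths_to_docx_files if indicator_string in path.name]
    print(quality_map_files)  # only the UTILMD Strom AHB remains
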
13 changes: 7 additions & 6 deletions src/kohlrahbi/docxtablecells/bedinungscell.py
@@ -23,15 +23,16 @@ def parse(self, ahb_row_dataframe: pd.DataFrame) -> pd.DataFrame:
"""
Parses a cell in the Bedingung column and puts the information into the appropriate column of the dataframe.
"""

bedingung = self.beautify_bedingungen()
bedingung = self.table_cell.text
bedingung = self.beautify_bedingungen(bedingung)

row_index = ahb_row_dataframe.index.max()
ahb_row_dataframe.at[row_index, "Bedingung"] += bedingung
return ahb_row_dataframe

# pylint: disable=line-too-long
def beautify_bedingungen(self) -> str:
@staticmethod
def beautify_bedingungen(bedingung: str) -> str:
"""
Beautifies the Bedingungen by removing the given line breaks and insert the line breaks at the correct places.
@@ -41,11 +42,11 @@ def beautify_bedingungen(self) -> str:
[494] Das hier genannte Datum muss der Zeitpunkt sein, zu dem das Dokument erstellt wurde, oder ein Zeitpunkt, der davor liegt
[931] Format: ZZZ = +00
"""
beautified_bedingung = self.table_cell.text.replace("\n", " ")
beautified_bedingung = bedingung.replace("\n", " ")

matches = re.findall(r"\[\d+\]", beautified_bedingung)
for match in matches[1:]:
index = beautified_bedingung.find(match)
beautified_bedingung = beautified_bedingung[:index] + "\n" + beautified_bedingung[index:]
beautified_bedingung = beautified_bedingung[:index].rstrip() + "\n" + beautified_bedingung[index:]

return beautified_bedingung
return beautified_bedingung.lstrip()
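
As a quick sanity check of the refactored helper (now a staticmethod that receives the raw cell text instead of reading self.table_cell itself), this is how it behaves on the docstring's example; the class name BedingungCell is assumed from the module name.

    from kohlrahbi.docxtablecells.bedinungscell import BedingungCell  # class name assumed

    raw_text = (
        "[494] Das hier genannte Datum muss der Zeitpunkt sein, zu dem das Dokument\n"
        "erstellt wurde, oder ein Zeitpunkt, der davor liegt [931] Format: ZZZ = +00"
    )
    print(BedingungCell.beautify_bedingungen(raw_text))
    # [494] Das hier genannte Datum muss der Zeitpunkt sein, zu dem das Dokument erstellt wurde, oder ein Zeitpunkt, der davor liegt
    # [931] Format: ZZZ = +00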