Merge branch 'main' into dependabot/pip/pylint-3.3.1

hf-kklein authored Oct 7, 2024
2 parents 2828a55 + cda36d5 commit 188c569
Showing 26 changed files with 11,135 additions and 10,131 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/checks.yaml
@@ -9,7 +9,7 @@ jobs:
matrix:
os: [ ubuntu-latest ]
python-version: [ "3.12" ]
tox-env: [ "test", "lint", "formatcheck", "typecheck", "test_packaging" ]
tox-env: [ "test", "lint", "formatcheck", "typecheck", "test_packaging", "dev" ]
name: ${{ matrix.tox-env }}
runs-on: ${{ matrix.os }}
steps:
8 changes: 6 additions & 2 deletions .github/workflows/conventional-commit-check.yml
@@ -4,12 +4,16 @@ on:
pull_request:
types: [opened, synchronize, reopened, edited]

permissions:
pull-requests: write

jobs:
validate-pr-title:
runs-on: ubuntu-latest
steps:
- name: PR Conventional Commit Validation
uses: ytanikin/PRConventionalCommits@1.2.0
uses: ytanikin/PRConventionalCommits@1.3.0
with:
task_types: '["feat","fix","docs","style","refactor","perf","test","build","ci","chore","revert"]'
add_label: 'false'
add_label: 'true'
custom_labels: '{"feat": "feature", "fix": "fix", "docs": "documentation", "test": "test", "ci": "CI/CD", "refactor": "refactor", "perf": "performance", "chore": "chore", "revert": "revert", "wip": "WIP"}'
10 changes: 7 additions & 3 deletions .github/workflows/python-publish.yml
@@ -37,7 +37,11 @@ jobs:
build-n-publish:
name: Build and publish Python 🐍 distributions 📦 to PyPI and TestPyPI
runs-on: ubuntu-latest
runs-on: ${{ matrix.os }}
strategy:
matrix:
python-version: [ "3.12" ]
os: [ ubuntu-latest ]
# Specifying a GitHub environment, which is strongly recommended by PyPI: https://docs.pypi.org/trusted-publishers/adding-a-publisher/
# you have to create an environment in your repository settings and add the environment name here
environment: release
@@ -47,14 +51,14 @@
needs: test
steps:
- uses: actions/checkout@v4
- name: Set up Python
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r dev_requirements/requirements-test_packaging.txt
pip install './.[test_packaging]'
- name: Build wheel and source distributions
run: |
python -m build
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -58,7 +58,7 @@ test = [
"freezegun==1.5.1",
"pytest-datafiles==3.0.0",
"pytest==8.3.3",
"syrupy==4.7.1",
"syrupy==4.7.2",
]
typecheck = [
"mypy==1.11.2",
@@ -79,7 +79,7 @@ formatting = [
"isort==5.13.2"
]
test_packaging = [
"build==1.2.2",
"build==1.2.2.post1",
"twine==5.1.1"
]

6 changes: 3 additions & 3 deletions requirements.txt
@@ -14,23 +14,23 @@ colorama==0.4.6
# colorlog
colorlog==6.8.2
# via kohlrahbi (pyproject.toml)
efoli==1.1.0
efoli==1.2.0
# via kohlrahbi (pyproject.toml)
et-xmlfile==1.1.0
# via openpyxl
lxml==5.3.0
# via python-docx
more-itertools==10.5.0
# via kohlrahbi (pyproject.toml)
numpy==2.1.1
numpy==2.1.2
# via pandas
openpyxl==3.1.5
# via kohlrahbi (pyproject.toml)
pandas==2.2.3
# via kohlrahbi (pyproject.toml)
pydantic==2.9.2
# via kohlrahbi (pyproject.toml)
pydantic-core==2.24.0
pydantic-core==2.23.4
# via pydantic
python-dateutil==2.9.0.post0
# via pandas
2 changes: 2 additions & 0 deletions src/kohlrahbi/__init__.py
@@ -7,6 +7,7 @@
from kohlrahbi.ahb.command import ahb
from kohlrahbi.changehistory.command import changehistory
from kohlrahbi.conditions.command import conditions
from kohlrahbi.qualitymap.command import qualitymap
from kohlrahbi.version import version


@@ -20,6 +21,7 @@ def cli() -> None:
cli.add_command(ahb)
cli.add_command(changehistory)
cli.add_command(conditions)
cli.add_command(qualitymap)

if __name__ == "__main__":
# the parameter arguments get provided over the CLI
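
For orientation: judging by how it is registered alongside ahb, changehistory and conditions above, the new qualitymap command exported from kohlrahbi.qualitymap.command is presumably an ordinary click command. The following is a minimal sketch of such a command; the option names and the body are illustrative assumptions, not the actual implementation.

    from pathlib import Path

    import click

    @click.command()
    @click.option(
        "--input-path",
        type=click.Path(exists=True, path_type=Path),
        help="Directory with the .docx files to scan (illustrative option).",
    )
    @click.option(
        "--output-path",
        type=click.Path(path_type=Path),
        help="Where to write the extracted quality map (illustrative option).",
    )
    def qualitymap(input_path: Path, output_path: Path) -> None:
        """Scrape the quality map table from the UTILMD Strom AHB (sketch only)."""
        click.echo(f"Scanning {input_path}, writing results to {output_path}")

With cli.add_command(qualitymap) in place, the command becomes reachable as a subcommand of the kohlrahbi CLI group (assuming the package's console-script entry point is named kohlrahbi).
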
110 changes: 107 additions & 3 deletions src/kohlrahbi/ahbtable/ahbsubtable.py
@@ -2,14 +2,18 @@
This module contains the AhbSubTable class.
"""

from typing import Generator
from typing import Generator, Union

import numpy as np
import pandas as pd
from docx.table import Table as DocxTable
from docx.table import _Cell, _Row
from docx.text.paragraph import Paragraph
from numpy.typing import NDArray
from pydantic import BaseModel, ConfigDict

from kohlrahbi.ahbtable.ahbtablerow import AhbTableRow
from kohlrahbi.docxtablecells.bodycell import INDEX_OF_CODES_AND_QUALIFIER_COLUMN, KNOW_SUFFIXES
from kohlrahbi.enums import RowType
from kohlrahbi.row_type_checker import get_row_type
from kohlrahbi.seed import Seed
@@ -30,6 +34,7 @@ class AhbSubTable(BaseModel):
def _parse_docx_table(
table_meta_data: Seed, ahb_table_dataframe: pd.DataFrame, docx_table: DocxTable
) -> pd.DataFrame:
"""Parse the docx table and add the information to the dataframe."""
for row in docx_table.rows:
sanitized_cells = list(AhbSubTable.iter_visible_cells(row=row))

@@ -58,16 +63,45 @@ def _parse_docx_table(

if ahb_table_row_dataframe is not None:
ahb_table_dataframe = pd.concat([ahb_table_dataframe, ahb_table_row_dataframe], ignore_index=True)
# this case covers the page break situation
else:
# this case covers the page break situation

# check for conditions_text
contains_condition_texts = any(paragraph.text != "" for paragraph in bedingung_cell.paragraphs)
# conditions are always at the top of a dataelement
# add condition texts
if contains_condition_texts:
AhbSubTable.combine_condition_text(ahb_table_dataframe, bedingung_cell)

# add new row regularly
ahb_table_row = AhbTableRow(
seed=table_meta_data,
edifact_struktur_cell=edifact_struktur_cell,
middle_cell=middle_cell,
bedingung_cell=bedingung_cell,
)
ahb_table_row_dataframe = ahb_table_row.parse(row_type=current_row_type)

ahb_table_row.parse(row_type=table_meta_data.last_two_row_types[1])
# look at first line to determine if it is broken
first_paragraph = middle_cell.paragraphs[0]

if ahb_table_row_dataframe is not None:
if AhbSubTable.is_broken_line(
table=ahb_table_dataframe,
table_meta_data=table_meta_data,
paragraph=first_paragraph,
):
AhbSubTable.add_broken_line(ahb_table_dataframe, ahb_table_row_dataframe)
# we have a broken line
ahb_table_dataframe = pd.concat(
[ahb_table_dataframe, ahb_table_row_dataframe.iloc[1:]],
ignore_index=True,
)
else:
ahb_table_dataframe = pd.concat(
[ahb_table_dataframe, ahb_table_row_dataframe],
ignore_index=True,
)

# An AhbSubTable can span over two pages.
# But after every page break, even if we're still in the same subtable,
@@ -131,3 +165,73 @@ def iter_visible_cells(row: _Row) -> Generator[_Cell, None, None]:
table_row = row._tr # pylint:disable=protected-access
for table_column in table_row.tc_lst:
yield _Cell(table_column, row.table)

@staticmethod
def add_text_to_last_row(ahb_table_dataframe: pd.DataFrame, row_index: int, column_index: int, text: str) -> None:
"""Add a text to the last row of the dataframe."""
starts_with_known_suffix = any(text.startswith(suffix + " ") for suffix in KNOW_SUFFIXES)
if len(text) > 0:
if len(ahb_table_dataframe.iat[row_index, column_index]) > 0 and not starts_with_known_suffix:
text = " " + text
ahb_table_dataframe.iat[row_index, column_index] += text

@staticmethod
def add_broken_line(ahb_table_dataframe: pd.DataFrame, broken_line: pd.DataFrame) -> None:
"""Add a broken line to the dataframe."""
for col_index in range(INDEX_OF_CODES_AND_QUALIFIER_COLUMN, len(ahb_table_dataframe.columns)):
AhbSubTable.add_text_to_last_row(
ahb_table_dataframe, ahb_table_dataframe.index.max(), col_index, str(broken_line.iat[0, col_index])
)

@staticmethod
def combine_condition_text(ahb_table_dataframe: pd.DataFrame, bedingung_cell: _Cell) -> None:
"""Add the condition text to the dataframe."""
conditions_text = " " + " ".join(
paragraph.text for paragraph in bedingung_cell.paragraphs if paragraph.text != ""
)
last_valid_row = ahb_table_dataframe["Bedingung"].last_valid_index()
conditions_text = ahb_table_dataframe.at[last_valid_row, "Bedingung"] + conditions_text
# remove existing text
ahb_table_dataframe.at[last_valid_row, "Bedingung"] = ""
# remove remaining text to avoid misplacements
for paragraph in bedingung_cell.paragraphs:
paragraph.text = ""
bedingung_cell.paragraphs[-1].text = conditions_text

@staticmethod
def is_broken_line(
table: pd.DataFrame,
table_meta_data: Seed,
paragraph: Paragraph,
) -> bool:
"""
Check for broken lines in the middle cell.
"""
tabsplit_text = paragraph.text.split("\t")

loc: Union[int, slice, NDArray[np.bool_]] = table.columns.get_loc("Beschreibung")

# Ensure loc is an int
if isinstance(loc, int):
beschreibung_index: int = loc
else:
raise ValueError("The location of the column 'Beschreibung' is not an integer.")

is_empty_middle_line = all(text == "" for text in tabsplit_text)
is_broken_code_qualifier = (
paragraph.paragraph_format.left_indent is not None
and paragraph.paragraph_format.left_indent != table_meta_data.middle_cell_left_indent_position
and table.iat[-1, beschreibung_index] != ""
and table.iloc[-1, beschreibung_index + 1 :].ne("").any()
)
if is_broken_code_qualifier and len(tabsplit_text) == 1:
# only broken code / qualifier
assert (
table.iat[-1, beschreibung_index] != "" and table.iloc[-1, beschreibung_index + 1 :].ne("").any()
), "no condition expected in broken line"
there_are_conditions = (
len(tabsplit_text) > 1
and paragraph.paragraph_format.left_indent != table_meta_data.middle_cell_left_indent_position
)

return is_empty_middle_line or there_are_conditions or is_broken_code_qualifier
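
To make the new page-break handling easier to follow, here is a small, self-contained pandas sketch of the two merge strategies that _parse_docx_table now distinguishes: folding a "broken line" continuation into the last existing row versus concatenating a regular new row. Column names and values are toy data, and the helper is simplified (the real add_text_to_last_row also respects KNOW_SUFFIXES and starts at INDEX_OF_CODES_AND_QUALIFIER_COLUMN).

    import pandas as pd

    # Toy stand-in for the accumulated AHB dataframe (the real one has more columns).
    ahb_df = pd.DataFrame({"Codes": ["UTILMD"], "Beschreibung": ["Netzanmeldung"], "Bedingung": [""]})
    # A row parsed right after a page break whose first line continues the previous row.
    parsed = pd.DataFrame({"Codes": ["D11"], "Beschreibung": ["Fortsetzung"], "Bedingung": ["[1] Wenn ..."]})

    def add_text_to_last_row(df: pd.DataFrame, row_index: int, column_index: int, text: str) -> None:
        """Append text to an existing cell, inserting a separating space when needed (simplified)."""
        if text and len(df.iat[row_index, column_index]) > 0:
            text = " " + text
        df.iat[row_index, column_index] += text

    # Broken line: merge the first parsed row into the last existing row ...
    for col_index in range(len(ahb_df.columns)):
        add_text_to_last_row(ahb_df, ahb_df.index.max(), col_index, str(parsed.iat[0, col_index]))
    # ... and concatenate only the remaining rows as genuinely new rows.
    ahb_df = pd.concat([ahb_df, parsed.iloc[1:]], ignore_index=True)
    print(ahb_df)
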
5 changes: 1 addition & 4 deletions src/kohlrahbi/ahbtable/ahbtablerow.py
@@ -27,10 +27,7 @@ class AhbTableRow(BaseModel):

model_config = ConfigDict(arbitrary_types_allowed=True)

def parse(
self,
row_type: RowType,
) -> Optional[pd.DataFrame]:
def parse(self, row_type: RowType) -> Optional[pd.DataFrame]:
"""
Writes the current row of the current table into the DataFrame depending on the type of the row.
If the row is a header row, it will be skipped and None will be returned.
3 changes: 1 addition & 2 deletions src/kohlrahbi/conditions/__init__.py
@@ -10,7 +10,7 @@
from kohlrahbi.ahb import get_pruefi_to_file_mapping
from kohlrahbi.ahbtable.ahbcondtions import AhbConditions
from kohlrahbi.ahbtable.ahbpackagetable import AhbPackageTable
from kohlrahbi.conditions.allgemeine_festlegungen import time_conditions, time_packages
from kohlrahbi.conditions.allgemeine_festlegungen import time_conditions
from kohlrahbi.logger import logger
from kohlrahbi.read_functions import get_all_conditions_from_doc

@@ -57,6 +57,5 @@ def scrape_conditions(
collected_packages.include_package_dict(packages.package_dict)
collected_conditions.include_condition_dict(cond_table.conditions_dict)
collected_conditions.include_condition_dict({edifact_format: time_conditions})
collected_packages.include_package_dict({edifact_format: time_packages})
collected_conditions.dump_as_json(output_path)
collected_packages.dump_as_json(output_path)
9 changes: 4 additions & 5 deletions src/kohlrahbi/conditions/allgemeine_festlegungen.py
@@ -2,11 +2,10 @@
"""
Contains conditions for times in allgemeine Festlegungen.
"""
time_packages = {
"UB1": "([931] ∧ [932] [490]) ⊻ ([931] ∧ [933] [491])",
"UB2": "([931] ∧ [934] [490]) ⊻ ([931] ∧ [935] [491])",
"UB3": "([931] ∧ [932] [492] ∧ [490]) ⊻ ([931] ∧ [933] [492] ∧ [491]) ⊻ ([931] ∧ [934] [493] ∧ [490]) ⊻ ([931] ∧ [935] [493] ∧ [491])",
}
# We decided against adding the time_packages to the regular packages.
# The time-packages are resolved by a special transformer in AHBicht:
# https://github.com/Hochfrequenz/ahbicht/blob/c51c81d2be098dd79ff52b754979892396207fe2/src/ahbicht/expressions/expression_resolver.py#L149

time_conditions = {
"490": "wenn Wert in diesem DE, an der Stelle CCYYMMDDHHMM ein Zeitpunkt aus dem angegeben Zeitraum der Tabelle Kapitel 3.5 „Übersicht gesetzliche deutsche Sommerzeit (MESZ)“ der Spalten:\n\t„Sommerzeit (MESZ) von“ Darstellung in UTC und\n\t„Sommerzeit (MESZ) bis“ Darstellung in UTC ist.",
"491": "wenn Wert in diesem DE, an der Stelle CCYYMMDDHHMM ein Zeitpunkt aus dem angegeben Zeitraum der Tabelle Kapitel 3.6 „Übersicht gesetzliche deutsche Zeit (MEZ)“ der Spalten: \n\t„Winterzeit (MEZ) von“ Darstellung in UTC und\n\t„Winterzeit (MEZ) bis“ Darstellung in UTC ist.",
13 changes: 13 additions & 0 deletions src/kohlrahbi/docxfilefinder.py
@@ -215,3 +215,16 @@ def get_all_docx_files_which_contain_change_histories(self) -> list[Path]:
self.remove_temporary_files()

return self.paths_to_docx_files

def get_docx_files_which_contain_quality_map(self) -> list[Path]:
"""
This function returns a list of docx files which contain a quality map.
"""

self.filter_for_latest_ahb_docx_files()
self.remove_temporary_files()

indicator_string = "UTILMDAHBStrom"
self.paths_to_docx_files = [path for path in self.paths_to_docx_files if indicator_string in path.name]

return self.paths_to_docx_files
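
A quick, self-contained illustration of what the new filter keeps; the file names below are made up, in practice paths_to_docx_files is populated from the edi_energy_mirror checkout.

    from pathlib import Path

    # Stand-ins for self.paths_to_docx_files after filtering for the latest AHB files.
    paths_to_docx_files = [
        Path("UTILMDAHBStrom-informatorischeLesefassung_20241001.docx"),
        Path("UTILMDAHBGas-informatorischeLesefassung_20241001.docx"),
        Path("REQOTEQUOTESAHB-informatorischeLesefassung_20241001.docx"),
    ]
    indicator_string = "UTILMDAHBStrom"
    quality_map_files = [path for path in paths_to_docx_files if indicator_string in path.name]
    print(quality_map_files)  # only the UTILMD Strom AHB remains
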
13 changes: 7 additions & 6 deletions src/kohlrahbi/docxtablecells/bedinungscell.py
@@ -23,15 +23,16 @@ def parse(self, ahb_row_dataframe: pd.DataFrame) -> pd.DataFrame:
"""
Parses a cell in the Bedingung column and puts the information into the appropriate column of the dataframe.
"""

bedingung = self.beautify_bedingungen()
bedingung = self.table_cell.text
bedingung = self.beautify_bedingungen(bedingung)

row_index = ahb_row_dataframe.index.max()
ahb_row_dataframe.at[row_index, "Bedingung"] += bedingung
return ahb_row_dataframe

# pylint: disable=line-too-long
def beautify_bedingungen(self) -> str:
@staticmethod
def beautify_bedingungen(bedingung: str) -> str:
"""
Beautifies the Bedingungen by removing the given line breaks and insert the line breaks at the correct places.
@@ -41,11 +42,11 @@ def beautify_bedingungen(self) -> str:
[494] Das hier genannte Datum muss der Zeitpunkt sein, zu dem das Dokument erstellt wurde, oder ein Zeitpunkt, der davor liegt
[931] Format: ZZZ = +00
"""
beautified_bedingung = self.table_cell.text.replace("\n", " ")
beautified_bedingung = bedingung.replace("\n", " ")

matches = re.findall(r"\[\d+\]", beautified_bedingung)
for match in matches[1:]:
index = beautified_bedingung.find(match)
beautified_bedingung = beautified_bedingung[:index] + "\n" + beautified_bedingung[index:]
beautified_bedingung = beautified_bedingung[:index].rstrip() + "\n" + beautified_bedingung[index:]

return beautified_bedingung
return beautified_bedingung.lstrip()
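
As a quick sanity check of the refactored helper (now a staticmethod that receives the raw cell text instead of reading self.table_cell itself), this is how it behaves on the docstring's example; the class name BedingungCell is assumed from the module name.

    from kohlrahbi.docxtablecells.bedinungscell import BedingungCell  # class name assumed

    raw_text = (
        "[494] Das hier genannte Datum muss der Zeitpunkt sein, zu dem das Dokument\n"
        "erstellt wurde, oder ein Zeitpunkt, der davor liegt [931] Format: ZZZ = +00"
    )
    print(BedingungCell.beautify_bedingungen(raw_text))
    # [494] Das hier genannte Datum muss der Zeitpunkt sein, zu dem das Dokument erstellt wurde, oder ein Zeitpunkt, der davor liegt
    # [931] Format: ZZZ = +00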