Skip to content

Commit

Permalink
fixed the tests
Browse files (browse the repository at this point in the history)
Signed-off-by: Peter Staar <taa@zurich.ibm.com>
  • Loading branch information
PeterStaar-IBM committed Nov 7, 2024
1 parent b154d4f commit 5c82ff9
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 4,427 deletions.
8 changes: 6 additions & 2 deletions docling/backend/html_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,15 +114,19 @@ def walk(self, element, doc):
_log.info(f" -> error treating child: {exc}")
raise exc

else:
_log.debug(f"ignoring element of type {type(element)}")

"""
elif isinstance(element, Tag):
try:
self.analyse_element(element, 0, doc)
except Exception as exc:
_log.info(f" -> error treating elem: {exc}")
raise exc
else:
_log.debug(f"ignoring element of type {type(element)}")
"""


except Exception as exc:
_log.debug(f"error walking element: {type(element)}")
pass
Expand Down
9 changes: 9 additions & 0 deletions docling/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ def export_documents(
export_md: bool,
export_txt: bool,
export_doctags: bool,
export_itxt: bool,
):

success_count = 0
Expand Down Expand Up @@ -115,6 +116,12 @@ def export_documents(
_log.info(f"writing Doc Tags output to {fname}")
fp.write(conv_res.document.export_to_document_tokens())

# Export Indented Text format:
if export_itxt:
fname = output_dir / f"{doc_filename}.itxt"
with fname.open("w") as fp:
_log.info(f"writing Doc Tags output to {fname}")
fp.write(conv_res.document._export_to_indented_text())
else:
_log.warning(f"Document {conv_res.input.file} failed to convert.")
failure_count += 1
Expand Down Expand Up @@ -200,6 +207,7 @@ def convert(
export_md = OutputFormat.MARKDOWN in to_formats
export_txt = OutputFormat.TEXT in to_formats
export_doctags = OutputFormat.DOCTAGS in to_formats
export_itxt = OutputFormat.INDENTED_TEXT in to_formats

match ocr_engine:
case OcrEngine.EASYOCR:
Expand Down Expand Up @@ -243,6 +251,7 @@ def convert(
export_md=export_md,
export_txt=export_txt,
export_doctags=export_doctags,
export_itxt=export_itxt,
)

end_time = time.time() - start_time
Expand Down
1 change: 1 addition & 0 deletions docling/datamodel/base_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class OutputFormat(str, Enum):
JSON = "json"
TEXT = "text"
DOCTAGS = "doctags"
INDENTED_TEXT = "itxt"


FormatToExtensions: Dict[InputFormat, List[str]] = {
Expand Down
Loading

0 comments on commit 5c82ff9

Please sign in to comment.