diff --git a/docling/backend/patent_uspto_backend.py b/docling/backend/xml_uspto_backend.py similarity index 99% rename from docling/backend/patent_uspto_backend.py rename to docling/backend/xml_uspto_backend.py index d5614728..bcbe25be 100644 --- a/docling/backend/patent_uspto_backend.py +++ b/docling/backend/xml_uspto_backend.py @@ -110,7 +110,7 @@ def unload(self) -> None: @classmethod @override def supported_formats(cls) -> set[InputFormat]: - return {InputFormat.PATENT_USPTO} + return {InputFormat.XML_USPTO} @override def convert(self) -> DoclingDocument: diff --git a/docling/datamodel/base_models.py b/docling/datamodel/base_models.py index 6a95fdcd..4f73286f 100644 --- a/docling/datamodel/base_models.py +++ b/docling/datamodel/base_models.py @@ -36,7 +36,7 @@ class InputFormat(str, Enum): ASCIIDOC = "asciidoc" MD = "md" XLSX = "xlsx" - PATENT_USPTO = "uspto" + XML_USPTO = "uspto" class OutputFormat(str, Enum): @@ -56,7 +56,7 @@ class OutputFormat(str, Enum): InputFormat.IMAGE: ["jpg", "jpeg", "png", "tif", "tiff", "bmp"], InputFormat.ASCIIDOC: ["adoc", "asciidoc", "asc"], InputFormat.XLSX: ["xlsx"], - InputFormat.PATENT_USPTO: ["xml", "txt"], + InputFormat.XML_USPTO: ["xml", "txt"], } FormatToMimeType: Dict[InputFormat, List[str]] = { @@ -83,7 +83,7 @@ class OutputFormat(str, Enum): InputFormat.XLSX: [ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" ], - InputFormat.PATENT_USPTO: ["application/xml", "text/plain"], + InputFormat.XML_USPTO: ["application/xml", "text/plain"], } MimeTypeToFormat = { diff --git a/docling/document_converter.py b/docling/document_converter.py index 123051c8..48e41d6c 100644 --- a/docling/document_converter.py +++ b/docling/document_converter.py @@ -15,7 +15,7 @@ from docling.backend.msexcel_backend import MsExcelDocumentBackend from docling.backend.mspowerpoint_backend import MsPowerpointDocumentBackend from docling.backend.msword_backend import MsWordDocumentBackend -from docling.backend.patent_uspto_backend import PatentUsptoDocumentBackend +from docling.backend.xml_uspto_backend import PatentUsptoDocumentBackend from docling.datamodel.base_models import ( ConversionStatus, DoclingComponentType, @@ -118,7 +118,7 @@ def _get_default_option(format: InputFormat) -> FormatOption: InputFormat.HTML: FormatOption( pipeline_cls=SimplePipeline, backend=HTMLDocumentBackend ), - InputFormat.PATENT_USPTO: FormatOption( + InputFormat.XML_USPTO: FormatOption( pipeline_cls=SimplePipeline, backend=PatentUsptoDocumentBackend ), InputFormat.IMAGE: FormatOption( diff --git a/tests/test_backend_patent_uspto.py b/tests/test_backend_patent_uspto.py index 5c22f7ad..8e34d2b4 100644 --- a/tests/test_backend_patent_uspto.py +++ b/tests/test_backend_patent_uspto.py @@ -12,7 +12,7 @@ from docling_core.types import DoclingDocument from docling_core.types.doc import DocItemLabel, TableData, TextItem -from docling.backend.patent_uspto_backend import PatentUsptoDocumentBackend, XmlTable +from docling.backend.xml_uspto_backend import PatentUsptoDocumentBackend, XmlTable from docling.datamodel.base_models import InputFormat from docling.datamodel.document import ( ConversionResult, @@ -45,7 +45,7 @@ def patents() -> list[tuple[Path, DoclingDocument]]: for in_path in patent_paths: in_doc = InputDocument( path_or_stream=in_path, - format=InputFormat.PATENT_USPTO, + format=InputFormat.XML_USPTO, backend=PatentUsptoDocumentBackend, ) backend = PatentUsptoDocumentBackend(in_doc=in_doc, path_or_stream=in_path)