Skip to content

Commit

Permalink
refactor: change the name of the USPTO input format
Browse files Browse the repository at this point in the history
Change the name of the patent USPTO input format to show the typical format (XML).

Signed-off-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
  • Loading branch information
ceberam committed Dec 16, 2024
1 parent c0a086c commit 264ef14
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def unload(self) -> None:
@classmethod
@override
def supported_formats(cls) -> set[InputFormat]:
-return {InputFormat.PATENT_USPTO}
+return {InputFormat.XML_USPTO}

@override
def convert(self) -> DoclingDocument:
Expand Down
6 changes: 3 additions & 3 deletions docling/datamodel/base_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class InputFormat(str, Enum):
ASCIIDOC = "asciidoc"
MD = "md"
XLSX = "xlsx"
-PATENT_USPTO = "uspto"
+XML_USPTO = "uspto"


class OutputFormat(str, Enum):
Expand All @@ -56,7 +56,7 @@ class OutputFormat(str, Enum):
InputFormat.IMAGE: ["jpg", "jpeg", "png", "tif", "tiff", "bmp"],
InputFormat.ASCIIDOC: ["adoc", "asciidoc", "asc"],
InputFormat.XLSX: ["xlsx"],
-InputFormat.PATENT_USPTO: ["xml", "txt"],
+InputFormat.XML_USPTO: ["xml", "txt"],
}

FormatToMimeType: Dict[InputFormat, List[str]] = {
Expand All @@ -83,7 +83,7 @@ class OutputFormat(str, Enum):
InputFormat.XLSX: [
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
],
-InputFormat.PATENT_USPTO: ["application/xml", "text/plain"],
+InputFormat.XML_USPTO: ["application/xml", "text/plain"],
}

MimeTypeToFormat = {
Expand Down
4 changes: 2 additions & 2 deletions docling/document_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from docling.backend.msexcel_backend import MsExcelDocumentBackend
from docling.backend.mspowerpoint_backend import MsPowerpointDocumentBackend
from docling.backend.msword_backend import MsWordDocumentBackend
-from docling.backend.patent_uspto_backend import PatentUsptoDocumentBackend
+from docling.backend.xml_uspto_backend import PatentUsptoDocumentBackend
from docling.datamodel.base_models import (
ConversionStatus,
DoclingComponentType,
Expand Down Expand Up @@ -118,7 +118,7 @@ def _get_default_option(format: InputFormat) -> FormatOption:
InputFormat.HTML: FormatOption(
pipeline_cls=SimplePipeline, backend=HTMLDocumentBackend
),
-InputFormat.PATENT_USPTO: FormatOption(
+InputFormat.XML_USPTO: FormatOption(
pipeline_cls=SimplePipeline, backend=PatentUsptoDocumentBackend
),
InputFormat.IMAGE: FormatOption(
Expand Down
4 changes: 2 additions & 2 deletions tests/test_backend_patent_uspto.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from docling_core.types import DoclingDocument
from docling_core.types.doc import DocItemLabel, TableData, TextItem

-from docling.backend.patent_uspto_backend import PatentUsptoDocumentBackend, XmlTable
+from docling.backend.xml_uspto_backend import PatentUsptoDocumentBackend, XmlTable
from docling.datamodel.base_models import InputFormat
from docling.datamodel.document import (
ConversionResult,
Expand Down Expand Up @@ -45,7 +45,7 @@ def patents() -> list[tuple[Path, DoclingDocument]]:
for in_path in patent_paths:
in_doc = InputDocument(
path_or_stream=in_path,
-format=InputFormat.PATENT_USPTO,
+format=InputFormat.XML_USPTO,
backend=PatentUsptoDocumentBackend,
)
backend = PatentUsptoDocumentBackend(in_doc=in_doc, path_or_stream=in_path)
Expand Down

0 comments on commit 264ef14

Please sign in to comment.