You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Traceback (most recent call last):
File "d:\Develop\conda\envs\docling\Lib\runpy.py", line 198, in _run_module_as_main
return _run_code(code, main_globals, None,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "d:\Develop\conda\envs\docling\Lib\runpy.py", line 88, in _run_code
exec(code, run_globals)
File "c:\Users\linyu\.vscode\extensions\ms-python.debugpy-2024.12.0-win32-x64\bundled\libs\debugpy\adapter/../..\debugpy\launcher/../..\debugpy\__main__.py", line 71, in <module>
cli.main()
File "c:\Users\linyu.vscode\extensions\ms-python.debugpy-2024.12.0-win32-x64\bundled\libs\debugpy\adapter/../..\debugpy\launcher/../..\debugpy/..\debugpy\server\cli.py", line 501, in main
run()
File "c:\Users\linyu.vscode\extensions\ms-python.debugpy-2024.12.0-win32-x64\bundled\libs\debugpy\adapter/../..\debugpy\launcher/../..\debugpy/..\debugpy\server\cli.py", line 351, in run_file
runpy.run_path(target, run_name="__main__")
File "c:\Users\linyu.vscode\extensions\ms-python.debugpy-2024.12.0-win32-x64\bundled\libs\debugpy_vendored\pydevd_pydevd_bundle\pydevd_runpy.py", line 310, in run_path
return _run_module_code(code, init_globals, run_name, pkg_name=pkg_name, script_name=fname)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Users\linyu.vscode\extensions\ms-python.debugpy-2024.12.0-win32-x64\bundled\libs\debugpy_vendored\pydevd_pydevd_bundle\pydevd_runpy.py", line 127, in _run_module_code
_run_code(code, mod_globals, init_globals, mod_name, mod_spec, pkg_name, script_name)
File "c:\Users\linyu.vscode\extensions\ms-python.debugpy-2024.12.0-win32-x64\bundled\libs\debugpy_vendored\pydevd_pydevd_bundle\pydevd_runpy.py", line 118, in _run_code
exec(code, run_globals)
File "D:\Develop\CodeProjects\doc-parser\docling_parser.py", line 22, in <module>
conv_result = doc_converter.convert(input_doc_path, max_num_pages=2)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "d:\Develop\conda\envs\docling\Lib\site-packages\pydantic_internal_validate_call.py", line 38, in wrapper_function
return wrapper(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^
File "d:\Develop\conda\envs\docling\Lib\site-packages\pydantic\_internal\_validate_call.py", line 111, in __call__
res = self.pydantic_validator.validate_python(pydantic_core.ArgsKwargs(args, kwargs))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "d:\Develop\conda\envs\docling\Lib\site-packages\docling\document_converter.py", line 172, in convert
return next(all_res)
^^^^^^^^^^^^^
File "d:\Develop\conda\envs\docling\Lib\site-packages\docling\document_converter.py", line 193, in convert_all
for conv_res in conv_res_iter:
^^^^^^^^^^^^^
File "d:\Develop\conda\envs\docling\Lib\site-packages\docling\document_converter.py", line 228, in _convert
for item in map(
^^^^
File "d:\Develop\conda\envs\docling\Lib\site-packages\docling\document_converter.py", line 269, in _process_document
conv_res = self._execute_pipeline(in_doc, raises_on_error=raises_on_error)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "d:\Develop\conda\envs\docling\Lib\site-packages\docling\document_converter.py", line 305, in _execute_pipeline
raise ConversionError(f"Input document {in_doc.file} is not valid.")
docling.exceptions.ConversionError: Input document doc\2410.13085v1.pdf is not valid.
...
Steps to reproduce
运行以下代码报错 (Running the following code raises the error above):
from pathlib import Path
from docling.datamodel.base_models import InputFormat
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.datamodel.pipeline_options import PdfPipelineOptions, TableFormerMode
pipeline_options = PdfPipelineOptions(do_table_structure=True)
pipeline_options.table_structure_options.mode = TableFormerMode.ACCURATE # use more accurate TableFormer model
doc_converter = DocumentConverter(
format_options={
InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options)
}
)
#source = "https://arxiv.org/pdf/2408.09869" # PDF path or URL
input_doc_path = Path("./doc/2410.13085v1.pdf")
conv_result = doc_converter.convert(input_doc_path, max_num_pages=2)
## Export results
output_dir = Path("scratch")
output_dir.mkdir(parents=True, exist_ok=True)
doc_filename = conv_result.input.file.stem
# Export Markdown format:
with (output_dir / f"{doc_filename}.md").open("w", encoding="utf-8") as fp:
fp.write(conv_result.document.export_to_markdown())
...
Docling version
2.10.0
...
Python version
3.12.7
...
The text was updated successfully, but these errors were encountered:
I solved the problem: it was caused by max_num_pages being set to 2. When tracing the source code, I found that when the number of pages in a file is greater than max_num_pages, the document's valid value will be False, which is what triggers the "is not valid" ConversionError.
@CourageKeenTriumph Thanks for figuring this out. The behaviour you describe is intentional: max_num_pages filters out documents whose page count is greater than the value provided.
Bug
Traceback (most recent call last):
File "d:\Develop\conda\envs\docling\Lib\runpy.py", line 198, in _run_module_as_main
return _run_code(code, main_globals, None,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "d:\Develop\conda\envs\docling\Lib\runpy.py", line 88, in _run_code
exec(code, run_globals)
File "c:\Users\linyu\.vscode\extensions\ms-python.debugpy-2024.12.0-win32-x64\bundled\libs\debugpy\adapter/../..\debugpy\launcher/../..\debugpy\__main__.py", line 71, in <module>
cli.main()
File "c:\Users\linyu.vscode\extensions\ms-python.debugpy-2024.12.0-win32-x64\bundled\libs\debugpy\adapter/../..\debugpy\launcher/../..\debugpy/..\debugpy\server\cli.py", line 501, in main
run()
File "c:\Users\linyu.vscode\extensions\ms-python.debugpy-2024.12.0-win32-x64\bundled\libs\debugpy\adapter/../..\debugpy\launcher/../..\debugpy/..\debugpy\server\cli.py", line 351, in run_file
runpy.run_path(target, run_name="__main__")
File "c:\Users\linyu.vscode\extensions\ms-python.debugpy-2024.12.0-win32-x64\bundled\libs\debugpy_vendored\pydevd_pydevd_bundle\pydevd_runpy.py", line 310, in run_path
return _run_module_code(code, init_globals, run_name, pkg_name=pkg_name, script_name=fname)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Users\linyu.vscode\extensions\ms-python.debugpy-2024.12.0-win32-x64\bundled\libs\debugpy_vendored\pydevd_pydevd_bundle\pydevd_runpy.py", line 127, in _run_module_code
_run_code(code, mod_globals, init_globals, mod_name, mod_spec, pkg_name, script_name)
File "c:\Users\linyu.vscode\extensions\ms-python.debugpy-2024.12.0-win32-x64\bundled\libs\debugpy_vendored\pydevd_pydevd_bundle\pydevd_runpy.py", line 118, in _run_code
exec(code, run_globals)
File "D:\Develop\CodeProjects\doc-parser\docling_parser.py", line 22, in <module>
conv_result = doc_converter.convert(input_doc_path, max_num_pages=2)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "d:\Develop\conda\envs\docling\Lib\site-packages\pydantic_internal_validate_call.py", line 38, in wrapper_function
return wrapper(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^
File "d:\Develop\conda\envs\docling\Lib\site-packages\pydantic\_internal\_validate_call.py", line 111, in __call__
res = self.pydantic_validator.validate_python(pydantic_core.ArgsKwargs(args, kwargs))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "d:\Develop\conda\envs\docling\Lib\site-packages\docling\document_converter.py", line 172, in convert
return next(all_res)
^^^^^^^^^^^^^
File "d:\Develop\conda\envs\docling\Lib\site-packages\docling\document_converter.py", line 193, in convert_all
for conv_res in conv_res_iter:
^^^^^^^^^^^^^
File "d:\Develop\conda\envs\docling\Lib\site-packages\docling\document_converter.py", line 228, in _convert
for item in map(
^^^^
File "d:\Develop\conda\envs\docling\Lib\site-packages\docling\document_converter.py", line 269, in _process_document
conv_res = self._execute_pipeline(in_doc, raises_on_error=raises_on_error)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "d:\Develop\conda\envs\docling\Lib\site-packages\docling\document_converter.py", line 305, in _execute_pipeline
raise ConversionError(f"Input document {in_doc.file} is not valid.")
docling.exceptions.ConversionError: Input document doc\2410.13085v1.pdf is not valid.
...
Steps to reproduce
运行以下代码报错 (Running the following code raises the error above):
...
Docling version
2.10.0
...
Python version
3.12.7
...
The text was updated successfully, but these errors were encountered: