Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 54 additions & 4 deletions docling_jobkit/convert/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,12 @@
VlmPipelineOptions,
)
from docling.datamodel.pipeline_options_vlm_model import ApiVlmOptions, InlineVlmOptions
from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
from docling.document_converter import (
DocumentConverter,
FormatOption,
ImageFormatOption,
PdfFormatOption,
)
from docling.models.factories import get_ocr_factory
from docling.pipeline.vlm_pipeline import VlmPipeline
from docling_core.types.doc import ImageRefMode
Expand Down Expand Up @@ -68,12 +73,26 @@ def _hash_pdf_format_option(pdf_format_option: PdfFormatOption) -> bytes:
data["pipeline_options"] = pdf_format_option.pipeline_options.model_dump(
serialize_as_any=True, mode="json"
)
data["pipeline_options_type"] = (
f"{pdf_format_option.pipeline_options.__class__.__module__}."
f"{pdf_format_option.pipeline_options.__class__.__qualname__}"
)
else:
data["pipeline_options_type"] = None

# Replace `pipeline_cls` with a string representation
data["pipeline_cls"] = repr(data["pipeline_cls"])
pipeline_cls = pdf_format_option.pipeline_cls
data["pipeline_cls"] = (
f"{pipeline_cls.__module__}.{pipeline_cls.__qualname__}"
if pipeline_cls is not None
else "None"
)

# Replace `backend` with a string representation
data["backend"] = repr(data["backend"])
backend = pdf_format_option.backend
data["backend"] = (
f"{backend.__module__}.{backend.__qualname__}" if backend is not None else "None"
)

# Serialize the dictionary to JSON with sorted keys to have consistent hashes
serialized_data = json.dumps(data, sort_keys=True)
Expand Down Expand Up @@ -121,9 +140,19 @@ def _create_converter_cache_from_hash(
@lru_cache(maxsize=cache_size)
def _get_converter_from_hash(options_hash: bytes) -> DocumentConverter:
    """Build the DocumentConverter registered under ``options_hash``.

    Results are memoized by the surrounding ``lru_cache`` so that repeated
    requests with the same options hash reuse one converter instance.

    Args:
        options_hash: Key into ``self._options_map`` identifying the
            ``PdfFormatOption`` the converter should be built from.

    Returns:
        A ``DocumentConverter`` configured for both PDF and image inputs.

    Raises:
        KeyError: If ``options_hash`` is not present in ``self._options_map``.
    """
    pdf_format_option = self._options_map[options_hash]

    # By default image inputs reuse the PDF format option unchanged.
    image_format_option: FormatOption = pdf_format_option

    # `pipeline_cls` is checked to be a real class first because
    # `issubclass()` raises TypeError when given a non-class argument.
    if isinstance(pdf_format_option.pipeline_cls, type) and issubclass(
        pdf_format_option.pipeline_cls, VlmPipeline
    ):
        # For VLM pipelines, images get a dedicated ImageFormatOption that
        # carries over the pipeline class, pipeline options and backend
        # options. NOTE(review): presumably this lets ImageFormatOption
        # apply its own backend default instead of the PDF one -- confirm
        # against docling.
        image_format_option = ImageFormatOption(
            pipeline_cls=pdf_format_option.pipeline_cls,
            pipeline_options=pdf_format_option.pipeline_options,
            backend_options=pdf_format_option.backend_options,
        )

    # Exactly one entry per input format: a repeated key in a dict display
    # is silently overwritten by the last occurrence, so only the resolved
    # image option is listed for InputFormat.IMAGE.
    format_options: dict[InputFormat, FormatOption] = {
        InputFormat.PDF: pdf_format_option,
        InputFormat.IMAGE: image_format_option,
    }

    return DocumentConverter(format_options=format_options)
Expand Down Expand Up @@ -282,6 +311,27 @@ def _parse_vlm_pdf_opts(
request.vlm_pipeline_model_api.model_dump()
)

pipeline_options.do_picture_classification = request.do_picture_classification
pipeline_options.do_picture_description = request.do_picture_description

if request.picture_description_local is not None:
pipeline_options.picture_description_options = (
PictureDescriptionVlmOptions.model_validate(
request.picture_description_local.model_dump()
)
)

if request.picture_description_api is not None:
pipeline_options.picture_description_options = (
PictureDescriptionApiOptions.model_validate(
request.picture_description_api.model_dump()
)
)

pipeline_options.picture_description_options.picture_area_threshold = (
request.picture_description_area_threshold
)

return pipeline_options

# Computes the PDF pipeline options and returns the PdfFormatOption and its hash
Expand Down
17 changes: 17 additions & 0 deletions tests/test_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
from docling.datamodel import vlm_model_specs
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import (
PdfBackend,
PdfPipelineOptions,
Expand Down Expand Up @@ -112,6 +113,12 @@ def test_options_cache_key():
opts.do_picture_description = True
pipeline_opts = m.get_pdf_pipeline_opts(opts)
hash = _hash_pdf_format_option(pipeline_opts)
assert hash not in hashes
hashes.add(hash)

opts = ConvertDocumentsOptions(pipeline=ProcessingPipeline.VLM)
pipeline_opts = m.get_pdf_pipeline_opts(opts)
hash = _hash_pdf_format_option(pipeline_opts)
# pprint(pipeline_opts.pipeline_options.model_dump(serialize_as_any=True))
assert hash not in hashes
hashes.add(hash)
Expand Down Expand Up @@ -144,3 +151,13 @@ def test_options_cache_key():
# pprint(pipeline_opts.pipeline_options.model_dump(serialize_as_any=True))
assert hash not in hashes
hashes.add(hash)


def test_image_pipeline_uses_vlm_pipeline_when_requested():
    """Check that requesting the VLM pipeline also configures image inputs.

    Builds a converter from options that select ``ProcessingPipeline.VLM``
    and verifies that the format option registered for ``InputFormat.IMAGE``
    uses ``VlmPipeline`` together with ``VlmPipelineOptions``.
    """
    manager = DoclingConverterManager(config=DoclingConverterManagerConfig())
    vlm_request = ConvertDocumentsOptions(pipeline=ProcessingPipeline.VLM)

    # Resolve the request into a format option, then into a converter.
    pdf_format_option = manager.get_pdf_pipeline_opts(vlm_request)
    doc_converter = manager.get_converter(pdf_format_option)

    image_option = doc_converter.format_to_options[InputFormat.IMAGE]
    assert image_option.pipeline_cls == VlmPipeline
    assert isinstance(image_option.pipeline_options, VlmPipelineOptions)