Model family engines example
What this example does
- Demonstrates runtime abstraction for object detection engines
- Demonstrates runtime abstraction for image classification engines
- Runs the same layout detection task with both ONNX Runtime and Transformers engines
- Runs picture classification with the corresponding ONNX Runtime or Transformers engine
- Shows how to easily switch between inference engines while using the same model
- Detects document structure elements (text blocks, tables, figures, etc.)
Requirements
- Python 3.10+
- Install Docling:
pip install "docling[onnxruntime]"
How to run (from repo root)
python docs/examples/model_family_engines_example.py
Example code
In [ ]:
Copied!
import logging
import sys
from docling_core.types.doc.base import ImageRefMode
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
from docling.datamodel.base_models import InputFormat
from docling.datamodel.image_classification_engine_options import (
OnnxRuntimeImageClassificationEngineOptions,
TransformersImageClassificationEngineOptions,
)
from docling.datamodel.object_detection_engine_options import (
OnnxRuntimeObjectDetectionEngineOptions,
TransformersObjectDetectionEngineOptions,
)
from docling.datamodel.picture_classification_options import (
DocumentPictureClassifierOptions,
)
from docling.datamodel.pipeline_options import (
LayoutObjectDetectionOptions,
PdfPipelineOptions,
)
from docling.document_converter import (
DocumentConverter,
ImageFormatOption,
PdfFormatOption,
)
# Module-level logger, named after this module, used by the functions below.
_log = logging.getLogger(__name__)
def is_onnxruntime_available() -> bool:
    """Report whether the ``onnxruntime`` package can be imported.

    Returns:
        ``True`` when ``import onnxruntime`` succeeds and ``False`` when it
        raises ``ImportError``. Any other exception propagates to the caller.
    """
    try:
        # Imported purely as an availability probe; the module is not used.
        import onnxruntime  # noqa: F401
    except ImportError:
        available = False
    else:
        available = True
    return available
def _build_engine_options(engine_kind: str):
    """Return the display name and the engine options for one runtime family.

    Args:
        engine_kind: Runtime selector; ``"onnxruntime"`` and
            ``"transformers"`` are the values handled here.

    Returns:
        A 3-tuple ``(display_name, object_detection_engine_options,
        image_classification_engine_options)`` where both option objects
        belong to the selected runtime.

    Raises:
        ValueError: If ``engine_kind`` is not one of the handled selectors.
    """
    # Reject unknown selectors up front so the branches below stay flat.
    if engine_kind not in ("onnxruntime", "transformers"):
        raise ValueError(f"Unsupported engine kind: {engine_kind}")

    if engine_kind == "onnxruntime":
        display_name = "ONNX Runtime"
        detection_options = OnnxRuntimeObjectDetectionEngineOptions()
        classification_options = OnnxRuntimeImageClassificationEngineOptions()
    else:
        display_name = "Transformers"
        # Both Transformers engines are created with compile_model=False.
        detection_options = TransformersObjectDetectionEngineOptions(
            compile_model=False
        )
        classification_options = TransformersImageClassificationEngineOptions(
            compile_model=False
        )

    return display_name, detection_options, classification_options
def run_with_engine(engine_kind: str, input_doc_path: str):
    """Convert one document with the layout and picture engines of a runtime.

    The layout model and the picture classifier are both configured with the
    engine options that ``_build_engine_options`` returns for ``engine_kind``.
    The converted document is saved as
    ``model_family_engines_<engine_kind>.html`` (a relative path) using
    ``ImageRefMode.EMBEDDED``.

    Args:
        engine_kind: Runtime selector forwarded to ``_build_engine_options``.
        input_doc_path: Path of the document passed to the converter.

    Returns:
        The result object returned by ``DocumentConverter.convert``.

    Raises:
        ValueError: Propagated from ``_build_engine_options`` for an
            unsupported ``engine_kind``.
    """
    engine_name, layout_engine, picture_engine = _build_engine_options(engine_kind)

    _log.info(f"{'=' * 80}")
    _log.info("Running conversion with %s runtime", engine_name)
    _log.info(f"{'=' * 80}\n")

    # Layout model: start from the preset, then swap in the selected engine.
    layout = LayoutObjectDetectionOptions.from_preset("layout_heron_default")
    layout.engine_options = layout_engine

    # Picture classifier: same approach, same engine family as the layout model.
    classifier = DocumentPictureClassifierOptions.from_preset(
        "document_figure_classifier_v2"
    )
    classifier.engine_options = picture_engine

    # Pipeline switches: OCR off, table structure and picture classification
    # on, page/picture images generated at scale 2.0, device set to CPU.
    options = PdfPipelineOptions()
    options.accelerator_options = AcceleratorOptions(device=AcceleratorDevice.CPU)
    options.do_ocr = False
    options.do_table_structure = True
    options.do_picture_classification = True
    options.generate_page_images = True
    options.generate_picture_images = True
    options.images_scale = 2.0
    options.layout_options = layout
    options.picture_classification_options = classifier

    # The same pipeline options object is registered for PDF and image inputs.
    per_format = {
        InputFormat.PDF: PdfFormatOption(pipeline_options=options),
        InputFormat.IMAGE: ImageFormatOption(pipeline_options=options),
    }
    converter = DocumentConverter(format_options=per_format)
    conversion = converter.convert(input_doc_path)

    # The engine kind is part of the filename, so runs do not overwrite each other.
    html_path = f"model_family_engines_{engine_kind}.html"
    conversion.document.save_as_html(html_path, image_mode=ImageRefMode.EMBEDDED)

    _log.info(
        "Completed %s run: output=%s, pictures=%d",
        engine_name,
        html_path,
        len(conversion.document.pictures),
    )
    return conversion
def main():
    """Run the sample PDF through each engine family that is available.

    Configures INFO-level logging, runs the ONNX Runtime engines when
    ``onnxruntime`` is importable (otherwise logs a warning and skips that
    run), and then always runs the Transformers engines.
    """
    logging.basicConfig(
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        level=logging.INFO,
    )
    logging.getLogger("docling").setLevel(logging.INFO)

    # Sample PDF from the test data; the path is relative to the repo root.
    sample_pdf = "tests/data/pdf/2206.01062.pdf"

    # Run 1: ONNX Runtime engines, only when the package can be imported.
    # NOTE: run_with_engine sets the accelerator device to CPU for every run.
    if not is_onnxruntime_available():
        major, minor = sys.version_info[:2]
        _log.warning(
            "Skipping ONNX engine run: onnxruntime is not available for Python %d.%d. "
            "Use Python < 3.14 and install `docling[onnxruntime]`.",
            major,
            minor,
        )
    else:
        run_with_engine("onnxruntime", sample_pdf)

    # Run 2: Transformers engines; this run is unconditional.
    run_with_engine("transformers", sample_pdf)
# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
import logging
import sys
from docling_core.types.doc.base import ImageRefMode
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
from docling.datamodel.base_models import InputFormat
from docling.datamodel.image_classification_engine_options import (
OnnxRuntimeImageClassificationEngineOptions,
TransformersImageClassificationEngineOptions,
)
from docling.datamodel.object_detection_engine_options import (
OnnxRuntimeObjectDetectionEngineOptions,
TransformersObjectDetectionEngineOptions,
)
from docling.datamodel.picture_classification_options import (
DocumentPictureClassifierOptions,
)
from docling.datamodel.pipeline_options import (
LayoutObjectDetectionOptions,
PdfPipelineOptions,
)
from docling.document_converter import (
DocumentConverter,
ImageFormatOption,
PdfFormatOption,
)
# Module-level logger, named after this module, used by the functions below.
_log = logging.getLogger(__name__)
def is_onnxruntime_available() -> bool:
    """Report whether the ``onnxruntime`` package can be imported.

    Returns:
        ``True`` when ``import onnxruntime`` succeeds and ``False`` when it
        raises ``ImportError``. Any other exception propagates to the caller.
    """
    try:
        # Imported purely as an availability probe; the module is not used.
        import onnxruntime  # noqa: F401
    except ImportError:
        available = False
    else:
        available = True
    return available
def _build_engine_options(engine_kind: str):
    """Return the display name and the engine options for one runtime family.

    Args:
        engine_kind: Runtime selector; ``"onnxruntime"`` and
            ``"transformers"`` are the values handled here.

    Returns:
        A 3-tuple ``(display_name, object_detection_engine_options,
        image_classification_engine_options)`` where both option objects
        belong to the selected runtime.

    Raises:
        ValueError: If ``engine_kind`` is not one of the handled selectors.
    """
    # Reject unknown selectors up front so the branches below stay flat.
    if engine_kind not in ("onnxruntime", "transformers"):
        raise ValueError(f"Unsupported engine kind: {engine_kind}")

    if engine_kind == "onnxruntime":
        display_name = "ONNX Runtime"
        detection_options = OnnxRuntimeObjectDetectionEngineOptions()
        classification_options = OnnxRuntimeImageClassificationEngineOptions()
    else:
        display_name = "Transformers"
        # Both Transformers engines are created with compile_model=False.
        detection_options = TransformersObjectDetectionEngineOptions(
            compile_model=False
        )
        classification_options = TransformersImageClassificationEngineOptions(
            compile_model=False
        )

    return display_name, detection_options, classification_options
def run_with_engine(engine_kind: str, input_doc_path: str):
    """Convert one document with the layout and picture engines of a runtime.

    The layout model and the picture classifier are both configured with the
    engine options that ``_build_engine_options`` returns for ``engine_kind``.
    The converted document is saved as
    ``model_family_engines_<engine_kind>.html`` (a relative path) using
    ``ImageRefMode.EMBEDDED``.

    Args:
        engine_kind: Runtime selector forwarded to ``_build_engine_options``.
        input_doc_path: Path of the document passed to the converter.

    Returns:
        The result object returned by ``DocumentConverter.convert``.

    Raises:
        ValueError: Propagated from ``_build_engine_options`` for an
            unsupported ``engine_kind``.
    """
    engine_name, layout_engine, picture_engine = _build_engine_options(engine_kind)

    _log.info(f"{'=' * 80}")
    _log.info("Running conversion with %s runtime", engine_name)
    _log.info(f"{'=' * 80}\n")

    # Layout model: start from the preset, then swap in the selected engine.
    layout = LayoutObjectDetectionOptions.from_preset("layout_heron_default")
    layout.engine_options = layout_engine

    # Picture classifier: same approach, same engine family as the layout model.
    classifier = DocumentPictureClassifierOptions.from_preset(
        "document_figure_classifier_v2"
    )
    classifier.engine_options = picture_engine

    # Pipeline switches: OCR off, table structure and picture classification
    # on, page/picture images generated at scale 2.0, device set to CPU.
    options = PdfPipelineOptions()
    options.accelerator_options = AcceleratorOptions(device=AcceleratorDevice.CPU)
    options.do_ocr = False
    options.do_table_structure = True
    options.do_picture_classification = True
    options.generate_page_images = True
    options.generate_picture_images = True
    options.images_scale = 2.0
    options.layout_options = layout
    options.picture_classification_options = classifier

    # The same pipeline options object is registered for PDF and image inputs.
    per_format = {
        InputFormat.PDF: PdfFormatOption(pipeline_options=options),
        InputFormat.IMAGE: ImageFormatOption(pipeline_options=options),
    }
    converter = DocumentConverter(format_options=per_format)
    conversion = converter.convert(input_doc_path)

    # The engine kind is part of the filename, so runs do not overwrite each other.
    html_path = f"model_family_engines_{engine_kind}.html"
    conversion.document.save_as_html(html_path, image_mode=ImageRefMode.EMBEDDED)

    _log.info(
        "Completed %s run: output=%s, pictures=%d",
        engine_name,
        html_path,
        len(conversion.document.pictures),
    )
    return conversion
def main():
    """Run the sample PDF through each engine family that is available.

    Configures INFO-level logging, runs the ONNX Runtime engines when
    ``onnxruntime`` is importable (otherwise logs a warning and skips that
    run), and then always runs the Transformers engines.
    """
    logging.basicConfig(
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        level=logging.INFO,
    )
    logging.getLogger("docling").setLevel(logging.INFO)

    # Sample PDF from the test data; the path is relative to the repo root.
    sample_pdf = "tests/data/pdf/2206.01062.pdf"

    # Run 1: ONNX Runtime engines, only when the package can be imported.
    # NOTE: run_with_engine sets the accelerator device to CPU for every run.
    if not is_onnxruntime_available():
        major, minor = sys.version_info[:2]
        _log.warning(
            "Skipping ONNX engine run: onnxruntime is not available for Python %d.%d. "
            "Use Python < 3.14 and install `docling[onnxruntime]`.",
            major,
            minor,
        )
    else:
        run_with_engine("onnxruntime", sample_pdf)

    # Run 2: Transformers engines; this run is unconditional.
    run_with_engine("transformers", sample_pdf)
# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()