Advanced Features

Provider Selection Helper

Use the infer_provider function to create OCR instances by provider name without importing engine classes.

from upsonic.ocr import infer_provider

# Build the OCR instance from a provider name string; no engine class import needed
ocr = infer_provider(
    'easyocr',
    languages=['en'],
    rotation_fix=True,
)
text = ocr.get_text('document.pdf')

# Provider names accepted by infer_provider:
# 'easyocr', 'rapidocr', 'tesseract', 'deepseek', 'deepseek_ocr'
# 'paddleocr', 'paddle', 'ppstructurev3', 'ppchatocrv4', 'paddleocrvl'

Async Processing

All OCR methods support async execution. The framework is async-first — sync methods are convenience wrappers around the async core.

import asyncio
from upsonic.ocr import OCR
from upsonic.ocr.layer_1.engines import EasyOCREngine

async def process_documents():
    """Extract text from 'document.pdf' and print it, then process
    'report.pdf' and print the confidence of the result.

    Both steps go through the async OCR methods and are awaited one
    after the other.
    """
    ocr = OCR(layer_1_ocr_engine=EasyOCREngine(languages=['en'], gpu=True))

    # Text-only extraction via the async API
    extracted = await ocr.get_text_async('document.pdf')
    print(extracted)

    # Full processing result; its confidence is printed as a percentage
    outcome = await ocr.process_file_async('report.pdf')
    print(f"Confidence: {outcome.confidence:.2%}")

# Drive the coroutine to completion from synchronous code
asyncio.run(process_documents())

Timeout Control

Use layer_1_timeout to set a maximum processing time for the OCR engine. If the timeout is exceeded, an OCRTimeoutError is raised.

from upsonic.ocr import OCR
from upsonic.ocr.layer_1.engines import EasyOCREngine
from upsonic.ocr.exceptions import OCRTimeoutError

engine = EasyOCREngine(languages=['en'])

# layer_1_timeout caps the OCR engine's processing time
# (unit is presumably seconds -- TODO confirm against the OCR API)
ocr = OCR(
    layer_1_timeout=30,
    layer_1_ocr_engine=engine,
)

try:
    text = ocr.get_text('large_document.pdf')
except OCRTimeoutError:
    # Raised when processing exceeds the configured timeout
    print("OCR processing timed out")

Batch Processing with DeepSeek

DeepSeek OCR is optimized for multi-page PDFs: it processes all pages in a single batch for better performance.

from upsonic.ocr import OCR
from upsonic.ocr.layer_1.engines import DeepSeekOCREngine

# Configure the DeepSeek engine
engine = DeepSeekOCREngine(
    max_tokens=8192,
    temperature=0.0,
    model_name="deepseek-ai/DeepSeek-OCR",
)

# Wrap the engine in the OCR orchestrator
ocr = OCR(layer_1_ocr_engine=engine)

# For PDFs, batch processing is used automatically
result = ocr.process_file('multi_page_document.pdf')
print(f"Processed {result.page_count} pages")

Advanced PaddleOCR Features

PaddleOCR providers offer specialized features for complex document understanding.

Structure Recognition with PPStructureV3Engine

from upsonic.ocr import OCR
from upsonic.ocr.layer_1.engines import PPStructureV3Engine

# Structure-aware engine with table, formula and chart recognition switched on
engine = PPStructureV3Engine(
    use_chart_recognition=True,
    use_formula_recognition=True,
    use_table_recognition=True,
)

# Wrap the engine in the OCR orchestrator
ocr = OCR(layer_1_ocr_engine=engine)

# Run the provider directly to extract structured content
result = ocr.provider.predict('research_paper.pdf')

# Turn the prediction result into a single markdown text
markdown_text = ocr.provider.concatenate_markdown_pages(result)
print(markdown_text)

Information Extraction with PPChatOCRv4Engine

from upsonic.ocr import OCR
from upsonic.ocr.layer_1.engines import PPChatOCRv4Engine

# Chat-based engine with table and seal recognition switched on
engine = PPChatOCRv4Engine(
    use_seal_recognition=True,
    use_table_recognition=True,
)

# Wrap the engine in the OCR orchestrator
ocr = OCR(layer_1_ocr_engine=engine)

# Step 1: extract the visual information from the document
visual_result = ocr.provider.visual_predict('invoice.pdf')

# Step 2: build the vector data that retrieval will use
vector_info = ocr.provider.build_vector(
    visual_result,
    block_size=300,
    min_characters=3500,
)

# Step 3: ask the chat interface for the specific fields
invoice_data = ocr.provider.chat(
    visual_info=visual_result,
    vector_info=vector_info,
    use_vector_retrieval=True,
    key_list=['invoice_number', 'date', 'total_amount', 'vendor_name'],
)

# 'vendor_name' is requested above but not printed here
print(f"Invoice Number: {invoice_data.get('invoice_number')}")
print(f"Date: {invoice_data.get('date')}")
print(f"Total: {invoice_data.get('total_amount')}")

Image Preprocessing

Apply preprocessing to improve OCR accuracy for low-quality images.

from upsonic.ocr import OCR
from upsonic.ocr.layer_1.engines import TesseractOCREngine

# Tesseract engine with every preprocessing option turned on
engine = TesseractOCREngine(
    languages=['eng'],
    pdf_dpi=300,            # High quality PDF rendering
    rotation_fix=True,      # Fix skewed/rotated images
    enhance_contrast=True,  # Improve text clarity
    remove_noise=True,      # Remove background noise
)

# Wrap the engine in the OCR orchestrator
ocr = OCR(layer_1_ocr_engine=engine)

# Run OCR on a low-quality image
text = ocr.get_text('skewed_noisy_image.jpg')

GET STARTED

CONCEPTS

STARTING AN AGENT PROJECT

READY TO USE SNIPPETS

DEPLOYMENT

FURTHER READINGS

Provider Selection Helper

Async Processing

Timeout Control

Batch Processing with DeepSeek

Advanced PaddleOCR Features

Structure Recognition with PPStructureV3Engine

Information Extraction with PPChatOCRv4Engine

Image Preprocessing

GET STARTED

CONCEPTS

STARTING AN AGENT PROJECT

READY TO USE SNIPPETS

DEPLOYMENT

FURTHER READINGS

Provider Selection Helper

Async Processing

Timeout Control

Batch Processing with DeepSeek

Advanced PaddleOCR Features

Structure Recognition with PPStructureV3Engine

Information Extraction with PPChatOCRv4Engine

Image Preprocessing

Provider Selection Helper

Async Processing

Timeout Control

Batch Processing with DeepSeek

Advanced PaddleOCR Features

Structure Recognition with PPStructureV3Engine

Information Extraction with PPChatOCRv4Engine

Image Preprocessing