Skip to main content

Provider Selection Helper

Use the `infer_provider` function to create an OCR instance from a provider name, without importing the engine classes yourself.
from upsonic.ocr import infer_provider

# Create an OCR instance by provider name.
# NOTE(review): the keyword arguments are presumably forwarded to the
# selected engine's constructor — confirm against infer_provider.
ocr = infer_provider('easyocr', languages=['en'], rotation_fix=True)
# Extract the document's text with the selected provider.
text = ocr.get_text('document.pdf')

# Available provider names:
#   'easyocr', 'rapidocr', 'tesseract', 'deepseek', 'deepseek_ocr',
#   'paddleocr', 'paddle', 'ppstructurev3', 'ppchatocrv4', 'paddleocrvl'

Async Processing

All OCR methods support async execution. The framework is async-first — sync methods are convenience wrappers around the async core.
import asyncio
from upsonic.ocr import OCR
from upsonic.ocr.layer_1.engines import EasyOCREngine

async def process_documents():
    """Exercise the async OCR entry points on two sample PDFs and print the output.

    Wraps an ``EasyOCREngine`` in an ``OCR`` orchestrator, awaits
    ``get_text_async`` on ``document.pdf`` and prints what it returns, then
    awaits ``process_file_async`` on ``report.pdf`` and prints the result's
    ``confidence`` formatted as a percentage with two decimals.
    """
    orchestrator = OCR(
        layer_1_ocr_engine=EasyOCREngine(languages=['en'], gpu=True),
    )

    # Text-only extraction.
    print(await orchestrator.get_text_async('document.pdf'))

    # Full processing result; only its confidence is reported here.
    outcome = await orchestrator.process_file_async('report.pdf')
    confidence_line = f"Confidence: {outcome.confidence:.2%}"
    print(confidence_line)

# Entry point from synchronous code: asyncio.run runs the coroutine to completion.
asyncio.run(process_documents())

Timeout Control

Pass `layer_1_timeout` to `OCR` to set a maximum processing time for the OCR engine. If the timeout is exceeded, an `OCRTimeoutError` is raised.
from upsonic.ocr import OCR
from upsonic.ocr.layer_1.engines import EasyOCREngine
from upsonic.ocr.exceptions import OCRTimeoutError

engine = EasyOCREngine(languages=['en'])
# Cap the engine's processing time via layer_1_timeout.
# NOTE(review): the unit is not stated here — presumably seconds; confirm.
ocr = OCR(layer_1_ocr_engine=engine, layer_1_timeout=30)

try:
    text = ocr.get_text('large_document.pdf')
except OCRTimeoutError:
    # Raised when the timeout is exceeded.
    print("OCR processing timed out")

Batch Processing with DeepSeek

DeepSeek OCR provides optimized batch processing for multi-page PDFs: all pages are processed in a single batch for better performance.
from upsonic.ocr import OCR
from upsonic.ocr.layer_1.engines import DeepSeekOCREngine

# Create the DeepSeek engine: model identifier plus generation settings.
engine = DeepSeekOCREngine(
    model_name="deepseek-ai/DeepSeek-OCR",
    temperature=0.0,
    max_tokens=8192
)

# Create the OCR orchestrator around the engine.
ocr = OCR(layer_1_ocr_engine=engine)

# Batch processing is used automatically for PDFs; no extra flag is passed.
result = ocr.process_file('multi_page_document.pdf')
# Report the number of pages from the result's page_count.
print(f"Processed {result.page_count} pages")

Advanced PaddleOCR Features

PaddleOCR providers offer specialized features for complex document understanding.

Structure Recognition with PPStructureV3Engine

from upsonic.ocr import OCR
from upsonic.ocr.layer_1.engines import PPStructureV3Engine

# Create a structure-aware engine with table, formula and chart recognition enabled.
engine = PPStructureV3Engine(
    use_table_recognition=True,
    use_formula_recognition=True,
    use_chart_recognition=True
)

# Create the OCR orchestrator around the engine.
ocr = OCR(layer_1_ocr_engine=engine)

# Extract structured content by calling the provider directly.
# NOTE(review): ocr.provider is presumably the engine passed as
# layer_1_ocr_engine — confirm.
result = ocr.provider.predict('research_paper.pdf')

# Combine the prediction output into a single markdown representation.
markdown_text = ocr.provider.concatenate_markdown_pages(result)
print(markdown_text)

Information Extraction with PPChatOCRv4Engine

from upsonic.ocr import OCR
from upsonic.ocr.layer_1.engines import PPChatOCRv4Engine

# Create a chat-based engine with table and seal recognition enabled.
engine = PPChatOCRv4Engine(
    use_table_recognition=True,
    use_seal_recognition=True
)

# Create the OCR orchestrator around the engine.
ocr = OCR(layer_1_ocr_engine=engine)

# Step 1: extract visual information from the document.
visual_result = ocr.provider.visual_predict('invoice.pdf')

# Step 2: build vector embeddings from the visual result for retrieval.
vector_info = ocr.provider.build_vector(
    visual_result,
    min_characters=3500,
    block_size=300
)

# Step 3: extract specific fields using the chat interface, with vector
# retrieval enabled and fed by the embeddings built above.
invoice_data = ocr.provider.chat(
    key_list=['invoice_number', 'date', 'total_amount', 'vendor_name'],
    visual_info=visual_result,
    use_vector_retrieval=True,
    vector_info=vector_info
)

# NOTE(review): assumes chat() returns a mapping keyed directly by the
# requested field names — confirm the result is not nested (e.g. under a
# 'chat_res' key), otherwise these lookups all print None.
print(f"Invoice Number: {invoice_data.get('invoice_number')}")
print(f"Date: {invoice_data.get('date')}")
print(f"Total: {invoice_data.get('total_amount')}")

Image Preprocessing

Apply preprocessing to improve OCR accuracy for low-quality images.
from upsonic.ocr import OCR
from upsonic.ocr.layer_1.engines import TesseractOCREngine

# Create engine with all preprocessing enabled
engine = TesseractOCREngine(
    languages=['eng'],
    rotation_fix=True,        # Fix skewed/rotated images
    enhance_contrast=True,    # Improve text clarity
    remove_noise=True,        # Remove background noise
    pdf_dpi=300               # High quality PDF rendering
)

# Create the OCR orchestrator around the engine.
ocr = OCR(layer_1_ocr_engine=engine)

# Process a low-quality image with the preprocessing options above.
text = ocr.get_text('skewed_noisy_image.jpg')