Bring Your Own Models
OCR

OCR example using RestAPI Client

Open In Colab (opens in a new tab)

This example demonstrates using our REST API client for OCR processing.

    !pip install surya-ocr
    import turboml as tb
    import pandas as pd
    import os
    from PIL import Image

Launching our FastAPI application with the OCR model from a Jupyter notebook

    import subprocess
    import threading
    
    
    def run_uvicorn_server(cmd, ready_event):
        """Run ``cmd`` as a child process, echo its output, and signal readiness.

        Every line the child writes to stdout or stderr is printed as it
        arrives. ``ready_event`` is set as soon as a line containing
        "Uvicorn running on" is seen. The call blocks until the child exits,
        so it is intended to be run on a background thread.

        Args:
            cmd: Command to launch. A string is run through the shell; a
                sequence of arguments is executed directly without a shell.
            ready_event: Object whose ``set()`` method is called when the
                startup line is seen (e.g. a ``threading.Event``).
        """
        # The context manager closes the output pipe and waits for the child
        # even if reading or printing raises part-way through.
        with subprocess.Popen(
            cmd,
            # Keep shell semantics for string commands; argument lists need none.
            shell=isinstance(cmd, (str, bytes)),
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
        ) as process:
            for line in process.stdout:
                print(line, end="")
                # Check for the message indicating the server has started
                if "Uvicorn running on" in line:
                    ready_event.set()
    
    
    # Event that run_uvicorn_server sets once uvicorn prints its startup line.
    server_ready_event = threading.Event()

    # Serve ocr_server_app:app on port 5379, bound to 0.0.0.0.
    cmd = "uvicorn ocr_server_app:app --port 5379 --host 0.0.0.0"

    # run_uvicorn_server blocks until the server process exits, so it gets its
    # own thread.
    server_thread = threading.Thread(
        target=run_uvicorn_server,
        kwargs={"cmd": cmd, "ready_event": server_ready_event},
    )
    server_thread.start()

Loading a dataset of Images

    import io
    import base64
    
    image_dir = "./data/test_images/"
    images_test = []
    labels_test = []
    widths_test = []
    heights_test = []

    # os.listdir returns entries in arbitrary order; sort them so every image
    # lands on the same row index on each run.
    for filename in sorted(os.listdir(image_dir)):
        # Skip anything that does not have a supported image extension
        if not filename.lower().endswith(
            (".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".gif")
        ):
            continue

        image_path = os.path.join(image_dir, filename)

        # Open the file and work on an RGB copy; the file handle is released
        # when the with-block ends.
        with Image.open(image_path) as source_image:
            pil_image = source_image.convert("RGB")

        # Get image dimensions
        width, height = pil_image.size

        # Save the image to a bytes buffer as JPEG
        img_byte_arr = io.BytesIO()
        pil_image.save(img_byte_arr, format="JPEG")
        binary_image = img_byte_arr.getvalue()

        # Encode the binary image data to base64
        base64_image = base64.b64encode(binary_image).decode("utf-8")

        images_test.append(base64_image)
        labels_test.append(0)  # Assigning a default label of 0
        widths_test.append(width)
        heights_test.append(height)

    image_dict_test = {"images": images_test, "width": widths_test, "height": heights_test}
    label_dict_test = {"labels": labels_test}

    # reset_index() copies the default 0..n-1 row index into an "index" column
    # (used below as the dataset key field) and leaves a fresh default index,
    # so no further reset is needed.
    image_df_test = pd.DataFrame(image_dict_test).reset_index()
    label_df_test = pd.DataFrame(label_dict_test).reset_index()

    print(f"Processed {len(images_test)} images.")
    print(f"Image DataFrame shape: {image_df_test.shape}")
    print(f"Label DataFrame shape: {label_df_test.shape}")
    # Field roles for the image frame: the base64-encoded image column is the
    # imaginal input, and width/height are passed as categorical inputs.
    imaginal_fields = ["images"]
    categorical_fields = ["width", "height"]

    # Every dataset is keyed on the "index" column and created with streaming off.
    dataset_options = {"key_field": "index", "streaming": False}

    # The train and test datasets are built from the same two frames.
    images_train = tb.PandasDataset(dataframe=image_df_test, **dataset_options)
    labels_train = tb.PandasDataset(dataframe=label_df_test, **dataset_options)

    images_test = tb.PandasDataset(dataframe=image_df_test, **dataset_options)
    labels_test = tb.PandasDataset(dataframe=label_df_test, **dataset_options)

    # Same input-field selection for both splits.
    input_field_options = {
        "imaginal_fields": imaginal_fields,
        "categorical_fields": categorical_fields,
    }

    features_train = images_train.get_input_fields(**input_field_options)
    targets_train = labels_train.get_label_field(label_field="labels")

    features_test = images_test.get_input_fields(**input_field_options)
    targets_test = labels_test.get_label_field(label_field="labels")

Using TurboML to make a request to the OCR server

    # REST client pointed at the /predict endpoint on port 5379, the port the
    # OCR server was launched on.
    request_model = tb.RestAPIClient(
        server_url="http://0.0.0.0:5379/predict",
        connection_timeout=10000,
        max_request_time=10000,
        max_retries=1,
    )

    # Event.wait returns False when the timeout expires before the event is set.
    # Report that explicitly rather than letting the requests below fail with a
    # less obvious error; execution still continues, as before.
    if not server_ready_event.wait(timeout=100):
        print(
            "Warning: the OCR server did not report startup within 100 seconds; "
            "the requests below may fail."
        )

    model_trained = request_model.learn(features_train, targets_train)
    outputs_test = model_trained.predict(features_test)
    outputs_test