OCR example using RestAPI Client

This example demonstrates using our REST API client for OCR processing.

import turboml as tb

!pip install surya-ocr

import os
from PIL import Image
import pandas as pd

Launching our FastAPI application with the OCR model from a Jupyter notebook

import subprocess
import threading
 
 
def run_uvicorn_server(cmd, ready_event):
    """Run a server command, echo its output, and signal when it has started.

    The call blocks until the child process exits, so it is intended to be
    used as the target of a background thread.

    Args:
        cmd: Command line to execute. It is run through the shell, so it
            must come from a trusted source.
        ready_event: Event-like object; ``set()`` is called on it whenever an
            output line contains "Uvicorn running on".

    Returns:
        None.
    """
    # Using Popen as a context manager closes the stdout pipe and waits for
    # the child on exit, even if echoing the output raises, so neither the
    # file descriptor nor the process is leaked.
    with subprocess.Popen(
        cmd,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # merge stderr so startup errors are echoed too
        universal_newlines=True,
    ) as process:
        for line in process.stdout:
            print(line, end="")
            # Check for the message indicating the server has started
            if "Uvicorn running on" in line:
                ready_event.set()
 
 
# Shell command that serves the OCR app with uvicorn on port 5379.
cmd = "uvicorn utils.ocr_server_app:app --port 5379 --host 0.0.0.0"

# Set by the server thread once the server reports that it is running.
server_ready_event = threading.Event()

# Run the server in a separate thread so the following cells can execute
# while the server process keeps running.
server_thread = threading.Thread(
    target=run_uvicorn_server,
    kwargs={"cmd": cmd, "ready_event": server_ready_event},
)
server_thread.start()

Loading a dataset of images

import io
import base64
 
# Directory scanned (non-recursively) for test images.
image_dir = "./data/test_images/"
# Lower-case file extensions that are treated as images.
image_extensions = (".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".gif")

images_test = []  # base64 text of each image re-encoded as JPEG
labels_test = []  # placeholder labels, one per image
widths_test = []  # image widths in pixels
heights_test = []  # image heights in pixels

# os.listdir() returns entries in arbitrary order; sorting makes the row
# order (and therefore the "index" key given to each image) reproducible.
for filename in sorted(os.listdir(image_dir)):
    if not filename.lower().endswith(image_extensions):
        continue
    image_path = os.path.join(image_dir, filename)

    # Open and process the image
    with Image.open(image_path) as pil_image:
        # Normalise to RGB so the image can always be saved as JPEG.
        pil_image = pil_image.convert("RGB")

        # Get image dimensions
        width, height = pil_image.size

        # Save the image to a bytes buffer
        img_byte_arr = io.BytesIO()
        pil_image.save(img_byte_arr, format="JPEG")
        binary_image = img_byte_arr.getvalue()

        # Encode the binary image data to base64
        base64_image = base64.b64encode(binary_image).decode("utf-8")

    images_test.append(base64_image)
    labels_test.append(0)  # Assigning a default label of 0
    widths_test.append(width)
    heights_test.append(height)

image_dict_test = {"images": images_test, "width": widths_test, "height": heights_test}
label_dict_test = {"labels": labels_test}
image_df_test = pd.DataFrame(image_dict_test)
label_df_test = pd.DataFrame(label_dict_test)
# reset_index() without drop=True adds an "index" column holding the row
# number; both frames get the same values, so rows can be matched on it.
image_df_test.reset_index(inplace=True)
label_df_test.reset_index(inplace=True)

print(f"Processed {len(images_test)} images.")
print(f"Image DataFrame shape: {image_df_test.shape}")
print(f"Label DataFrame shape: {label_df_test.shape}")

# Give both frames a fresh default row index; drop=True discards the old
# row index instead of inserting it as another column.
image_df_test, label_df_test = (
    frame.reset_index(drop=True) for frame in (image_df_test, label_df_test)
)

# Wrap the DataFrames as TurboML local datasets keyed on the "index" column.
# The same test frames back both the "train" and the "test" datasets here.
images_train = tb.LocalDataset.from_pd(df=image_df_test, key_field="index")
labels_train = tb.LocalDataset.from_pd(df=label_df_test, key_field="index")

# NOTE: images_test / labels_test previously held plain Python lists; from
# here on the names refer to LocalDataset objects.
images_test = tb.LocalDataset.from_pd(df=image_df_test, key_field="index")
labels_test = tb.LocalDataset.from_pd(df=label_df_test, key_field="index")

# Column roles: "images" (base64 text) is passed as the imaginal field,
# while "width" and "height" are passed as categorical fields.
imaginal_fields = ["images"]
categorical_fields = ["width", "height"]
# Model inputs and labels for the "train" datasets.
features_train = images_train.get_model_inputs(
    imaginal_fields=imaginal_fields, categorical_fields=categorical_fields
)
targets_train = labels_train.get_model_labels(label_field="labels")

# Same field selection for the "test" datasets.
features_test = images_test.get_model_inputs(
    imaginal_fields=imaginal_fields, categorical_fields=categorical_fields
)
targets_test = labels_test.get_model_labels(label_field="labels")

Using TurboML to make a request to the OCR server

# REST client model pointed at the /predict endpoint on port 5379, the port
# the uvicorn command in this notebook serves on.
# NOTE(review): units of connection_timeout / max_request_time are not
# visible here (presumably milliseconds) — confirm against the TurboML docs.
request_model = tb.RestAPIClient(
    server_url="http://0.0.0.0:5379/predict",
    connection_timeout=10000,
    max_request_time=10000,
    max_retries=1,
)

# Block until the server thread reports that the server is running.
# Event.wait() returns False when the timeout expires, so fail loudly here
# instead of sending requests to a server that never came up.
if not server_ready_event.wait(timeout=100):
    raise RuntimeError("OCR server did not signal readiness within 100 seconds")

# learn() returns the model object that is used for prediction below.
model_trained = request_model.learn(features_train, targets_train)

# Predict on the test features.
# NOTE(review): presumably each prediction is served by the OCR server via
# the REST client — confirm against the TurboML docs.
outputs_test = model_trained.predict(features_test)

# Bare expression: displays the predictions when run as a notebook cell.
outputs_test

ResNet Example Native Python Model