OCR example using RestAPI Client
This example demonstrates using our REST API client for OCR processing.
!pip install surya-ocr
import turboml as tb
import pandas as pd
import os
from PIL import Image
Launching our FastAPI application with the OCR model from the Jupyter notebook
import subprocess
import threading
def run_uvicorn_server(cmd, ready_event):
    process = subprocess.Popen(
        cmd,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
    )
    for line in process.stdout:
        print(line, end="")
        # Check for the message indicating the server has started
        if "Uvicorn running on" in line:
            ready_event.set()
    process.wait()
cmd = "uvicorn ocr_server_app:app --port 5379 --host 0.0.0.0"
server_ready_event = threading.Event()
server_thread = threading.Thread(
    target=run_uvicorn_server, args=(cmd, server_ready_event)
)
server_thread.start()
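The ocr_server_app module itself is not shown in this example. For orientation, the sketch below outlines what such an app could look like: a minimal FastAPI service exposing a /predict endpoint that accepts a base64-encoded image, served by the uvicorn command above. The request schema and the placeholder OCR step are assumptions for illustration, not the actual server used here.
# ocr_server_app.py -- hypothetical sketch, not the actual server used in this example.
# The request schema below is an assumption; the payload sent by tb.RestAPIClient may differ.
import base64
import io

from fastapi import FastAPI
from PIL import Image
from pydantic import BaseModel

app = FastAPI()

class PredictRequest(BaseModel):
    images: str  # base64-encoded JPEG bytes
    width: int
    height: int

@app.post("/predict")
def predict(request: PredictRequest):
    # Decode the base64 payload back into a PIL image
    image = Image.open(io.BytesIO(base64.b64decode(request.images))).convert("RGB")
    # Run the OCR model here (e.g. surya-ocr) and return the recognized text;
    # this sketch only returns a placeholder response.
    return {"text": "", "size": image.size}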
Loading a dataset of Images
import io
import base64
image_dir = "./data/test_images/"
images_test = []
labels_test = []
widths_test = []
heights_test = []
for filename in os.listdir(image_dir):
    if filename.lower().endswith((".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".gif")):
        image_path = os.path.join(image_dir, filename)
        # Open and process the image
        with Image.open(image_path) as pil_image:
            pil_image = pil_image.convert("RGB")
            # Get image dimensions
            width, height = pil_image.size
            # Save the image to a bytes buffer
            img_byte_arr = io.BytesIO()
            pil_image.save(img_byte_arr, format="JPEG")
            binary_image = img_byte_arr.getvalue()
            # Encode the binary image data to base64
            base64_image = base64.b64encode(binary_image).decode("utf-8")
            images_test.append(base64_image)
            labels_test.append(0)  # Assigning a default label of 0
            widths_test.append(width)
            heights_test.append(height)
image_dict_test = {"images": images_test, "width": widths_test, "height": heights_test}
label_dict_test = {"labels": labels_test}
image_df_test = pd.DataFrame(image_dict_test)
label_df_test = pd.DataFrame(label_dict_test)
image_df_test.reset_index(inplace=True)
label_df_test.reset_index(inplace=True)
print(f"Processed {len(images_test)} images.")
print(f"Image DataFrame shape: {image_df_test.shape}")
print(f"Label DataFrame shape: {label_df_test.shape}")
image_df_test = image_df_test.reset_index(drop=True)
label_df_test = label_df_test.reset_index(drop=True)
images_train = tb.PandasDataset(
dataframe=image_df_test, key_field="index", streaming=False
)
labels_train = tb.PandasDataset(
dataframe=label_df_test, key_field="index", streaming=False
)
images_test = tb.PandasDataset(
dataframe=image_df_test, key_field="index", streaming=False
)
labels_test = tb.PandasDataset(
dataframe=label_df_test, key_field="index", streaming=False
)
imaginal_fields = ["images"]
categorical_fields = ["width", "height"]
features_train = images_train.get_input_fields(
imaginal_fields=imaginal_fields, categorical_fields=categorical_fields
)
targets_train = labels_train.get_label_field(label_field="labels")
features_test = images_test.get_input_fields(
imaginal_fields=imaginal_fields, categorical_fields=categorical_fields
)
targets_test = labels_test.get_label_field(label_field="labels")
Using TurboML to make a request to the OCR server
request_model = tb.RestAPIClient(
server_url="http://0.0.0.0:5379/predict",
connection_timeout=10000,
max_request_time=10000,
max_retries=1,
)
server_ready_event.wait(timeout=100)
model_trained = request_model.learn(features_train, targets_train)
outputs_test = model_trained.predict(features_test)
outputs_test