# Image Processing (MNIST Example)
import io

import pandas as pd
import turboml as tb
from PIL import Image
from torchvision import datasets, transforms
class PILToBytes:
    """Callable transform that encodes a PIL image into raw bytes.

    Instances are meant to be used as a step in a ``transforms.Compose``
    pipeline, after any PIL-level transforms.

    Args:
        format: Image format name forwarded to ``Image.save`` (default "JPEG").
    """

    def __init__(self, format="JPEG"):
        # `format` shadows the builtin, but it is the keyword callers pass,
        # so the parameter name is kept as-is.
        self.format = format

    def __call__(self, img):
        """Return ``img`` serialized in ``self.format`` as a ``bytes`` object.

        Raises:
            TypeError: If ``img`` is not a ``PIL.Image.Image`` instance.
        """
        if not isinstance(img, Image.Image):
            raise TypeError(f"Input should be a PIL Image, but got {type(img)}.")
        # Encode into an in-memory buffer rather than touching the filesystem.
        buffer = io.BytesIO()
        img.save(buffer, format=self.format)
        return buffer.getvalue()
# Per-sample pipeline: resize to 28x28, then encode the PIL image as PNG bytes.
transform = transforms.Compose(
    [
        transforms.Resize((28, 28)),
        PILToBytes(format="PNG"),
    ]
)

# Data Inspection
# Downloading the MNIST dataset to be used in ML modelling.
mnist_dataset_train = datasets.MNIST(
    root="./data", train=True, download=True, transform=transform
)
mnist_dataset_test = datasets.MNIST(
    root="./data", train=False, download=True, transform=transform
)

# Parallel lists: entry i of images_* corresponds to entry i of labels_*.
images_train = []
images_test = []
labels_train = []
labels_test = []

# Iterating a dataset yields (image, label) pairs with `transform` applied.
for image, label in mnist_dataset_train:
    images_train.append(image)
    labels_train.append(label)

for image, label in mnist_dataset_test:
    images_test.append(image)
    labels_test.append(label)

# Transforming the lists into Pandas DataFrames.
# One single-column DataFrame per (split, kind): "images" or "labels".
image_dict_train = {"images": images_train}
label_dict_train = {"labels": labels_train}
image_df_train = pd.DataFrame(image_dict_train)
label_df_train = pd.DataFrame(label_dict_train)

image_dict_test = {"images": images_test}
label_dict_test = {"labels": labels_test}
image_df_test = pd.DataFrame(image_dict_test)
label_df_test = pd.DataFrame(label_dict_test)

# Adding index columns to the DataFrames to act as primary keys for the datasets.
# reset_index() turns the default row index into a regular "index" column,
# modifying each DataFrame in place.
image_df_train.reset_index(inplace=True)
label_df_train.reset_index(inplace=True)
image_df_test.reset_index(inplace=True)
label_df_test.reset_index(inplace=True)

# Notebook-style previews: as bare expressions these only render output in an
# interactive session.
image_df_train.head()
label_df_train.head()

# Using LocalDataset class for compatibility with the TurboML platform.
# NOTE: these assignments rebind images_*/labels_* from the plain Python lists
# built earlier to LocalDataset objects keyed on the "index" column.
images_train = tb.LocalDataset.from_pd(df=image_df_train, key_field="index")
labels_train = tb.LocalDataset.from_pd(df=label_df_train, key_field="index")
images_test = tb.LocalDataset.from_pd(df=image_df_test, key_field="index")
labels_test = tb.LocalDataset.from_pd(df=label_df_test, key_field="index")

# Extracting the features and the targets from the TurboML-compatible datasets.
# Column(s) holding image data; shared by the train and test feature extraction.
imaginal_fields = ["images"]

features_train = images_train.get_model_inputs(imaginal_fields=imaginal_fields)
targets_train = labels_train.get_model_labels(label_field="labels")

features_test = images_test.get_model_inputs(imaginal_fields=imaginal_fields)
targets_test = labels_test.get_model_labels(label_field="labels")

# Model Initialization
# Defining a Neural Network (NN) to be used on the MNIST data.
# The output_size of the final layer in the NN is 10 in the case of MNIST.
# Since this is a classification task, Cross Entropy loss is used with the
# Adam optimizer.
final_layer = tb.NNLayer(output_size=10, activation="none")
model = tb.NeuralNetwork(
    loss_function="cross_entropy", optimizer="adam", learning_rate=0.01
)
# Swap the network's last layer for the 10-output layer defined above.
model.layers[-1] = final_layer

# ImageToNumeric PreProcessor
# Since we are dealing with images as input to the model, we select the
# ImageToNumeric PreProcessor to accordingly convert the binary images into
# numerical data useful to the NN.
# `model` is rebound to the preprocessor, which wraps the NN as its base model.
model = tb.ImageToNumericPreProcessor(base_model=model, image_sizes=[28, 28, 1])

# Model Training
# Setting the model combined with the ImageToNumeric PreProcessor to learn on
# the training data.
# learn() returns the trained model, so `model` is rebound to that result.
model = model.learn(features_train, targets_train)

# Model Inference
# Performing inference on the trained model using the test data.
outputs_test = model.predict(features_test)

# Notebook-style display: as a bare expression this only renders output in an
# interactive session.
outputs_test

# Model Testing
# Testing the trained model's performance on the test data.
from sklearn import metrics

# Ground-truth labels as a plain list, taken from the test labels dataset.
labels_test_list = labels_test.input_df["labels"].to_list()

print(
    "Accuracy: ",
    metrics.accuracy_score(labels_test_list, outputs_test["predicted_class"]),
)
# F1 and precision are macro-averaged across classes.
print(
    "F1: ",
    metrics.f1_score(
        labels_test_list, outputs_test["predicted_class"], average="macro"
    ),
)
print(
    "Precision: ",
    metrics.precision_score(
        labels_test_list, outputs_test["predicted_class"], average="macro"
    ),
)