Write Your Own Models
Batch Python Model

Python Model: Batch Example

Open In Colab (opens in a new tab)

In this example we emulate batch training of custom models defined using TurboML's Python model.

import pandas as pd
import turboml as tb
import numpy as np

Model Definition

Here we define MyBatchModel with buffers that store the input features and labels until the buffer reaches the batch size (64 samples). The model is then trained all at once on the buffered samples, and the buffers are cleared.

We use Scikit-Learn's Perceptron for this task.

from sklearn.linear_model import Perceptron
import turboml.common.pytypes as types
 
 
class MyBatchModel:
    """Perceptron wrapper that learns in mini-batches of buffered samples.

    Incoming samples are accumulated in ``X_buffer`` / ``y_buffer``. Once
    ``batch_size`` samples have been collected, the model is updated with a
    single ``partial_fit`` call and the buffers are emptied.
    """

    def __init__(self):
        # Buffering configuration and state.
        self.batch_size = 64
        self.X_buffer = []
        self.y_buffer = []
        # Underlying estimator and a flag recording whether it has been fit
        # at least once.
        self.model = Perceptron()
        self.trained = False

    def init_imports(self):
        """Import the libraries this model relies on."""
        import numpy as np
        from sklearn.linear_model import Perceptron

    def learn_one(self, input: types.InputData):
        """Buffer one labelled sample; fit the model when the batch is full."""
        self.X_buffer.append(input.numeric)
        self.y_buffer.append(input.label)

        # Not enough samples yet: keep buffering.
        if len(self.X_buffer) < self.batch_size:
            return

        features = np.array(self.X_buffer)
        targets = np.array(self.y_buffer)
        # `classes` is passed on every call so the first partial_fit knows
        # the full label set.
        self.model = self.model.partial_fit(features, targets, classes=[0, 1])

        # Start a fresh batch.
        self.X_buffer, self.y_buffer = [], []
        self.trained = True

    def predict_one(self, input: types.InputData, output: types.OutputData):
        """Write a prediction for one sample into ``output``.

        Before the first batch has been fit, a score of 0.0 is written
        instead of a predicted class.
        """
        if not self.trained:
            output.set_score(0.0)
            return

        # The estimator expects a 2-D array: one row, all features.
        sample = np.array(input.numeric).reshape(1, -1)
        output.set_predicted_class(self.model.predict(sample)[0])

Now, we define a custom virtual environment with the correct list of dependencies which the model will be using, and link our model to this venv.

# Packages the model class needs inside the virtual environment.
venv_requirements = ["scikit-learn", "numpy<2"]
venv = tb.setup_venv("my_batch_python_venv", venv_requirements)
# Register the model class with the virtual environment.
venv.add_python_class(MyBatchModel)

Model Deployment

Once the virtual environment is ready, we prepare the dataset to be used in this task and deploy the model with its features and labels.

# Reference the custom Python model class registered in the virtual environment.
batch_model = tb.Python(class_name=MyBatchModel.__name__, venv_name=venv.name)

# reset_index() adds an "index" column, which is used as the key field below.
transactions_df = pd.read_csv("data/transactions.csv").reset_index()
labels_df = pd.read_csv("data/labels.csv").reset_index()

# Try to upload each dataset; if the upload fails, fall back to referencing
# the dataset by name. `except Exception` (rather than a bare `except`) keeps
# KeyboardInterrupt and SystemExit from being swallowed by the fallback.
try:
    transactions = tb.PandasDataset(
        dataset_name="transactions_batch_python",
        key_field="index",
        dataframe=transactions_df,
        upload=True,
    )
except Exception:
    transactions = tb.PandasDataset(dataset_name="transactions_batch_python")

try:
    labels = tb.PandasDataset(
        dataset_name="labels_batch_python",
        key_field="index",
        dataframe=labels_df,
        upload=True,
    )
except Exception:
    labels = tb.PandasDataset(dataset_name="labels_batch_python")

# Numeric columns passed to the model as input features.
numerical_fields = [
    "transactionAmount",
    "localHour",
    "isProxyIP",
    "digitalItemCount",
    "physicalItemCount",
]
features = transactions.get_input_fields(numerical_fields=numerical_fields)
label = labels.get_label_field(label_field="is_fraud")

# Deploy the model with its input features and label.
deployed_batch_model = batch_model.deploy("batch_model", input=features, labels=label)

Evaluation

import matplotlib.pyplot as plt
 
# Register the metric on the deployed model, then fetch its evaluations.
metric_name = "WindowedRMSE"
deployed_batch_model.add_metric(metric_name)
# NOTE: despite its name, this variable holds WindowedRMSE evaluations, not AUC.
model_auc_scores = deployed_batch_model.get_evaluation(metric_name)

# Plot the metric value of each evaluation entry in order.
metric_values = [evaluation.metric for evaluation in model_auc_scores]
plt.plot(metric_values)