
Python Model: Batch Example


In this example we emulate batch training of custom models defined using TurboML's Python model.

    import turboml as tb
    import pandas as pd
    import numpy as np

Model Definition

Here we define MyBatchModel with buffers that store the input features and labels until the buffer limit is exceeded. The model is then trained all at once on the buffered samples.

We use Scikit-Learn's Perceptron for this task.

    from sklearn.linear_model import Perceptron
    import turboml.common.pytypes as types
    
    
    class MyBatchModel:
        def __init__(self):
            # Underlying scikit-learn model plus buffers that accumulate
            # samples until a full batch is available.
            self.model = Perceptron()
            self.X_buffer = []
            self.y_buffer = []
            self.batch_size = 64
            self.trained = False
    
        def init_imports(self):
            # Imports needed when the class is instantiated inside the venv.
            from sklearn.linear_model import Perceptron
            import numpy as np
    
        def learn_one(self, input: types.InputData):
            # Buffer the incoming sample; train only once a full batch is ready.
            self.X_buffer.append(input.numeric)
            self.y_buffer.append(input.label)
    
            if len(self.X_buffer) >= self.batch_size:
                self.model = self.model.partial_fit(
                    np.array(self.X_buffer), np.array(self.y_buffer), classes=[0, 1]
                )
    
                # Reset the buffers once the batch has been consumed.
                self.X_buffer = []
                self.y_buffer = []
    
                self.trained = True
    
        def predict_one(self, input: types.InputData, output: types.OutputData):
            # Serve predictions only after at least one batch has been trained on.
            if self.trained:
                prediction = self.model.predict(np.array(input.numeric).reshape(1, -1))[0]
    
                output.set_predicted_class(prediction)
            else:
                output.set_score(0.0)
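
Before wiring this class into TurboML, it can be useful to sanity-check the buffering logic locally. The sketch below replays the same buffer-then-partial_fit pattern on synthetic NumPy data, without TurboML's InputData/OutputData types; the synthetic stream and the standalone loop are illustrative assumptions, not part of the TurboML API.

    # Local sanity check of the buffer-then-train pattern (assumption:
    # offline only, plain arrays instead of TurboML pytypes).
    from sklearn.linear_model import Perceptron
    import numpy as np
    
    rng = np.random.default_rng(0)
    X_stream = rng.normal(size=(200, 5))          # 200 synthetic events, 5 numeric features
    y_stream = (X_stream[:, 0] > 0).astype(int)   # toy binary labels
    
    model = Perceptron()
    X_buffer, y_buffer, batch_size, trained = [], [], 64, False
    
    for x, y in zip(X_stream, y_stream):
        X_buffer.append(x)
        y_buffer.append(y)
        if len(X_buffer) >= batch_size:
            # Train on the accumulated mini-batch, then reset the buffers.
            model.partial_fit(np.array(X_buffer), np.array(y_buffer), classes=[0, 1])
            X_buffer, y_buffer, trained = [], [], True
    
    if trained:
        print(model.predict(X_stream[:5]))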

Now we define a custom virtual environment with the dependencies the model needs, and add our model class to this venv.

    venv = tb.setup_venv("my_batch_python_venv", ["scikit-learn", "numpy<2"])
    venv.add_python_class(MyBatchModel)

Model Deployment

Once the virtual environment is ready, we prepare the dataset to be used in this task and deploy the model with its features and labels.

    batch_model = tb.Python(class_name=MyBatchModel.__name__, venv_name=venv.name)
    
    # Load the raw data and register it as TurboML datasets.
    transactions_df = pd.read_csv("data/transactions.csv").reset_index()
    labels_df = pd.read_csv("data/labels.csv").reset_index()
    transactions = tb.PandasDataset(
        dataset_name="transactions_batch_python",
        key_field="index",
        dataframe=transactions_df,
        upload=True,
    )
    labels = tb.PandasDataset(
        dataset_name="labels_batch_python",
        key_field="index",
        dataframe=labels_df,
        upload=True,
    )
    
    # Select the numeric features and the label, then deploy the model.
    numerical_fields = [
        "transactionAmount",
        "localHour",
        "isProxyIP",
        "digitalItemCount",
        "physicalItemCount",
    ]
    features = transactions.get_input_fields(numerical_fields=numerical_fields)
    label = labels.get_label_field(label_field="is_fraud")
    deployed_batch_model = batch_model.deploy("batch_model", input=features, labels=label)
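
Once deployed, the model consumes the input stream and produces predictions continuously. As a quick check that it is serving, we can pull the outputs materialized so far (a minimal sketch, assuming the deployed model exposes get_outputs() as in the TurboML quickstart):

    # Fetch the outputs produced so far (assumption: get_outputs() is
    # available on deployed models, as in the TurboML quickstart).
    outputs = deployed_batch_model.get_outputs()
    print(len(outputs), outputs[-1])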

Evaluation

    import matplotlib.pyplot as plt
    
    # Register a windowed metric and plot its evolution over the stream.
    deployed_batch_model.add_metric("WindowedRMSE")
    model_rmse_scores = deployed_batch_model.get_evaluation("WindowedRMSE")
    plt.plot([score.metric for score in model_rmse_scores])
    plt.show()