Write Your Own Models
Native Python Model

Native Python Models

Open In Colab (opens in a new tab)

While TurboML offers a wide array of algorithms implemented with performant machine-native code, we also give you the flexibility to use your own models in Python when necessary, allowing the use of any public library from PyPI. Let's walk through some simple examples of models based on River (opens in a new tab) and scikit-learn (opens in a new tab).

Imports and Utilities

    from river import datasets
    import pandas as pd
    import turboml as tb

Prepare an Evaluation Dataset

We choose the standard Credit Card Fraud dataset that ships with River to evaluate our models on.

    # Load the Credit Card Fraud dataset that ships with River.
    dataset = datasets.CreditCard()
    dataset
    # Peek at the first (features, label) pair of the dataset.
    sample, score = next(iter(dataset))
    sample
    # Walk the dataset once, collecting each feature record and its label.
    sample_inputs, sample_labels = [], []

    for sample, score in dataset:
        sample_inputs.append(sample)
        sample_labels.append({"score": score})
    # One row per sample: features in one frame, labels in another.
    df_features = pd.DataFrame(sample_inputs)
    df_labels = pd.DataFrame(sample_labels)
    # We don't want to use the "Time" feature for this example.
    df_features = df_features.drop(columns=["Time"])

And finally, load them as datasets in the TurboML Platform.

    # reset_index() exposes the row index as an "index" column, which serves as
    # the key field of both datasets.
    indexed_features = df_features.reset_index()
    indexed_labels = df_labels.reset_index()

    features = tb.PandasDataset(
        dataframe=indexed_features,
        dataset_name="cc_feats_native",
        key_field="index",
        upload=True,
    )
    labels = tb.PandasDataset(
        dataframe=indexed_labels,
        dataset_name="cc_labels_native",
        key_field="index",
        upload=True,
    )

Isolate features

    # Treat every column of the feature frame as a numerical input field.
    numerical_cols = list(df_features.columns)
    input_features = features.get_input_fields(numerical_fields=numerical_cols)
    # The label dataset contributes its "score" column as the target.
    label = labels.get_label_field(label_field="score")

Structure of User Defined Models

A custom Python model must implement three instance methods: learn_one, predict_one and init_imports. The interface and its usage are described below and explored further in the examples contained in this notebook.

# The annotations below reference types.InputData / types.OutputData, so the
# module providing them has to be imported before the class is defined.
import turboml.common.pytypes as types


class CustomModel:
    """Skeleton of the interface a user-defined Python model must implement."""

    def init_imports(self):
        """
        Import any external symbols/modules used in this class
        """
        pass

    def learn_one(self, input: types.InputData):
        """
        Receives labelled data for the model to learn from
        """
        pass

    def predict_one(self, input: types.InputData, output: types.OutputData):
        """
        Receives input features for a prediction, must pass output to the
        output object
        """
        pass

Example - Leveraging River (opens in a new tab)

River is a popular library for online machine learning, and its models provide learn_one and predict_one out of the box. However, it is important to note the difference between the input passed to a user-defined model and the input of a River model, which takes a dictionary of features and a label for a supervised algorithm. In this example we create a custom model using River according to the interface described above and put it in a separate Python module.

    !pip install river
    from river import linear_model
    import turboml.common.pytypes as types
    
    
    class MyLogisticRegression:
        """River logistic regression exposed through the user-defined model interface."""

        def __init__(self):
            self.model = linear_model.LogisticRegression()

        def init_imports(self):
            """Import the external module this class relies on."""
            from river import linear_model

        def learn_one(self, input: types.InputData):
            """Train the wrapped River model on one labelled sample."""
            # River takes a feature dictionary, so key each numeric value by
            # its position in the input.
            features = dict(enumerate(input.numeric))
            self.model.learn_one(features, input.label)

        def predict_one(self, input: types.InputData, output: types.OutputData):
            """Predict for one sample and write the result to ``output`` as the score."""
            features = dict(enumerate(input.numeric))
            prediction = self.model.predict_one(features)
            output.set_score(float(prediction))

            # example: setting embeddings
            # output.resize_embeddings(3)
            # mut = output.embeddings()
            # mut[0] = 1
            # mut[1] = 2
            # mut[2] = 3

            # example: appending to feature scores
            # this api is an alternative to resize + set as above,
            # but less efficient
            # output.append_feature_score(0.5)

Since Python packages can have multiple external dependencies, we can make use of tb.setup_venv(name_of_venv, [list of packages]). This creates a virtual environment that enables interaction with the platform and makes it easy to install external dependencies.

    import matplotlib.pyplot as plt

    # Create the virtual environment with the packages the model needs and
    # register the custom class in it.
    venv = tb.setup_venv("my_river_venv", ["river", "numpy"])
    venv.add_python_class(MyLogisticRegression)

    # Deploy the class as a model fed by the feature and label fields.
    river_model = tb.Python(
        class_name=MyLogisticRegression.__name__, venv_name=venv.name
    )
    deployed_model_river = river_model.deploy(
        "river_model", input=input_features, labels=label
    )

    # Register the metric, then fetch and plot its evaluations. The names say
    # "rmse" because the metric is WindowedRMSE, not an AUC.
    metric_name = "WindowedRMSE"
    deployed_model_river.add_metric(metric_name)
    model_rmse_scores = deployed_model_river.get_evaluation(metric_name)
    plt.plot([model_rmse_score.metric for model_rmse_score in model_rmse_scores])

Example - An Online Model with scikit-learn

With scikit-learn, you can implement online learning using partial_fit(), similar to the code example below.

    !pip install scikit-learn
    from sklearn.linear_model import Perceptron
    import numpy as np
    import turboml.common.pytypes as types
    
    
    class MyPerceptron:
        """scikit-learn Perceptron trained online, one sample at a time, via partial_fit()."""

        def __init__(self):
            self.model = Perceptron()
            # Becomes True after the first partial_fit() call; until then
            # predict_one() reports a default score instead of calling predict().
            self.fitted = False

        def init_imports(self):
            """Import every external module used in this class."""
            from sklearn.linear_model import Perceptron
            import numpy as np

        def learn_one(self, input: types.InputData):
            """Update the perceptron with one labelled sample."""
            # A single sample becomes a (1, n_features) matrix and a
            # one-element target vector.
            features = np.array(input.numeric).reshape(1, -1)
            target = np.array(input.label).reshape(-1)
            if self.fitted:
                self.model.partial_fit(features, target)
            else:
                # scikit-learn requires the full list of classes on the first
                # partial_fit() call.
                self.model.partial_fit(features, target, classes=[0, 1])
                self.fitted = True

        def predict_one(self, input: types.InputData, output: types.OutputData):
            """Write the predicted class for one sample to ``output`` as the score."""
            if not self.fitted:
                # Nothing learned yet: report a default score.
                output.set_score(0.0)
                return
            features = np.array(input.numeric).reshape(1, -1)
            prediction = self.model.predict(features)[0]
            # predict() yields a NumPy scalar; pass a plain float, as the
            # unfitted branch does.
            output.set_score(float(prediction))
    import matplotlib.pyplot as plt

    # MyPerceptron uses numpy directly, so list it next to scikit-learn
    # instead of relying on it being pulled in as a dependency.
    venv = tb.setup_venv("my_sklearn_venv", ["scikit-learn", "numpy"])
    venv.add_python_class(MyPerceptron)

    # Deploy the class as a model fed by the feature and label fields.
    sklearn_model = tb.Python(class_name=MyPerceptron.__name__, venv_name=venv.name)
    deployed_model_sklearn = sklearn_model.deploy(
        "sklearn_model", input=input_features, labels=label
    )

    # Register the metric, then fetch and plot its evaluations. The names say
    # "rmse" because the metric is WindowedRMSE, not an AUC.
    metric_name = "WindowedRMSE"
    deployed_model_sklearn.add_metric(metric_name)
    model_rmse_scores = deployed_model_sklearn.get_evaluation(metric_name)
    plt.plot([model_rmse_score.metric for model_rmse_score in model_rmse_scores])

Example - Leveraging Vowpal Wabbit (opens in a new tab)

Vowpal Wabbit provides fast, efficient, and flexible online machine learning techniques for reinforcement learning, supervised learning, and more.

In this example we use the new vowpal-wabbit-next Python bindings. Note that we need to transform our input to Vowpal's native text format.

    !pip install vowpal-wabbit-next
    import vowpal_wabbit_next as vw
    import turboml.common.pytypes as types
    
    
    class MyVowpalModel:
        """Vowpal Wabbit workspace exposed through the user-defined model interface."""

        def __init__(self):
            self.vw_workspace = vw.Workspace()
            self.vw_parser = vw.TextFormatParser(self.vw_workspace)

        def init_imports(self):
            """Import the external module this class relies on."""
            import vowpal_wabbit_next as vw

        def to_vw_format(self, features, label=None):
            """Convert a feature vector into the Vowpal Wabbit format.

            Features are written as ``index:value`` pairs numbered from 1; the
            label, when given, is placed before the ``|`` separator.
            """
            indexed_features = " ".join(
                f"{idx}:{feat}" for idx, feat in enumerate(features, start=1)
            )
            label_prefix = "" if label is None else f"{label} "
            return self.vw_parser.parse_line(f"{label_prefix}| {indexed_features}\n")

        def learn_one(self, input: types.InputData):
            """Train the workspace on one labelled sample."""
            labelled_example = self.to_vw_format(input.numeric, input.label)
            self.vw_workspace.learn_one(labelled_example)

        def predict_one(self, input: types.InputData, output: types.OutputData):
            """Predict for one unlabelled sample and write the result to ``output``."""
            example = self.to_vw_format(input.numeric)
            prediction = self.vw_workspace.predict_one(example)
            output.set_score(prediction)

In the cell below we set up a virtual environment for this model and install the new package it needs, in this case vowpal-wabbit-next. A virtual environment is identified by its name, so as long as the name remains the same we can reuse the virtual environment multiple times.

    import matplotlib.pyplot as plt

    # Create the virtual environment with the package the model needs and
    # register the custom class in it.
    venv = tb.setup_venv("my_vowpal_venv", ["vowpal-wabbit-next"])
    venv.add_python_class(MyVowpalModel)

    # Deploy the class as a model fed by the feature and label fields.
    vw_model = tb.Python(class_name=MyVowpalModel.__name__, venv_name=venv.name)
    deployed_model_vw = vw_model.deploy("vw_model", input=input_features, labels=label)

    # Register the metric, then fetch and plot its evaluations. The names say
    # "rmse" because the metric is WindowedRMSE, not an AUC.
    metric_name = "WindowedRMSE"
    deployed_model_vw.add_metric(metric_name)
    model_rmse_scores = deployed_model_vw.get_evaluation(metric_name)
    plt.plot([model_rmse_score.metric for model_rmse_score in model_rmse_scores])