Native Python Models
While TurboML offers a wide array of algorithms implemented with performant machine-native code, we also give you the flexibility to use your own models in Python when necessary, allowing the use of any public library from PyPI. Let's walk through some simple examples of models based on River and scikit-learn.
Imports and Utilities
from river import datasets
import pandas as pd
import turboml as tb
Prepare an Evaluation Dataset
We choose the standard Credit Card Fraud dataset that ships with River to evaluate our models.
# Load the Credit Card Fraud dataset that ships with River.
dataset = datasets.CreditCard()
dataset

# Peek at the first record to see what a single sample looks like.
sample, score = next(iter(dataset))
sample

# Walk the dataset once, collecting the feature dicts and wrapping each
# label in a one-column record so both can become DataFrames.
sample_inputs, sample_labels = [], []
for sample, score in dataset:
    sample_inputs.append(sample)
    sample_labels.append({"score": score})

# We don't want to use the "Time" feature for this example.
df_features = pd.DataFrame.from_dict(sample_inputs).drop(columns=["Time"])
df_labels = pd.DataFrame.from_dict(sample_labels)
And finally load them as datasets in the TurboML Platform
# reset_index() turns the row number into an "index" column, which is
# then used as the key field of each uploaded dataset.
keyed_features = df_features.reset_index()
keyed_labels = df_labels.reset_index()

features = tb.PandasDataset(
    dataset_name="cc_feats_native",
    dataframe=keyed_features,
    key_field="index",
    upload=True,
)
labels = tb.PandasDataset(
    dataset_name="cc_labels_native",
    dataframe=keyed_labels,
    key_field="index",
    upload=True,
)
Isolate features
# Every remaining feature column is passed to the model as a numerical field.
numerical_cols = list(df_features.columns)
input_features = features.get_input_fields(numerical_fields=numerical_cols)

# The label dataset has a single "score" column.
label = labels.get_label_field(label_field="score")
Structure of User Defined Models
A custom Python model must implement 3 instance methods: learn_one, predict_one and init_imports.
The interface and usage are described below and explored further in the examples contained in this notebook.
# The annotations below refer to ``types``, so it must be imported before
# the class body is evaluated.
import turboml.common.pytypes as types


class CustomModel:
    """Skeleton showing the three methods a custom Python model implements."""

    def init_imports(self):
        """
        Import any external symbols/modules used in this class
        """
        pass

    def learn_one(self, input: types.InputData):
        """
        Receives labelled data for the model to learn from
        """
        pass

    def predict_one(self, input: types.InputData, output: types.OutputData):
        """
        Receives input features for a prediction, must pass output to the
        output object
        """
        pass
Example - Leveraging River (opens in a new tab)
River is a popular library for online machine learning, and its models come with learn_one and predict_one out of the box. However, it is important to note the difference between the input to a user-defined model and the input to a River model, which takes a dictionary and a label as inputs for a supervised algorithm. In this example we create a custom model using River according to the interface described above and put it in a separate Python module.
!pip install river
from river import linear_model
import turboml.common.pytypes as types
class MyLogisticRegression:
    """River logistic regression exposed through the custom-model methods."""

    def __init__(self):
        self.model = linear_model.LogisticRegression()

    def init_imports(self):
        """Import the external module this class relies on."""
        from river import linear_model

    def learn_one(self, input: types.InputData):
        """Train the River model on one labelled sample."""
        # River takes a feature dictionary, so key each numeric value by
        # its position in the input.
        features = dict(enumerate(input.numeric))
        self.model.learn_one(features, input.label)

    def predict_one(self, input: types.InputData, output: types.OutputData):
        """Predict for one sample and hand the score to ``output``."""
        features = dict(enumerate(input.numeric))
        prediction = self.model.predict_one(features)
        output.set_score(float(prediction))

        # example: setting embeddings
        # output.resize_embeddings(3)
        # mut = output.embeddings()
        # mut[0] = 1
        # mut[1] = 2
        # mut[2] = 3

        # example: appending to feature scores
        # this api is an alternative to resize + set as above,
        # but less efficient
        # output.append_feature_score(0.5)
Since Python packages can have multiple external dependencies, we can make use of tb.setup_venv(name_of_venv, [list of packages]). This creates a virtual environment that enables interaction with the platform and makes it easy to install external dependencies.
# Set up a virtual environment with the packages the model needs and
# register the custom class in it.
venv = tb.setup_venv("my_river_venv", ["river", "numpy"])
venv.add_python_class(MyLogisticRegression)

# Deploy the class as a Python model on the prepared features and label.
river_model = tb.Python(class_name=MyLogisticRegression.__name__, venv_name=venv.name)
deployed_model_river = river_model.deploy(
    "river_model", input=input_features, labels=label
)

import matplotlib.pyplot as plt

# Track the windowed RMSE of the deployed model and plot its values.
deployed_model_river.add_metric("WindowedRMSE")
model_rmse_scores = deployed_model_river.get_evaluation("WindowedRMSE")
plt.plot([model_rmse_score.metric for model_rmse_score in model_rmse_scores])
Example - An Online Model with Sci-Kit Learn
With scikit-learn you can implement online learning using partial_fit(), similar to the code example below.
!pip install scikit-learn
from sklearn.linear_model import Perceptron
import numpy as np
import turboml.common.pytypes as types
class MyPerceptron:
    """scikit-learn Perceptron trained one sample at a time via partial_fit."""

    def __init__(self):
        self.model = Perceptron()
        # Becomes True after the first learn_one call; predict_one reports
        # a default score until then.
        self.fitted = False

    def init_imports(self):
        """Import every external symbol/module used in this class."""
        from sklearn.linear_model import Perceptron
        import numpy as np  # used by learn_one and predict_one

    def learn_one(self, input: types.InputData):
        """Update the perceptron with one labelled sample."""
        features = np.array(input.numeric).reshape(1, -1)
        target = np.array(input.label).reshape(-1)
        # The class list is supplied on the first call only; later calls
        # pass None, which is partial_fit's default.
        classes = None if self.fitted else [0, 1]
        self.model.partial_fit(features, target, classes=classes)
        self.fitted = True

    def predict_one(self, input: types.InputData, output: types.OutputData):
        """Predict for one sample and hand the score to ``output``."""
        if not self.fitted:
            # Nothing has been learned yet, so report a default score.
            output.set_score(0.0)
            return
        features = np.array(input.numeric).reshape(1, -1)
        score = self.model.predict(features)[0]
        # Convert the numpy scalar to a plain float, matching how the
        # River example passes its score.
        output.set_score(float(score))
# Set up a virtual environment with scikit-learn and register the custom
# class in it.
venv = tb.setup_venv("my_sklearn_venv", ["scikit-learn"])
venv.add_python_class(MyPerceptron)

# Deploy the class as a Python model on the prepared features and label.
sklearn_model = tb.Python(class_name=MyPerceptron.__name__, venv_name=venv.name)
deployed_model_sklearn = sklearn_model.deploy(
    "sklearn_model", input=input_features, labels=label
)

import matplotlib.pyplot as plt

# Track the windowed RMSE of the deployed model and plot its values.
deployed_model_sklearn.add_metric("WindowedRMSE")
model_rmse_scores = deployed_model_sklearn.get_evaluation("WindowedRMSE")
plt.plot([model_rmse_score.metric for model_rmse_score in model_rmse_scores])
Example - Leveraging Vowpal Wabbit (opens in a new tab)
Vowpal Wabbit provides fast, efficient, and flexible online machine learning techniques for reinforcement learning, supervised learning, and more.
In this example we use the new vowpal-wabbit-next Python bindings. Note that we need to transform our input to Vowpal's native text format.
!pip install vowpal-wabbit-next
import vowpal_wabbit_next as vw
import turboml.common.pytypes as types
class MyVowpalModel:
    """Vowpal Wabbit workspace exposed through the custom-model methods."""

    def __init__(self):
        self.vw_workspace = vw.Workspace()
        self.vw_parser = vw.TextFormatParser(self.vw_workspace)

    def init_imports(self):
        """Import the external module this class relies on."""
        import vowpal_wabbit_next as vw

    def to_vw_format(self, features, label=None):
        """Convert a feature vector into the Vowpal Wabbit format.

        Features are numbered from 1 and written as ``index:value`` pairs
        after the ``|`` separator; the label, when given, goes in front.
        The resulting text line is parsed with the workspace's text parser.
        """
        pairs = " ".join(
            f"{idx}:{feat}" for idx, feat in enumerate(features, start=1)
        )
        prefix = "" if label is None else f"{label} "
        vw_text = f"{prefix}| {pairs}\n"
        return self.vw_parser.parse_line(vw_text)

    def learn_one(self, input: types.InputData):
        """Train the workspace on one labelled sample."""
        example = self.to_vw_format(input.numeric, input.label)
        self.vw_workspace.learn_one(example)

    def predict_one(self, input: types.InputData, output: types.OutputData):
        """Predict for one unlabelled sample and hand the score to ``output``."""
        example = self.to_vw_format(input.numeric)
        prediction = self.vw_workspace.predict_one(example)
        output.set_score(prediction)
In the cell below we set up a virtual environment, as before, to install the packages this model needs, in this case vowpal-wabbit-next. A virtual environment can be reused multiple times, as long as its name remains the same.
# Set up a virtual environment with the Vowpal Wabbit bindings and
# register the custom class in it.
venv = tb.setup_venv("my_vowpal_venv", ["vowpal-wabbit-next"])
venv.add_python_class(MyVowpalModel)

# Deploy the class as a Python model on the prepared features and label.
vw_model = tb.Python(class_name=MyVowpalModel.__name__, venv_name=venv.name)
deployed_model_vw = vw_model.deploy("vw_model", input=input_features, labels=label)

import matplotlib.pyplot as plt

# Track the windowed RMSE of the deployed model and plot its values.
deployed_model_vw.add_metric("WindowedRMSE")
model_rmse_scores = deployed_model_vw.get_evaluation("WindowedRMSE")
plt.plot([model_rmse_score.metric for model_rmse_score in model_rmse_scores])