TurboML Ibis Quickstart
import turboml as tb
import pandas as pd
import ibis
# Register the bundled fraud-detection features and labels as online datasets.
# load_if_exists=True is passed for both names (presumably so a dataset that
# is already registered under that name is reused -- confirm in TurboML docs).
feature_source = tb.datasets.FraudDetectionDatasetFeatures()
transactions = feature_source.to_online("ibisqs_transactions", load_if_exists=True)

label_source = tb.datasets.FraudDetectionDatasetLabels()
labels = label_source.to_online("ibisqs_transaction_labels", load_if_exists=True)
The following cells show how to define features in Ibis. The table parameter of the create_ibis_features function takes the Ibis expression that is used to prepare the features.
# Work on the online dataset through its ibis table expression.
table = transactions.to_ibis()


@ibis.udf.scalar.python()
def add_one(x: float) -> float:
    """Return ``x`` plus one (declared as an ibis scalar Python UDF)."""
    incremented = x + 1
    return incremented


# Add a derived column by applying the UDF to the transactionAmount column.
adjusted_amount = add_one(table.transactionAmount)
table = table.mutate(updated_transaction_amount=adjusted_amount)
# Window spec handed to the aggregation: frame bounds preceding=100 and
# following=0, grouped by transactionID.
rolling_window = ibis.window(
    preceding=100, following=0, group_by=[table.transactionID]
)
rolling_amount = table.updated_transaction_amount.sum().over(
    window=rolling_window,
    order_by=table.timestamp,
)

# True where the IP country code differs from the lower-cased billing
# country code; converted to 1/0 in the projection below.
country_mismatch = table.ipCountryCode != table.paymentBillingCountryCode.lower()

# Projection that defines the feature columns (keyword order is kept as-is).
agged = table.select(
    total_transaction_amount=rolling_amount,
    transactionID=table.transactionID,
    is_potential_fraud=country_mismatch.ifelse(1, 0),
    ipCountryCode=table.ipCountryCode,
    paymentBillingCountryCode=table.paymentBillingCountryCode,
)

# Register the expression as ibis features, then display the local features.
transactions.feature_engineering.create_ibis_features(agged)
transactions.feature_engineering.get_local_features()
We need to tell the platform to start computations for all pending features for the given topic. This can be done by calling the materialize_ibis_features function.
# Ask the platform to start computing the pending ibis features.
transactions.feature_engineering.materialize_ibis_features()

# Columns produced by the ibis feature expression that the model consumes.
numerical_fields = ["total_transaction_amount", "is_potential_fraud"]
features = transactions.get_model_inputs(numerical_fields=numerical_fields)
label = labels.get_model_labels(label_field="is_fraud")

# RCF model configured with 50 trees, deployed under the name "demo_model_ibis".
model = tb.RCF(number_of_trees=50)
deployed_model_rcf = model.deploy(
    name="demo_model_ibis",
    input=features,
    labels=label,
)
# Fetch the outputs produced so far by the deployed model and inspect them.
outputs = deployed_model_rcf.get_outputs()
len(outputs)
sample_output = outputs[-1]
sample_output

import matplotlib.pyplot as plt

# Collect the score of every output record, then plot them in order.
scores = [entry["record"].score for entry in outputs]
plt.plot(scores)
# Endpoints exposed by the deployed model.
model_endpoints = deployed_model_rcf.get_endpoints()
model_endpoints

# Build a single query payload from the last row of the dataset preview,
# restricted to the columns listed below.
transactions_df = transactions.preview_df
query_columns = ["transactionID", "ipCountryCode", "paymentBillingCountryCode"]
latest_row = transactions_df[query_columns].iloc[-1]
model_query_datapoint = latest_row.to_dict()
model_query_datapoint
import requests

# POST the sample data point as JSON to the model's first endpoint, using the
# TurboML API headers. An explicit timeout is passed because requests waits
# indefinitely by default, which would hang the notebook if the endpoint
# never responds.
resp = requests.post(
    model_endpoints[0],
    json=model_query_datapoint,
    headers=tb.common.api.headers,
    timeout=30,
)
resp.json()
Batch Inference on Models
While the above method is more suited for individual requests, we can also perform batch inference on the models. We use the get_inference function for this purpose.
# Run batch inference by passing the whole previewed DataFrame in one call.
# Note: this rebinds `outputs`, replacing the value returned earlier by
# get_outputs().
outputs = deployed_model_rcf.get_inference(transactions_df)
outputs