ONNX tutorial with PyTorch
Install necessary libraries
!pip install torch --index-url https://download.pytorch.org/whl/cpu
!pip install onnx==1.14.1
PyTorch - Standard Model Training
The following code blocks define a standard PyTorch dataset, data loader, and model training loop. This part is completely independent of TurboML.
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import io
# Read the raw CSVs. reset_index() turns the row number into an explicit
# "index" column, which is the key used for the join below.
transactions = pd.read_csv("data/transactions.csv").reset_index()
labels = pd.read_csv("data/labels.csv").reset_index()

# Right join on the row number: every row of `labels` is kept.
joined_df = transactions.merge(labels, on="index", how="right")
joined_df

# Columns that are fed to the network.
numerical_fields = [
    "transactionAmount",
    "localHour",
    "isProxyIP",
    "digitalItemCount",
    "physicalItemCount",
]

# Features are everything except the label column, narrowed to the numeric
# fields; the label itself is cast to int.
X = joined_df.drop(columns="is_fraud")
feats = X[numerical_fields]
targets = joined_df["is_fraud"].astype(int)
class TransactionsDataset(Dataset):
    """Map-style dataset over a feature table and a matching label column.

    Args:
        feats: Tabular features indexed positionally via ``.iloc``
            (one row per sample).
        targets: Labels indexed positionally via ``.iloc``; row ``i`` is the
            label for row ``i`` of ``feats``.
    """

    def __init__(self, feats, targets):
        self.feats = feats
        self.targets = targets

    def __len__(self):
        """Return the number of samples (rows of ``feats``)."""
        return len(self.feats)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict of float32 tensors.

        Returns:
            ``{"x": 1-D feature tensor, "y": 0-D label tensor}``.
        """
        # Convert the row to a float32 NumPy array before handing it to
        # torch. Passing the pandas Series directly makes torch treat it as a
        # generic Python sequence, which relies on integer-position lookups
        # on a string-labelled Series (deprecated in pandas) and is slower.
        row = self.feats.iloc[idx].to_numpy(dtype="float32")
        return {
            "x": torch.tensor(row, dtype=torch.float),
            "y": torch.tensor(self.targets.iloc[idx], dtype=torch.float),
        }
class NeuralNet(nn.Module):
    """Three-layer fully connected classifier that returns raw class scores.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Width of the two hidden layers. Defaults to 64.
        num_classes: Number of output scores per sample. Defaults to 2
            (binary classification: fraud or not fraud).
    """

    def __init__(self, input_size, hidden_size=64, num_classes=2):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Map ``(batch_size, input_size)`` to ``(batch_size, num_classes)``."""
        # (batch_size, input_size) -> (batch_size, hidden_size)
        hidden = torch.relu(self.fc1(x))
        # (batch_size, hidden_size) -> (batch_size, hidden_size)
        hidden = torch.relu(self.fc2(hidden))
        # No activation on the output layer: the raw scores are returned.
        return self.fc3(hidden)
# Build the network with one input unit per selected feature column.
model = NeuralNet(input_size=feats.shape[1])
model

# Wrap the features/labels in a Dataset and peek at the first sample.
ds = TransactionsDataset(feats, targets)
ds[0]

# Random 80/20 train/test split.
train_size = int(0.8 * len(ds))
test_size = len(ds) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(
    ds, lengths=[train_size, test_size]
)

# Only the training loader is created with shuffle=True.
batch_size = 64
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Loss and optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Use the GPU when torch reports one as available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

num_epochs = 10
# Run num_epochs passes over the training loader and print the mean
# per-batch loss after each pass.
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for batch in train_loader:
        # Move the batch to the model's device; labels are cast to long
        # before being passed to the loss.
        features = batch["x"].float().to(device)
        class_ids = batch["y"].long().to(device)

        optimizer.zero_grad()
        logits = model(features)
        loss = criterion(logits, class_ids)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    avg_loss = running_loss / len(train_loader)
    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {avg_loss:.4f}")
# Measure classification accuracy on the held-out test loader.
model.eval()  # Set the model to evaluation mode
total_correct = 0
total_samples = 0
with torch.no_grad():
    for data in test_loader:
        # Batches must be on the same device as the model. Without the
        # .to(device) calls this loop fails with a device-mismatch error
        # whenever the model was moved to CUDA.
        inputs = data["x"].float().to(device)
        tars = data["y"].long().to(device)
        outputs = model(inputs)
        # Predicted class = index of the largest score in each row.
        _, predicted = torch.max(outputs, 1)
        total_samples += tars.size(0)
        total_correct += (predicted == tars).sum().item()
accuracy = total_correct / total_samples
print("Accuracy:", accuracy)
Export model to ONNX format
Exporting a model to ONNX format depends on the framework. Tutorials for different frameworks can be found at https://github.com/onnx/tutorials#converting-to-onnx-format
# Export the trained network to ONNX, serialised into an in-memory buffer.
model.eval()
# The example input used for export must be on the same device as the model's
# weights; a CPU tensor fails if the model was moved to CUDA.
export_device = next(model.parameters()).device
sample_input = torch.randn(1, len(numerical_fields), device=export_device)
buffer = io.BytesIO()
torch.onnx.export(model, sample_input, buffer, export_params=True, verbose=True)
# Raw ONNX bytes.
onnx_model_string = buffer.getvalue()
Create an ONNX model with TurboML
Now that we've converted the model to ONNX format, we can deploy it with TurboML.
import turboml as tb
# Register both frames with TurboML (upload=True), keyed by the "index" column.
# NOTE: `transactions` and `labels` are rebound here from pandas DataFrames to
# TurboML dataset objects.
transactions = tb.PandasDataset(
    dataset_name="transactions_onxx_torch",
    key_field="index",
    dataframe=transactions,
    upload=True,
)
labels = tb.PandasDataset(
    dataset_name="labels_onxx_torch",
    key_field="index",
    dataframe=labels,
    upload=True,
)

# Select the same numeric columns the network was trained on, plus the label.
features = transactions.get_input_fields(numerical_fields=numerical_fields)
label = labels.get_label_field(label_field="is_fraud")

# Store the exported ONNX bytes under a name, then deploy a model that
# refers to that name.
tb.set_onnx_model("torchmodel", onnx_model_string)
onnx_model = tb.ONNX(model_save_name="torchmodel")
deployed_model = onnx_model.deploy("onnx_model_torch", input=features, labels=label)

# Attach the WindowedAUC metric, fetch its evaluations, and plot them.
deployed_model.add_metric("WindowedAUC")
model_auc_scores = deployed_model.get_evaluation("WindowedAUC")
plt.plot([score.metric for score in model_auc_scores])