Bring Your Own Models
ONNX - PyTorch

ONNX tutorial with PyTorch

Install necessary libraries

    !pip install torch --index-url https://download.pytorch.org/whl/cpu
    !pip install onnx==1.14.1
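
You can optionally confirm that the libraries were installed as expected before moving on:

    # Optional sanity check: print the installed versions
    import torch
    import onnx

    print(torch.__version__)
    print(onnx.__version__)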

PyTorch - Standard Model Training

The following blocks of code define a standard PyTorch dataset, dataloader, and model training loop. This is completely independent of TurboML.

    import pandas as pd
    import torch
    from torch.utils.data import Dataset, DataLoader
    import torch.nn as nn
    import torch.optim as optim
    import matplotlib.pyplot as plt
    import io
    # Load the raw transactions and labels, then join them on the shared index
    transactions = pd.read_csv("data/transactions.csv").reset_index()
    labels = pd.read_csv("data/labels.csv").reset_index()
    joined_df = pd.merge(transactions, labels, on="index", how="right")
    joined_df
    # Separate the inputs from the "is_fraud" target; only numeric columns are used as features
    X = joined_df.drop("is_fraud", axis=1)
    numerical_fields = [
        "transactionAmount",
        "localHour",
        "isProxyIP",
        "digitalItemCount",
        "physicalItemCount",
    ]
    
    feats = X[numerical_fields]
    targets = joined_df["is_fraud"].astype(int)
    class TransactionsDataset(Dataset):
        def __init__(self, feats, targets):
            self.feats = feats
            self.targets = targets
    
        def __len__(self):
            return len(self.feats)
    
        def __getitem__(self, idx):
            return {
                "x": torch.tensor(self.feats.iloc[idx], dtype=torch.float),
                "y": torch.tensor(self.targets.iloc[idx], dtype=torch.float),
            }
    class NeuralNet(nn.Module):
        def __init__(self, input_size):
            super(NeuralNet, self).__init__()
            self.fc1 = nn.Linear(input_size, 64)
            self.fc2 = nn.Linear(64, 64)
            self.fc3 = nn.Linear(
                64, 2
            )  # Output size is 2 for binary classification (fraud or not fraud)
    
        def forward(self, x):
            # x --> (batch_size, input_size)
            x = torch.relu(self.fc1(x))
            # x --> (batch_size, 64)
            x = torch.relu(self.fc2(x))
            # x --> (batch_size, 64)
            x = self.fc3(x)
            # x --> (batch_size, 2)
            return x
    model = NeuralNet(input_size=feats.shape[1])
    model
    ds = TransactionsDataset(feats, targets)
    ds[0]
    train_size = int(0.8 * len(ds))
    test_size = len(ds) - train_size
    train_dataset, test_dataset = torch.utils.data.random_split(ds, [train_size, test_size])
    batch_size = 64
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    num_epochs = 10
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
    
        for data in train_loader:
            inputs = data["x"].float().to(device)
            tars = data["y"].long().to(device)
    
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, tars)
            loss.backward()
            optimizer.step()
    
            running_loss += loss.item()
    
        avg_loss = running_loss / len(train_loader)
        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {avg_loss:.4f}")
    model.eval()  # Set the model to evaluation mode
    total_correct = 0
    total_samples = 0
    
    with torch.no_grad():
        for data in test_loader:
            inputs = data["x"].float()
            tars = data["y"].long()
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total_samples += tars.size(0)
            total_correct += (predicted == tars).sum().item()
    
    accuracy = total_correct / total_samples
    print("Accuracy:", accuracy)

Export model to ONNX format

Exporting a model to ONNX format depends on the framework. Tutorials for different frameworks can be found at https://github.com/onnx/tutorials#converting-to-onnx-format.

    model.eval()
    # Dummy input for tracing; keep it on the same device as the model
    sample_input = torch.randn(1, len(numerical_fields), device=device)
    buffer = io.BytesIO()
    torch.onnx.export(model, sample_input, buffer, export_params=True, verbose=True)
    onnx_model_string = buffer.getvalue()
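
Before handing these bytes to TurboML, you can optionally check that the export produced a well-formed graph. This is a minimal sanity check using only the onnx package installed above; it is not part of the TurboML workflow itself.

    import onnx

    # Parse the exported bytes back into a ModelProto and validate its structure
    onnx_model_proto = onnx.load_model_from_string(onnx_model_string)
    onnx.checker.check_model(onnx_model_proto)
    print(onnx_model_proto.graph.input)  # inspect the graph's expected input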

Create an ONNX model with TurboML

Now that we've converted the model to ONNX format, we can deploy it with TurboML.

    import turboml as tb
    transactions = tb.PandasDataset(
        dataset_name="transactions_onxx_torch",
        key_field="index",
        dataframe=transactions,
        upload=True,
    )
    labels = tb.PandasDataset(
        dataset_name="labels_onxx_torch", key_field="index", dataframe=labels, upload=True
    )
    features = transactions.get_input_fields(numerical_fields=numerical_fields)
    label = labels.get_label_field(label_field="is_fraud")
    tb.set_onnx_model("torchmodel", onnx_model_string)
    onnx_model = tb.ONNX(model_save_name="torchmodel")
    deployed_model = onnx_model.deploy("onnx_model_torch", input=features, labels=label)
    deployed_model.add_metric("WindowedAUC")
    model_auc_scores = deployed_model.get_evaluation("WindowedAUC")
    plt.plot([model_auc_score.metric for model_auc_score in model_auc_scores])
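
Beyond the evaluation metric, you will usually also want the model's predictions. The snippet below is only a sketch: it assumes the deployed model exposes TurboML's get_outputs() helper used in other TurboML examples, so treat the method name and the shape of the returned records as assumptions rather than part of this tutorial.

    # Sketch (assumed API): fetch the predictions streamed by the deployed model
    outputs = deployed_model.get_outputs()
    print(len(outputs))  # how many records have been scored so far
    print(outputs[-1])  # inspect the most recent prediction record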