Bring Your Own Models
TF-IDF Example

TF-IDF embedding example using gRPC Client

Open In Colab (opens in a new tab)

This example demonstrates how to use our gRPC API client to generate TF-IDF embeddings.

import turboml as tb
!pip install nltk grpcio

Start the gRPC server for TF-IDF embedding from a Jupyter notebook

import pandas as pd
from utils.tfidf_grpc_server import serve
import threading
 
 
def run_server_in_background(url: str) -> None:
    """Start the TF-IDF gRPC server; used as the target of a background thread.

    Args:
        url: Address string passed unchanged to ``serve``.
    """
    serve(url)  # This will start the gRPC server
 
 
# Address handed to the gRPC server when it starts.
url = "0.0.0.0:50047"

# Run the server on its own daemon thread so the notebook stays interactive
# and the thread does not keep the interpreter alive on exit.
server_thread = threading.Thread(target=run_server_in_background, args=(url,))
server_thread.daemon = True
server_thread.start()

print("gRPC server is running in the background...")

Load text dataset

import re
import urllib.request
 
# Fetch the sample corpus and decode the response body to text.
with urllib.request.urlopen(
    "https://raw.githubusercontent.com/TurboML-Inc/colab-notebooks/refs/heads/main/data/tfidf_test_data.txt"
) as response:
    text = response.read().decode()

# Split on whitespace that follows "." or "?", except where the lookbehinds
# detect an abbreviation-like pattern (e.g. "e.g." or "Mr."). Each piece is
# stripped, and pieces that end up empty are discarded.
sentences = [
    stripped
    for stripped in map(
        str.strip,
        re.split(r"(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s", text),
    )
    if stripped
]
# Every sentence gets the same placeholder label (0).
labels = [0] * len(sentences)
text_dict_test = {"text": sentences}
label_dict_test = {"labels": labels}

# reset_index() turns the default 0..n-1 index into an "index" column, which
# serves as the key field below. A single call is enough: the frame already
# carries a fresh RangeIndex afterwards, so a further reset_index(drop=True)
# would be a no-op.
text_df_test = pd.DataFrame(text_dict_test).reset_index()
label_df_test = pd.DataFrame(label_dict_test).reset_index()

# NOTE: the train and test datasets are intentionally built from the same
# frames; this example only demonstrates the gRPC round trip.
text_train = tb.LocalDataset.from_pd(df=text_df_test, key_field="index")
labels_train = tb.LocalDataset.from_pd(df=label_df_test, key_field="index")

text_test = tb.LocalDataset.from_pd(df=text_df_test, key_field="index")
labels_test = tb.LocalDataset.from_pd(df=label_df_test, key_field="index")

# The "text" column is the only model input and is declared as a textual field.
textual_fields = ["text"]
features_train = text_train.get_model_inputs(textual_fields=textual_fields)
targets_train = labels_train.get_model_labels(label_field="labels")

features_test = text_test.get_model_inputs(textual_fields=textual_fields)
targets_test = labels_test.get_model_labels(label_field="labels")

Use the TurboML client to send requests to the gRPC server

# Point the client at the address the background server was started with.
# Reusing `url` (instead of repeating the literal) keeps both ends in sync.
grpc_model = tb.GRPCClient(
    server_url=url,
    connection_timeout=10000,
    max_request_time=10000,
    max_retries=1,
)

# Train through the gRPC-backed model, then request outputs for the test set.
model_trained = grpc_model.learn(features_train, targets_train)
outputs_test = model_trained.predict(features_test)

# Bare expression: displays the outputs when run as a notebook cell.
outputs_test