Bring Your Own Models
TF-IDF Example

TF-IDF embedding example using gRPC Client

Open In Colab (opens in a new tab)

This example demonstrates how to use our gRPC API client to generate TF-IDF embeddings.

import pandas as pd
import turboml as tb
from tfidf_grpc_server import serve
!pip install nltk grpcio

Start the gRPC server for TF-IDF embedding from the Jupyter notebook

import threading
 
 
def run_server_in_background(url):
    """Start the TF-IDF gRPC server on ``url``.

    Thin wrapper around ``serve`` (imported from ``tfidf_grpc_server``) so it
    can be handed to ``threading.Thread`` as a target.

    Args:
        url: Address forwarded unchanged to ``serve``; this notebook passes a
            ``"host:port"`` string.
    """
    # NOTE(review): presumably serve() blocks for as long as the server runs,
    # which is why it is executed on its own thread -- confirm.
    serve(url)
 
 
# Address handed to the gRPC server when it starts.
url = "0.0.0.0:50047"

# Launch the server on a daemon thread: the notebook stays interactive and the
# thread does not keep the interpreter alive at shutdown.
server_thread = threading.Thread(
    target=run_server_in_background,
    args=(url,),
    daemon=True,
)
server_thread.start()

print("gRPC server is running in the background...")

Load text dataset

import re
 
# Read the whole corpus into memory. The encoding is passed explicitly so the
# file is decoded the same way on every platform instead of depending on the
# locale default.
# NOTE(review): assumes the data file is UTF-8 encoded -- confirm.
file_path = "data/tfidf_test_data.txt"
with open(file_path, "r", encoding="utf-8") as file:
    text = file.read()

# Split on a whitespace character that directly follows "." or "?", unless the
# preceding characters look like an abbreviation: a "<word char>.<word char><any>"
# run (e.g. "e.g.") or a capital + lowercase letter + "." (e.g. "Mr.").
# Note that "!" is not treated as a sentence terminator by this pattern.
sentences = re.split(r"(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s", text)

# Trim each piece once and drop the ones that are empty after trimming.
sentences = [
    stripped for stripped in (sentence.strip() for sentence in sentences) if stripped
]

# Every sentence gets the same constant label 0; one label per sentence.
labels = [0] * len(sentences)

text_dict_test = {"text": sentences}
label_dict_test = {"labels": labels}

# reset_index() moves the default 0..n-1 index into an "index" column, which is
# used as the key field of the datasets below. A single call is enough: the
# returned frame already carries a fresh default index.
text_df_test = pd.DataFrame(text_dict_test).reset_index()
label_df_test = pd.DataFrame(label_dict_test).reset_index()

# The "train" and "test" datasets below are built from the same two DataFrames.
text_train = tb.PandasDataset(
    dataframe=text_df_test, key_field="index", streaming=False
)
labels_train = tb.PandasDataset(
    dataframe=label_df_test, key_field="index", streaming=False
)

text_test = tb.PandasDataset(dataframe=text_df_test, key_field="index", streaming=False)
labels_test = tb.PandasDataset(
    dataframe=label_df_test, key_field="index", streaming=False
)

# Only the "text" column is used as a (textual) input feature.
textual_fields = ["text"]
features_train = text_train.get_input_fields(textual_fields=textual_fields)
targets_train = labels_train.get_label_field(label_field="labels")

features_test = text_test.get_input_fields(textual_fields=textual_fields)
targets_test = labels_test.get_label_field(label_field="labels")

Using the TurboML client to send requests to the gRPC server

# Point the client at the address the background server was started on.
# Reusing `url` (defined where the server thread is launched) keeps the client
# and server addresses from drifting apart; its value is "0.0.0.0:50047".
grpc_model = tb.GRPCClient(
    server_url=url,
    connection_timeout=10000,
    max_request_time=10000,
    max_retries=1,
)

# Send the training features/labels through the client, then request outputs
# for the test features from the returned model object.
model_trained = grpc_model.learn(features_train, targets_train)
outputs_test = model_trained.predict(features_test)

# Bare expression so the notebook displays the result of the cell.
outputs_test