TF-IDF embedding example using gRPC Client
This example demonstrates how to use our gRPC API client to generate TF-IDF embeddings.
!pip install nltk grpc
from tfidf_grpc_server import serve
import turboml as tb
import pandas as pd
Start the gRPC server for TF-IDF embedding from the Jupyter notebook
import threading

# Address handed to serve() when the background server is launched.
url = "0.0.0.0:50047"


def run_server_in_background(url):
    """Thread entry point: start the gRPC server by calling ``serve(url)``."""
    serve(url)


# Run the server on its own thread so the notebook can continue executing cells.
server_thread = threading.Thread(target=run_server_in_background, args=(url,))
# Daemon threads do not keep the interpreter alive when the main program exits.
server_thread.daemon = True
server_thread.start()
print("gRPC server is running in the background...")
Load text dataset
import re

# Plain-text corpus that is turned into one row per sentence.
file_path = "data/tfidf_test_data.txt"
# Decode explicitly as UTF-8 so the result does not depend on the platform's
# default locale encoding.
with open(file_path, "r", encoding="utf-8") as file:
    text = file.read()

# Split on a whitespace character that follows "." or "?". The two negative
# lookbehinds skip abbreviation-like patterns such as "e.g." and "Mr.".
sentences = re.split(r"(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s", text)
# Trim each piece once and drop the pieces that end up empty.
sentences = [
    sentence for sentence in (piece.strip() for piece in sentences) if sentence
]
# Every sentence receives the placeholder label 0.
labels = [0] * len(sentences)

text_dict_test = {"text": sentences}
label_dict_test = {"labels": labels}
# reset_index() turns the default 0..n-1 row index into an "index" column while
# leaving a fresh default index in place, so one call per frame is sufficient.
text_df_test = pd.DataFrame(text_dict_test).reset_index()
label_df_test = pd.DataFrame(label_dict_test).reset_index()
# Keyword arguments shared by all four dataset wrappers: rows are keyed by the
# "index" column and the datasets are created with streaming disabled.
dataset_kwargs = {"key_field": "index", "streaming": False}

# The train and test datasets are built from the same text / label frames.
text_train = tb.PandasDataset(dataframe=text_df_test, **dataset_kwargs)
labels_train = tb.PandasDataset(dataframe=label_df_test, **dataset_kwargs)
text_test = tb.PandasDataset(dataframe=text_df_test, **dataset_kwargs)
labels_test = tb.PandasDataset(dataframe=label_df_test, **dataset_kwargs)

# Only the "text" column is passed as a (textual) input field.
textual_fields = ["text"]

features_train = text_train.get_input_fields(textual_fields=textual_fields)
targets_train = labels_train.get_label_field(label_field="labels")
features_test = text_test.get_input_fields(textual_fields=textual_fields)
targets_test = labels_test.get_label_field(label_field="labels")
Use the TurboML client to send requests to the gRPC server
# Settings for the TurboML gRPC client; server_url is the same address the
# background server was started with.
grpc_client_settings = dict(
    server_url="0.0.0.0:50047",
    connection_timeout=10000,
    max_request_time=10000,
    max_retries=1,
)
grpc_model = tb.GRPCClient(**grpc_client_settings)

# Train on the training features/labels, then run prediction on the test features.
model_trained = grpc_model.learn(features_train, targets_train)
outputs_test = model_trained.predict(features_test)

# Bare expression so the notebook displays the prediction result.
outputs_test