Bring Your Own Models
TF-IDF Example

TF-IDF embedding example using gRPC Client

Open In Colab (opens in a new tab)

This example demonstrates how to use our gRPC API client to generate TF-IDF embeddings.

    !pip install nltk grpc
    from tfidf_grpc_server import serve
    import turboml as tb
    import pandas as pd

Start the gRPC server for TF-IDF embedding from a Jupyter notebook

    import threading
    
    
    def run_server_in_background(url):
        """Thread entry point: hand ``url`` to ``serve`` to start the gRPC server."""
        serve(url)
    
    
    # Address the server is asked to listen on.
    url = "0.0.0.0:50047"
    
    # A daemon thread does not keep the interpreter alive, so the notebook
    # kernel can shut down without waiting for the server thread to finish.
    server_thread = threading.Thread(
        daemon=True,
        target=run_server_in_background,
        kwargs={"url": url},
    )
    server_thread.start()
    
    print("gRPC server is running in the background...")

Load text dataset

    import re
    
    # Read the whole corpus in one go. The encoding is pinned so the result
    # does not depend on the platform's default locale
    # (assumes the data file is UTF-8 — TODO confirm).
    file_path = "data/tfidf_test_data.txt"
    with open(file_path, "r", encoding="utf-8") as file:
        text = file.read()
    
    # Split on a whitespace character that follows "." or "?", unless the
    # preceding characters look like an abbreviation ("e.g.") or a short
    # capitalised title ("Mr.").
    sentences = re.split(r"(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s", text)
    
    # Trim each fragment once and drop the ones that are empty after trimming.
    sentences = [sentence for sentence in map(str.strip, sentences) if sentence]
    # Every sentence gets the same placeholder label, 0.
    labels = [0] * len(sentences)
    text_dict_test = {"text": sentences}
    label_dict_test = {"labels": labels}
    
    # reset_index() moves the default 0..n-1 row index into an "index" column,
    # which is the column passed as key_field to the datasets below.
    text_df_test = pd.DataFrame(text_dict_test).reset_index()
    label_df_test = pd.DataFrame(label_dict_test).reset_index()
    
    # NOTE: the "train" and "test" datasets are built from the same frames.
    text_train = tb.PandasDataset(
        dataframe=text_df_test, key_field="index", streaming=False
    )
    labels_train = tb.PandasDataset(
        dataframe=label_df_test, key_field="index", streaming=False
    )
    
    text_test = tb.PandasDataset(
        dataframe=text_df_test, key_field="index", streaming=False
    )
    labels_test = tb.PandasDataset(
        dataframe=label_df_test, key_field="index", streaming=False
    )
    
    # The only input column is the raw sentence text.
    textual_fields = ["text"]
    features_train = text_train.get_input_fields(textual_fields=textual_fields)
    targets_train = labels_train.get_label_field(label_field="labels")
    
    features_test = text_test.get_input_fields(textual_fields=textual_fields)
    targets_test = labels_test.get_label_field(label_field="labels")

Use the TurboML client to send requests to the gRPC server

    # Point the client at the address the background server was started with
    # (`url`, defined where the server thread is launched) so the two cannot
    # drift apart.
    # NOTE(review): the units of connection_timeout / max_request_time are not
    # visible here — check the GRPCClient documentation.
    grpc_model = tb.GRPCClient(
        server_url=url,
        connection_timeout=10000,
        max_request_time=10000,
        max_retries=1,
    )
    
    # Learn on the training inputs/labels, then run prediction on the test inputs.
    model_trained = grpc_model.learn(features_train, targets_train)
    outputs_test = model_trained.predict(features_test)
    
    # Bare expression so the notebook displays the result of the cell.
    outputs_test