import random
import colorsys

import relationalai as rai
from relationalai.std import aggregates, rel
from relationalai.std.graphs import Graph

# Seed Python's global RNG so the random values drawn later in this file
# (the per-community colors) are the same on every run.
random.seed(123)

# Provider object used below to execute SQL.
# NOTE(review): presumably picks up the active RelationalAI/Snowflake
# configuration — confirm against the RelationalAI docs.
provider = rai.Provider()

# Load the demo orders data. The SQL script below:
#   1. creates the RAI_DEMO.TASTYBYTES schema if it does not exist,
#   2. (re)creates the stage TMP pointing at an Azure blob container,
#   3. (re)creates a file format named `parquet` (the name is unqualified),
#   4. (re)creates the ORDERS table from loyalty_orders.parquet, casting each
#      field of the staged record ($1) to a typed column, and
#   5. drops the stage again.
# Note that CUSTOMER_ID is cast to double while the other *_ID columns are
# cast to int, and CHILDREN_COUNT is kept as a string.
provider.sql("""
begin
    create schema if not exists RAI_DEMO.TASTYBYTES;

    create or replace stage RAI_DEMO.TASTYBYTES.TMP
    URL = 'azure://raidocs.blob.core.windows.net/demos';

    create or replace file format parquet type='PARQUET';

    create or replace table RAI_DEMO.TASTYBYTES.ORDERS
    as
    select
        $1:ORDER_ID::int as ORDER_ID,
        $1:TRUCK_ID::int as TRUCK_ID,
        $1:ORDER_TS::string as ORDER_TS,
        $1:ORDER_TS_SECONDS::int as ORDER_TS_SECONDS,
        $1:ORDER_DETAIL_ID::int as ORDER_DETAIL_ID,
        $1:LINE_NUMBER::int as LINE_NUMBER,
        $1:TRUCK_BRAND_NAME::string as TRUCK_BRAND_NAME,
        $1:MENU_TYPE::string as MENU_TYPE,
        $1:PRIMARY_CITY::string as PRIMARY_CITY,
        $1:REGION::string as REGION,
        $1:COUNTRY::string as COUNTRY,
        $1:FRANCHISE_FLAG::int as FRANCHISE_FLAG,
        $1:FRANCHISE_ID::int as FRANCHISE_ID,
        $1:FRANCHISEE_FIRST_NAME::string as FRANCHISEE_FIRST_NAME,
        $1:FRANCHISEE_LAST_NAME::string as FRANCHISEE_LAST_NAME,
        $1:LOCATION_ID::int as LOCATION_ID,
        $1:CUSTOMER_ID::double as CUSTOMER_ID,
        $1:FIRST_NAME::string as FIRST_NAME,
        $1:LAST_NAME::string as LAST_NAME,
        $1:E_MAIL::string as E_MAIL,
        $1:PHONE_NUMBER::string as PHONE_NUMBER,
        $1:CHILDREN_COUNT::string as CHILDREN_COUNT,
        $1:GENDER::string as GENDER,
        $1:MARITAL_STATUS::string as MARITAL_STATUS,
        $1:MENU_ITEM_ID::int as MENU_ITEM_ID,
        $1:MENU_ITEM_NAME::string as MENU_ITEM_NAME,
        $1:QUANTITY::int as QUANTITY,
        $1:UNIT_PRICE::double as UNIT_PRICE,
        $1:PRICE::double as PRICE,
        $1:ORDER_AMOUNT::double as ORDER_AMOUNT,
        $1:ORDER_TAX_AMOUNT::double as ORDER_TAX_AMOUNT,
        $1:ORDER_DISCOUNT_AMOUNT::double as ORDER_DISCOUNT_AMOUNT,
        $1:ORDER_TOTAL::double as ORDER_TOTAL
    from
        @rai_demo.tastybytes.tmp/loyalty_orders.parquet
    (FILE_FORMAT => parquet);

    drop stage if exists rai_demo.tastybytes.tmp;
end;
""")

# Create the model that all types, rules and queries below are attached to.
model = rai.Model("TastyBytes")

# Record is declared with the ORDERS table as its source.
# NOTE(review): presumably one Record entity per table row, with the columns
# exposed as lower-case properties (e.g. r.customer_id) — confirm against the
# RelationalAI docs.
Record = model.Type("Record", source="RAI_DEMO.TASTYBYTES.ORDERS")

# Count number of rows in the data
with model.query() as select:
    record = Record()
    num_records = aggregates.count(record)
    result = select(num_records)

# Bare expression: shows the query result when run as a notebook cell; it has
# no effect when the file is run as a plain script.
result

# Declare the entity types that the rules below populate.
Customer = model.Type("Customer")
# NOTE(review): registered under the plural name "Trucks", unlike the other
# singular type names — confirm whether this is intentional.
Truck = model.Type("Trucks")
Transaction = model.Type("Transaction")
# NOTE(review): Community is declared here but not referenced again in the
# visible part of this file.
Community = model.Type("Community")
RelevantConnection = model.Type("RelevantConnection")

# Define Customer Type
# For every Record, add a Customer identified by the record's customer_id.
# NOTE(review): presumably records sharing a customer_id resolve to the same
# Customer entity — confirm against the RelationalAI docs.
with model.rule():
    r = Record()
    Customer.add(customer_id=r.customer_id)

# Check total number of customers
with model.query() as select:
    customer = Customer()
    num_records = aggregates.count(customer)
    result = select(num_records)

# Bare expression: shows the query result when run as a notebook cell.
result

# Define Truck Type
# For every Record, add a Truck identified by the record's truck_id.
# NOTE(review): presumably records sharing a truck_id resolve to the same
# Truck entity — confirm against the RelationalAI docs.
with model.rule():
    r = Record()
    Truck.add(truck_id=r.truck_id)

# Check total number of trucks
with model.query() as select:
    truck = Truck()
    num_records = aggregates.count(truck)
    result = select(num_records)

# Bare expression: shows the query result when run as a notebook cell.
result

# Define Transaction Type
# For every Record, add a Transaction carrying the customer, order, truck,
# timestamp (string form and order_ts_seconds) and location of that record.
# NOTE(review): a Record appears to be one order line (the table also has
# ORDER_DETAIL_ID / LINE_NUMBER, which are not copied here), so several
# records of one order presumably collapse into one Transaction — confirm.
with model.rule():
    r = Record()
    Transaction.add(
        customer_id=r.customer_id,
        order_id=r.order_id,
        truck_id=r.truck_id,
        order_ts=r.order_ts,
        order_ts_seconds=r.order_ts_seconds,
        location_id=r.location_id,
    )

# Connect transactions that look like they happened together: t1 is connected
# to t2 when both are at the same truck, belong to different customers, and
# their order_ts_seconds values differ by at most 1200
# (presumably seconds, i.e. 20 minutes — confirm the unit of the column).
with model.rule():
    t1 = Transaction()
    t2 = Transaction()

    # The bare comparisons below are the rule's conditions on the (t1, t2)
    # pair; they are not discarded even though their value is not assigned.
    t1.truck_id == t2.truck_id
    t1.customer_id != t2.customer_id
    rel.abs(t1.order_ts_seconds - t2.order_ts_seconds) <= 1200

    # Record the link as a (multi-valued) `connected` property on t1.
    t1.connected.add(t2)

# Check total number of connections: count the combinations of
# (customer_id, order_ts, connected transaction, connected customer_id)
# found across all transactions.
with model.query() as select:
    t = Transaction()
    num_records = aggregates.count(t.customer_id, t.order_ts, t.connected, t.connected.customer_id)
    result = select(num_records)

# Bare expression: shows the query result when run as a notebook cell.
result

# Reduce the connections to the relevant ones: for each pair
# (customer, connected customer) count the transactions linking them and keep
# only pairs with more than 4 such transactions.
with model.rule():
    t = Transaction()
    # Number of transactions t, grouped per (customer, connected customer).
    total_connections = aggregates.count(
        t, per=[t.customer_id, t.connected.customer_id]
    )
    # Bare comparison: acts as the rule's filter condition.
    total_connections > 4
    # Store the surviving pair, referencing the Customer entities by id, along
    # with how many times the pair was connected.
    RelevantConnection.add(
        customer_1=Customer(customer_id=t.customer_id),
        customer_2=Customer(customer_id=t.connected.customer_id),
        total_connections=total_connections,
    )

# Count the relevant connections, i.e. the customer pairs that passed the
# `total_connections > 4` filter in the rule that populates
# RelevantConnection (more than four co-occurrences, not merely more than one).
with model.query() as select:
    relevant_con = RelevantConnection()
    num_records = aggregates.count(relevant_con)
    result = select(num_records)

# Bare expression: shows the query result when run as a notebook cell.
result

# Graph of customers used for community detection; created as undirected.
community_graph = Graph(model, undirected=True)

# Add edges to the graph between customers / Nodes will be added automatically
with model.rule():
    relevant_con = RelevantConnection()
    community_graph.Edge.add(
        relevant_con.customer_1,
        relevant_con.customer_2,
        # Edge weight is the number of connections between the two customers.
        weight=relevant_con.total_connections,
    )

# Detect communities with Louvain and store each customer's community id both
# on the Customer entity and on its graph node (together with customer_id);
# the code further down reads these two properties from the fetched node data.
# NOTE(review): presumably only customers that are nodes of the graph get a
# community id from `louvain` — confirm against the RelationalAI docs.
with model.rule():
    customer = Customer()
    community_id = community_graph.compute.louvain(customer)
    customer.set(community_id=community_id)

    community_graph.Node(customer).set(
        community_id=community_id,
        customer_id=customer.customer_id
    )

def generate_random_colors(num_colors):
    """Return ``num_colors`` random colors as ``#rrggbb`` hex strings.

    Every color is drawn in HLS space using the module-level ``random``
    generator: hue from ``random.random()``, saturation from [0.5, 1.0] and
    lightness from [0.4, 0.8], which keeps the palette away from grey and
    from very dark or very pale tones.
    """

    def _draw_hex_color():
        # The draw order (hue, saturation, lightness) is kept fixed so a
        # seeded generator always yields the same palette.
        hue = random.random()
        sat = random.uniform(0.5, 1.0)
        lum = random.uniform(0.4, 0.8)

        channels = colorsys.hls_to_rgb(hue, lum, sat)
        # Scale each [0, 1] channel to 0..255 and render as two hex digits.
        return '#%02x%02x%02x' % tuple(int(255 * channel) for channel in channels)

    return [_draw_hex_color() for _ in range(num_colors)]

# Pull the graph down to the client; the result is indexed by "nodes" and
# "edges" below.
data = community_graph.fetch()

num_nodes = len(data["nodes"])
num_edges = len(data["edges"])
print(f"Number of nodes: {num_nodes}, Number of edges: {num_edges}")

# Distinct community ids found on the nodes; nodes without a community id
# are skipped.
community_set = set()
for node in data['nodes'].values():
    if 'community_id' in node:
        community_set.add(node['community_id'])
num_communities = len(community_set)
print(f"Number of communities: {num_communities}")

# One random color per community.
random_colors = generate_random_colors(num_communities)

# Pair the communities with the colors in iteration order.
community_colors = dict(zip(community_set, random_colors))

# Output: Number of nodes: 3493, Number of edges: 4939
# Output: Number of communities: 555

# Visualize the graph, coloring each node by the community it belongs to.
# NOTE(review): `three=True` presumably selects a 3D renderer — confirm
# against the Graph.visualize documentation.
community_graph.visualize(
    three=True,
    node_label_size_factor=1.9,
    use_links_force=True,
    node_hover_neighborhood=True,
    style={
    "node": {
        # Look up the node's color in the per-community palette.
        "color": lambda x : community_colors[x['community_id']],
        # Hover text: the customer's id.
        "hover": lambda x : f"{x['customer_id']}"
    },
    "edge": {
        "opacity": 0.8,
        "color": "#ccc",
        # Hover text: the edge weight (number of connections of the pair).
        "hover": lambda x : x['weight']
    }
})

def get_community_id_of_customer(customer_id, graph_data=None):
    """Return the community id of the customer with the given id.

    Args:
        customer_id: Customer id to look for; compared with ``==`` against the
            ``'customer_id'`` property of every entry.
        graph_data: Fetched graph data, a mapping of sections (e.g. "nodes",
            "edges") to mappings of property dicts. Defaults to the
            module-level ``data`` so existing one-argument calls keep working.

    Returns:
        The ``'community_id'`` of the first matching entry, or ``None`` when
        no entry carries both the requested ``customer_id`` and a
        ``community_id``.
    """
    if graph_data is None:
        graph_data = data

    for section in graph_data.values():
        for props in section.values():
            try:
                if props['customer_id'] == customer_id:
                    # Stop at the first hit instead of scanning the rest.
                    return props['community_id']
            except (KeyError, TypeError):
                # Entry without these properties (e.g. an edge) or not a
                # mapping at all: not a customer node, skip it. Any other
                # error is a real problem and is allowed to propagate.
                continue
    return None

# Community of the customer we want to highlight.
community_id_of_interest = get_community_id_of_customer(84087)
# Bare expression: shows the value when run as a notebook cell
# (the recorded notebook output was 117).
community_id_of_interest

# Focus palette: every community grey, the community of interest red.
# The keys are taken from the actual community ids in `community_colors`
# rather than assuming the ids are exactly 1..N, so a node color lookup by
# community id cannot fail on 0-based or non-contiguous ids.
community_colors_focus = {community: '#808080' for community in community_colors}

community_colors_focus[community_id_of_interest] = '#FF0000'

# Visualize the graph again, highlighting only the community of interest:
# its nodes use the focus palette color and are drawn larger than the rest.
community_graph.visualize(
    three=True,
    node_label_size_factor=1.9,
    use_links_force=True,
    node_hover_neighborhood=True,
    style={
    "node": {
        # Color from the focus palette, keyed by the node's community id.
        "color": lambda x : community_colors_focus[x['community_id']],
        # Size 20 for nodes of the community of interest, 2 for all others.
        "size" : lambda x : 20 if x['community_id'] == community_id_of_interest else 2,
        # Hover text: the customer's id.
        "hover": lambda x : f"{x['customer_id']}"
    },
    "edge": {
        "opacity": 0.8,
        "color": "#ccc",

        # Hover text: the edge weight (number of connections of the pair).
        "hover": lambda x : x['weight']
    }
})

# Community Detection and Group Recommendations using RelationalAI

# Overview

# What you will learn

# Let's get started!

# Define Model in RelationalAI

# Referencing Snowflake Data

# Type Declarations

# Weaving data into our Model

# Defining Customers

# Defining Trucks

# Defining Transactions

# Finding Connections

# Reducing connections to 'relevant' ones

# Community Detection

# Detecting communities using Louvain

# Visualize Graph

# Let's focus on a particular community.