Detecting sharing of user accounts
Overview
This notebook provides an example of how RelationalAI can be used to detect whether a user account is shared between multiple people.
We will analyze a dataset that contains login information from users, together with details on both the device and the location from which they are logging into our fictitious application. Our fictitious company does not allow account sharing. Our goal is therefore to first understand our users' behavioral patterns, and then to detect, using different methods, whether fraudulent behavior is occurring on some of these accounts.
To see examples that showcase more of RelationalAI's analytics capabilities, check out the other notebooks on the docs site.
Let's get started!
If you haven't already done so, begin by installing the RelationalAI Native App in your Snowflake account. See this page in the RelationalAI documentation for instructions.
Next, install RelationalAI's Python library into a local Python virtual environment and run rai init to connect your project to your Snowflake account.
This notebook is written with the assumption that you're running it from such a virtual environment. To do that, navigate in your terminal to your project's root directory and run the following commands:
source .venv/bin/activate
python -m pip install jupyter
jupyter lab
Note: The first command assumes you put your virtual environment in .venv, as specified in the instructions in the documentation linked above. If you put it somewhere else, adjust the path accordingly.
Next, perform the necessary imports:
import relationalai as rai
from relationalai.std.graphs import Graph
from relationalai.std import aggregates as agg
from relationalai.std import alias, dates, Vars
from typing import Tuple
import pandas as pd
provider = rai.Provider()
The next two cells insert a table into Snowflake and create a data stream between that table and the RelationalAI app.
provider.sql("""
begin
create schema if not exists RAI_DEMO.FRAUD_DETECTION;
create or replace table RAI_DEMO.FRAUD_DETECTION.USER_LOGS (
USER_ID VARCHAR(16777216),
TIMESTAMP TIMESTAMP_NTZ(9),
PUBLIC_IP VARCHAR(16777216),
CITY VARCHAR(16777216),
DEVICE_TYPE VARCHAR(16777216),
DEVICE_ID VARCHAR(16777216)
);
insert into RAI_DEMO.FRAUD_DETECTION.USER_LOGS (USER_ID,TIMESTAMP,PUBLIC_IP,CITY,DEVICE_TYPE,DEVICE_ID)
values
('aunt_judy','2024-06-01T20:31:46Z','69.37.129.87','New York','tablet','judys_ipad'),
('aunt_judy','2024-06-03T14:37:59Z','69.37.129.87','New York','tablet','judys_ipad'),
('aunt_judy','2024-06-05T06:07:56Z','69.37.129.87','New York','tablet','judys_ipad'),
('aunt_judy','2024-06-08T06:41:19Z','69.37.129.87','New York','tablet','judys_ipad'),
('aunt_judy','2024-06-12T05:09:50Z','69.37.129.87','New York','tablet','judys_ipad'),
('aunt_judy','2024-06-14T12:02:11Z','69.37.129.87','New York','tablet','judys_ipad'),
('aunt_judy','2024-06-16T08:32:16Z','69.37.129.87','New York','tablet','judys_ipad'),
('travelling_salesman','2024-06-04T19:43:18Z','168.191.205.135','Los Angeles','desktop','ts imac'),
('travelling_salesman','2024-06-18T01:59:39Z','168.191.205.135','Los Angeles','desktop','ts imac'),
('travelling_salesman','2024-06-21T07:21:14Z','168.191.205.135','Los Angeles','desktop','ts imac'),
('travelling_salesman','2024-06-26T16:24:55Z','168.191.205.135','Los Angeles','desktop','ts imac'),
('travelling_salesman','2024-06-06T18:03:44Z','168.191.205.135','Los Angeles','desktop','ts imac'),
('travelling_salesman','2024-06-07T20:15:24Z','168.191.205.135','Los Angeles','desktop','ts imac'),
('travelling_salesman','2024-06-11T08:53:45Z','168.191.205.135','Los Angeles','desktop','ts imac'),
('travelling_salesman','2024-06-24T08:55:45Z','168.191.205.135','Los Angeles','desktop','ts imac'),
('travelling_salesman','2024-06-24T08:59:45Z','168.191.205.135','Los Angeles','tablet','ts ipad'),
('travelling_salesman','2024-06-21T09:31:45Z','151.239.231.47','San Francisco','tablet','ts ipad'),
('travelling_salesman','2024-06-01T07:07:10Z','146.157.47.128','Long Beach','tablet','ts ipad'),
('travelling_salesman','2024-06-14T10:21:57Z','67.148.79.79','San Diego','tablet','ts ipad'),
('travelling_salesman','2024-06-12T18:08:20Z','228.17.231.201','Fresno','tablet','ts ipad'),
('travelling_salesman','2024-06-17T13:59:40Z','33.25.1.249','Oakland','tablet','ts ipad'),
('travelling_salesman','2024-06-29T05:35:51Z','171.167.22.118','San Jose','tablet','ts ipad'),
('travelling_salesman','2024-06-05T14:35:24Z','202.17.239.114','Sacramento','tablet','ts ipad'),
('travelling_salesman','2024-06-08T11:29:11Z','233.189.189.48','Bakersfield','tablet','ts ipad'),
('catch_me_if_you_can','2024-06-14T14:38:29Z','38.45.1.249','Los Angeles','desktop','dannys laptop'),
('catch_me_if_you_can','2024-06-14T14:50:29Z','99.31.19.07','New York','desktop','franks macbook'),
('catch_me_if_you_can','2024-06-14T14:10:29Z','171.231.231.97','Atlanta','tablet','annys ipad'),
('catch_me_if_you_can','2024-06-14T14:20:29Z','100.33.3.205','Atlanta','desktop','marys computer'),
('catch_me_if_you_can','2024-06-20T09:00:15Z','38.45.1.249','Los Angeles','desktop','dannys laptop'),
('catch_me_if_you_can','2024-06-20T09:40:15Z','99.31.19.07','New York','desktop','franks macbook'),
('catch_me_if_you_can','2024-06-20T10:13:15Z','171.231.231.97','Atlanta','tablet','annys ipad'),
('catch_me_if_you_can','2024-06-20T09:21:15Z','100.33.3.205','Atlanta','desktop','marys computer'),
('catch_me_if_you_can','2024-06-20T10:32:15Z','38.45.1.249','Los Angeles','desktop','dannys laptop'),
('catch_me_if_you_can','2024-06-17T09:47:24Z','99.31.19.07','New York','desktop','franks macbook'),
('catch_me_if_you_can','2024-06-17T09:33:24Z','171.231.231.97','Atlanta','tablet','annys ipad'),
('catch_me_if_you_can','2024-06-17T09:26:24Z','100.33.3.205','Atlanta','desktop','marys computer'),
('catch_me_if_you_can','2024-06-17T11:22:24Z','38.45.1.249','Los Angeles','desktop','dannys laptop'),
('catch_me_if_you_can','2024-06-17T11:27:24Z','99.31.19.07','New York','desktop','franks macbook'),
('catch_me_if_you_can','2024-06-23T10:31:59Z','120.35.6.505','New York','desktop','tommys chromebook'),
('catch_me_if_you_can','2024-06-06T16:36:27Z','120.35.6.505','New York','desktop','tommys chromebook'),
('catch_me_if_you_can','2024-06-02T19:12:10Z','38.45.1.249','Los Angeles','desktop','dannys laptop'),
('catch_me_if_you_can','2024-06-17T05:54:51Z','99.31.19.07','New York','desktop','franks macbook');
end;
""")
Define Model in RelationalAI
Let's define our model object. Models represent collections of objects. Objects, like Python objects, have types and properties, which we will define in a bit.
model = rai.Model("login_sharing")
Referencing Snowflake Data
Due to RelationalAI's tight integration with Snowflake, we can refer to data that we are streaming to our RelationalAI schema by simply referring to the source:
<my_database>.<my_schema>.<my_table>.
The data for this example consists of a single table called USER_LOGS. The table contains events pertaining to user logins: the user_id and the timestamp, together with information on the device that was used and the location from which the login took place.
We therefore introduce a type called Event that represents these event objects in our model. We can query all the properties of objects of this type using Event.known_properties().
Event = model.Type("Event", source="RAI_DEMO.FRAUD_DETECTION.USER_LOGS")
Event.known_properties()
['snowflake_id', 'device_id', 'public_ip', 'user_id', 'city', 'device_type', 'timestamp']
Note. The snowflake_id property is the RAI-internal identifier of a row in a Snowflake table and can be ignored.
Weaving data into our model
Let's create a few more types for objects that are relevant for our use case. We then create two rules:
- In the first rule, we create an instance of each of these types based on the occurrence of its identifier in our event table. At the same time, we set these objects as properties of each event instance, creating a relationship between them.
- In the second rule, we define additional properties for the User object.
User = model.Type("User")
Device = model.Type("Device")
IPAddress = model.Type("IPAddress")
with model.rule():
    e = Event()
    e.set(
        to_user = User.add(id=e.user_id),
        to_device = Device.add(id=e.device_id, type=e.device_type),
        to_ip_address = IPAddress.add(id=e.public_ip)
    )
with model.rule():
    u = User()
    e = Event(to_user = u)
    u.has_device.add(e.to_device)
    u.has_ip_address.add(e.to_ip_address)
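The net effect of these two rules can be sketched in plain Python, assuming the same event fields. The rows below are a hypothetical subset of the data, not the full table:

```python
from collections import defaultdict

# Hypothetical subset of the event rows: (user_id, device_id, public_ip).
events = [
    ("aunt_judy", "judys_ipad", "69.37.129.87"),
    ("aunt_judy", "judys_ipad", "69.37.129.87"),
    ("travelling_salesman", "ts imac", "168.191.205.135"),
    ("travelling_salesman", "ts ipad", "151.239.231.47"),
]

# Rule 2 analogue: each user accumulates the set of devices and IP
# addresses seen across their events (sets, because properties added
# with .add() are multi-valued).
has_device = defaultdict(set)
has_ip_address = defaultdict(set)
for user_id, device_id, public_ip in events:
    has_device[user_id].add(device_id)
    has_ip_address[user_id].add(public_ip)

print(sorted(has_device["travelling_salesman"]))  # ['ts imac', 'ts ipad']
```

In the model itself, of course, the same derivation runs declaratively inside RelationalAI rather than in a Python loop.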
Visualizing the input data
Time to take a closer look at our input data! We'll do this by defining our nodes and edges. But first, let's create a small styling helper for our graph, which we will also use later in this notebook.
We assign different colors to nodes based on the node's type. Notice how you can also configure other attributes, such as the size of nodes or their shape.
style = {
    "node": {
        "color": lambda n: 'firebrick' if n.get('focus') and n['type'] == 'User' else
            {'User': 'yellow', 'tablet': 'lightsteelblue', 'desktop': 'steelblue', 'IPAddress': 'grey'}[n['type']],
        "hover": lambda n: n['type'],
        "size": lambda n: {'User': 30, 'tablet': 15, 'desktop': 25, 'IPAddress': 15}[n['type']],
        "shape": lambda n: 'circle' if n['type'] == 'User' else 'rectangle',
    },
    "edge": {
        "color": 'grey',
    }
}
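Since the style entries are plain callables on node dictionaries, you can exercise them directly to see what they return. This self-contained sketch repeats the callbacks above; note that nothing in this notebook ever sets a focus flag on a node, so the firebrick branch only fires if you add that flag yourself:

```python
# The same callbacks as in the style dictionary above, repeated here so
# the snippet is self-contained. Each receives a node dictionary.
color = lambda n: (
    'firebrick' if n.get('focus') and n['type'] == 'User'
    else {'User': 'yellow', 'tablet': 'lightsteelblue',
          'desktop': 'steelblue', 'IPAddress': 'grey'}[n['type']]
)
size = lambda n: {'User': 30, 'tablet': 15, 'desktop': 25, 'IPAddress': 15}[n['type']]
shape = lambda n: 'circle' if n['type'] == 'User' else 'rectangle'

print(color({'type': 'User'}))                 # yellow
print(color({'type': 'User', 'focus': True}))  # firebrick
print(size({'type': 'desktop'}), shape({'type': 'desktop'}))  # 25 rectangle
```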
We then define a graph called input_graph that we use to display the usage patterns of our users, in particular which devices they are using and from which IP addresses. Nodes in our graph therefore represent users, devices, and IP addresses. We use the user properties that we defined earlier via a rule as the edges of the graph.
Note how we pass the style that we defined earlier to the graph for visualization purposes. It sets both the color and the size of each device node based on its device type (e.g. tablet or desktop).
input_graph = Graph(model)
Node, Edge = input_graph.Node, input_graph.Edge
Node.extend(User, label=User.id, type="User")
Node.extend(Device, label=Device.id, type=Device.type)
Node.extend(IPAddress, label=IPAddress.id, type="IPAddress")
Edge.extend(User.has_device)
Edge.extend(User.has_ip_address)
input_graph.visualize(three = False, node_label_size_factor = 1.5, edge_size_factor = 0.5, style = style).display(inline = True)
User aunt_judy only uses 1 device, and it seems that she's always using our application from the same IP address.
At first glance, the usage pattern of user travelling_salesman seems quite similar to that of user catch_me_if_you_can. Let's see how this might change if we extend our data model a little bit.
Identifying suspicious user activity
Let's create a rule that adds a property called has_ip_address to each device. This allows us to track from which IP addresses each device logged in.
with model.rule():
    d = Device()
    e = Event(to_device = d)
    d.has_ip_address.add(e.to_ip_address)
Let's see how our graph changes if we use this new property as an edge between devices and IP addresses. We also add an edge between users and devices.
by_device_graph = Graph(model)
Node, Edge = by_device_graph.Node, by_device_graph.Edge
Node.extend(User, label=User.id, type="User")
Node.extend(Device, label=Device.id, type=Device.type)
Node.extend(IPAddress, label=IPAddress.id, type="IPAddress")
Edge.extend(User.has_device)
Edge.extend(Device.has_ip_address)
by_device_graph.visualize(three = False, node_label_size_factor = 1.5, edge_size_factor = 0.5, style = style).display(inline = True)
Note. Can you spot how the usage patterns differ between the users? Does anything look suspicious to you? For instance, user catch_me_if_you_can seems to use a different IP address for each device. User travelling_salesman, on the other hand, only has 2 devices. Both devices have been used at one IP address, which is most likely the home (or work) address of that user. Additionally, this user uses one of their devices in many different locations, possibly because the user is using our application while on the road, e.g. visiting clients.
We can run a few queries to help us understand even better what might be going on. For instance, let's query, for each user, on how many different devices they used our application, as well as from how many different IP addresses.
with model.query() as select:
    u = User()
    c_d = agg.count(u.has_device, per = [u])
    c_ip = agg.count(u.has_ip_address, per = [u])
    res = select(alias(u.id,"user"), alias(c_d,"nr.devices"), alias(c_ip,"nr. ip addresses"))
res
user | nr.devices | nr. ip addresses |
---|---|---|
aunt_judy | 1 | 1 |
catch_me_if_you_can | 5 | 5 |
travelling_salesman | 2 | 9 |
Note. User catch_me_if_you_can uses the largest number of devices; travelling_salesman, on the other hand, logs in frequently from different IP addresses.
Let's run one more query: given that we know the type of each device, let's count how many devices, grouped by device type, each user has.
with model.query() as select:
    d = Device()
    u = User(has_device = d)
    c_d = agg.count(d, per = [u, d.type])
    res = select.distinct(alias(u.id,"user"), d.type, alias(c_d, "nr. devices"))
res
user | type | nr. devices |
---|---|---|
aunt_judy | tablet | 1 |
catch_me_if_you_can | desktop | 4 |
catch_me_if_you_can | tablet | 1 |
travelling_salesman | desktop | 1 |
travelling_salesman | tablet | 1 |
Note. While it's possible to use our application from different devices, and it's also conceivable that someone owns 2 devices of the same type over time (e.g. when you replace your device with a newer one), the behavior of user catch_me_if_you_can, with logins from 4 different desktops, looks suspicious. Furthermore, this user also uses a different IP address on each of the devices.
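The counts above, and the "more than 2 devices of the same type" criterion we are about to encode as a rule, can be reproduced in plain pandas. This is a sketch; the rows below are a hypothetical subset of USER_LOGS, not the full table:

```python
import pandas as pd

# Hypothetical subset of USER_LOGS: (user_id, device_type, device_id).
logs = pd.DataFrame(
    [
        ("aunt_judy", "tablet", "judys_ipad"),
        ("catch_me_if_you_can", "desktop", "dannys laptop"),
        ("catch_me_if_you_can", "desktop", "franks macbook"),
        ("catch_me_if_you_can", "desktop", "marys computer"),
        ("catch_me_if_you_can", "desktop", "tommys chromebook"),
        ("catch_me_if_you_can", "tablet", "annys ipad"),
    ],
    columns=["user_id", "device_type", "device_id"],
)

# Count distinct devices per user and device type...
counts = logs.groupby(["user_id", "device_type"])["device_id"].nunique()

# ...and flag users with more than 2 devices of the same type.
suspicious = sorted(counts[counts > 2].index.get_level_values("user_id").unique())
print(suspicious)  # ['catch_me_if_you_can']
```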
As a next step, we can therefore create a rule that tags each user that shows suspicious behavior as a SuspiciousUser. Based on our observations, we could state that a user that uses our application on more than 2 different devices of the same type should be marked as suspicious.
SuspiciousUser = model.Type("SuspiciousUser")
with model.rule():
    d = Device()
    u = User(has_device = d)
    c_d = agg.count(d, per = [u, d.type])
    c_d > 2
    u.set(SuspiciousUser)
with model.query() as select:
    u = SuspiciousUser()
    res = select(u.id)
res
id |
---|
catch_me_if_you_can |
Confirming Fraudulent Activity
Now that we have identified a user with suspicious activity, let's see if we can confirm that there is indeed some fraudulent behavior going on.
One way to do this would be to check whether we can find overlapping events from the same user that took place in 2 different locations. Unfortunately, we only track user logins, not how long a user was actually using our application. We can assume, though, that fraudulent activity is most likely in play if a user logs in from two different cities within a certain time interval, let's say 20 minutes.
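This assumption can be sketched in plain Python before we express it as a model rule: a user is flagged when two consecutive logins from different cities are at most 20 minutes apart. The function and its inputs below are illustrative, not part of the RelationalAI API:

```python
from datetime import datetime, timedelta

# Hedged sketch of the assumption above: a user looks fraudulent if two
# consecutive logins from *different* cities happen within 20 minutes.
SUSPICIOUS_INTERVAL = timedelta(minutes=20)  # == 1_200_000 milliseconds

def looks_fraudulent(logins):
    """logins: list of (timestamp, city) tuples for one user."""
    logins = sorted(logins)  # order events by timestamp
    return any(
        t2 - t1 <= SUSPICIOUS_INTERVAL and c1 != c2
        for (t1, c1), (t2, c2) in zip(logins, logins[1:])
    )

# Two different cities 12 minutes apart -> flagged.
print(looks_fraudulent([
    (datetime(2024, 6, 14, 14, 38), "Los Angeles"),
    (datetime(2024, 6, 14, 14, 50), "New York"),
]))  # True
```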
Step 1 - Derive next event
Let's start by creating an ordered sequence of events, grouped by user, and deriving the has_next_event property for each event of a suspicious user. We then query, for each suspicious user, the sorted event sequence: the city from which the login took place, as well as the timestamp and the city of the next event. If you look closely, you might spot some irregularities!
with model.rule():
    u = SuspiciousUser()
    e = Event(to_user = u)
    r = agg.rank_asc(e.timestamp, u)
    e.set(rank = r)
with model.rule():
    u = SuspiciousUser()
    e1 = Event(to_user = u)
    e2 = Event(to_user = u)
    e1.rank == e2.rank - 1
    e1.set(has_next_event = e2)
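A plain-Python analogue of these two rules, using a hypothetical subset of one user's events, is: sort by timestamp, assign 1-based ranks, and link each event to its successor.

```python
from datetime import datetime

# Hypothetical subset of one suspicious user's events.
events = [
    {"timestamp": datetime(2024, 6, 14, 14, 38, 29), "city": "Los Angeles"},
    {"timestamp": datetime(2024, 6, 14, 14, 10, 29), "city": "Atlanta"},
    {"timestamp": datetime(2024, 6, 14, 14, 50, 29), "city": "New York"},
]

events.sort(key=lambda e: e["timestamp"])  # agg.rank_asc analogue
for rank, e in enumerate(events, start=1):
    e["rank"] = rank
for prev, nxt in zip(events, events[1:]):  # e1.rank == e2.rank - 1
    prev["has_next_event"] = nxt

print(events[0]["city"], "->", events[0]["has_next_event"]["city"])  # Atlanta -> Los Angeles
```

The last event in the sequence has no has_next_event, mirroring the model, where the highest-ranked event matches no successor.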
with model.query() as select:
    u = SuspiciousUser()
    e = Event(to_user = u)
    res = select(u.id, e.rank, e.timestamp, e.city, alias(e.has_next_event.timestamp,"timestamp next event"), alias(e.has_next_event.city,"city next event"))
res
id | rank | timestamp | city | timestamp next event | city next event |
---|---|---|---|---|---|
catch_me_if_you_can | 1 | 2024-06-02 19:12:10 | Los Angeles | 2024-06-06 16:36:27 | New York |
catch_me_if_you_can | 2 | 2024-06-06 16:36:27 | New York | 2024-06-14 14:10:29 | Atlanta |
catch_me_if_you_can | 3 | 2024-06-14 14:10:29 | Atlanta | 2024-06-14 14:20:29 | Atlanta |
catch_me_if_you_can | 4 | 2024-06-14 14:20:29 | Atlanta | 2024-06-14 14:38:29 | Los Angeles |
catch_me_if_you_can | 5 | 2024-06-14 14:38:29 | Los Angeles | 2024-06-14 14:50:29 | New York |
catch_me_if_you_can | 6 | 2024-06-14 14:50:29 | New York | 2024-06-17 05:54:51 | New York |
catch_me_if_you_can | 7 | 2024-06-17 05:54:51 | New York | 2024-06-17 09:26:24 | Atlanta |
catch_me_if_you_can | 8 | 2024-06-17 09:26:24 | Atlanta | 2024-06-17 09:33:24 | Atlanta |
catch_me_if_you_can | 9 | 2024-06-17 09:33:24 | Atlanta | 2024-06-17 09:47:24 | New York |
catch_me_if_you_can | 10 | 2024-06-17 09:47:24 | New York | 2024-06-17 11:22:24 | Los Angeles |
catch_me_if_you_can | 11 | 2024-06-17 11:22:24 | Los Angeles | 2024-06-17 11:27:24 | New York |
catch_me_if_you_can | 12 | 2024-06-17 11:27:24 | New York | 2024-06-20 09:00:15 | Los Angeles |
catch_me_if_you_can | 13 | 2024-06-20 09:00:15 | Los Angeles | 2024-06-20 09:21:15 | Atlanta |
catch_me_if_you_can | 14 | 2024-06-20 09:21:15 | Atlanta | 2024-06-20 09:40:15 | New York |
catch_me_if_you_can | 15 | 2024-06-20 09:40:15 | New York | 2024-06-20 10:13:15 | Atlanta |
catch_me_if_you_can | 16 | 2024-06-20 10:13:15 | Atlanta | 2024-06-20 10:32:15 | Los Angeles |
catch_me_if_you_can | 17 | 2024-06-20 10:32:15 | Los Angeles | 2024-06-23 10:31:59 | New York |
Step 2 - Derive Fraudulent Users
It does indeed look like this user has committed fraudulent activity and that some amount of account sharing is going on. So let's write a rule that tags each user with fraudulent behavior as a FraudulentUser.
We do this when there exists at least one event whose next event took place within a configurable interval, set in suspicious_interval_in_milliseconds, and whose city property does not match that of the next event.
# suspicious time interval
suspicious_interval_in_milliseconds = 1200000  # = 20 minutes
FraudulentUser = model.Type("FraudulentUser")
with model.rule():
    u = SuspiciousUser()
    with model.found():
        e = Event(to_user = u)
        e.has_next_event.timestamp - e.timestamp == dates.milliseconds(diff_ms := Vars(1))
        diff_ms <= suspicious_interval_in_milliseconds
        e.city != e.has_next_event.city
    u.set(FraudulentUser)
with model.query() as select:
    u = FraudulentUser()
    res = select(u.id)
res
id |
---|
catch_me_if_you_can |
What to do next?
Now that we have identified fraudulent users in our dataset, we want to provide a way for users of this little fraud-detection application to get the results of our analysis directly from Snowflake.
To do that, we create a stored procedure that returns all instances of FraudulentUser. Alternatively, it would have been possible to write the results back directly into a Snowflake table.
@model.export("rai_demo.fraud_detection")
def fraudulent_logins() -> Tuple[str]:
    u = FraudulentUser()
    return u.id
Let's execute the procedure to take a look at the results.
pd.DataFrame(model.resources._exec("call rai_demo.fraud_detection.fraudulent_logins();"), columns = ["id"])
 | id |
---|---|
0 | catch_me_if_you_can |