-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathschema.sql
More file actions
323 lines (272 loc) · 9.7 KB
/
Copy pathschema.sql
File metadata and controls
323 lines (272 loc) · 9.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
CREATE TABLE IF NOT EXISTS uploads (
id TEXT PRIMARY KEY,
given_name TEXT,
platform TEXT,
upload_timestamp REAL,
updated_at REAL,
color TEXT
);
CREATE TABLE IF NOT EXISTS uploaded_files ( -- filled during extraction step
id TEXT PRIMARY KEY,
manifest_file_id TEXT,
upload_id TEXT,
opfs_filename TEXT,
manifest_filename TEXT,
file_hash TEXT,
upload_timestamp REAL,
file_size_bytes INTEGER,
parse_status TEXT,
FOREIGN KEY(upload_id) REFERENCES uploads(id) ON DELETE CASCADE
);
CREATE TABLE IF NOT EXISTS raw_data ( -- filled during extraction step
id TEXT PRIMARY KEY,
upload_id TEXT,
file_id TEXT,
data JSONTEXT,
line_numbers JSONTEXT, -- JSON list of line numbers where this record appears in the source file (1-indexed)
FOREIGN KEY(upload_id) REFERENCES uploads(id) ON DELETE CASCADE,
FOREIGN KEY(file_id) REFERENCES uploaded_files(id) ON DELETE CASCADE
);
CREATE TABLE IF NOT EXISTS events ( -- filled during semantic map
id TEXT PRIMARY KEY,
upload_id TEXT,
file_ids JSONTEXT, -- multiple possible after deduplication
raw_data_ids JSONTEXT, -- can be multiple raw data entries that map to the same event, stored as JSON list of raw_data ids
--
timestamp REAL,
event_action TEXT,
event_kind TEXT,
event_category JSONTEXT DEFAULT '[]',
event_type JSONTEXT DEFAULT '[]',
--
message TEXT,
attributes JSONTEXT, --
origin TEXT, -- e.g., "facebook/web", "facebook/mobile_app", "apple/system", "unknown"
tags JSONTEXT DEFAULT "[]",
labels JSONTEXT DEFAULT "[]",
--
treat_as_auth_device BOOLEAN DEFAULT 0,
--
deduplicated BOOLEAN DEFAULT 0,
extra_timestamps JSONTEXT DEFAULT "[]",
--
FOREIGN KEY(upload_id) REFERENCES uploads(id) ON DELETE CASCADE
);
CREATE TABLE IF NOT EXISTS event_comments (
id TEXT PRIMARY KEY,
event_id TEXT,
comment TEXT,
created_at REAL,
updated_at REAL,
FOREIGN KEY(event_id) REFERENCES events(id) ON DELETE CASCADE
);
CREATE TABLE IF NOT EXISTS devices_raw ( -- filled during semantic map
id TEXT PRIMARY KEY,
upload_id TEXT,
file_id TEXT,
raw_data_id TEXT,
--
entity_type TEXT,
event_kind TEXT,
event_category JSONTEXT DEFAULT '[]',
--
attributes JSONTEXT,
origin TEXT, -- e.g., "facebook/web", "facebook/mobile_app", "apple/system", "unknown"
--
FOREIGN KEY(upload_id) REFERENCES uploads(id) ON DELETE CASCADE,
FOREIGN KEY(file_id) REFERENCES uploaded_files(id) ON DELETE CASCADE,
FOREIGN KEY(raw_data_id) REFERENCES raw_data(id) ON DELETE CASCADE
);
CREATE TABLE IF NOT EXISTS atomic_devices ( -- hard merge based on static device identifiers. user cannot edit this.
id TEXT PRIMARY KEY,
upload_ids JSONTEXT NOT NULL, -- JSON list of uploads that contributed to this merged device
file_ids JSONTEXT NOT NULL, -- ^^ for uploaded_files.id
devices_raw_ids JSONTEXT NOT NULL, -- ^^ for devices_raw.id
--
attributes JSONTEXT, -- merged attributes
origins JSONTEXT, -- JSON list of origin values (e.g., ["facebook/web", "facebook/mobile_app"])
specificity INTEGER DEFAULT 1 -- 1=generic, 2=model+version, 3=hard_id
);
CREATE TABLE IF NOT EXISTS device_profiles (
id TEXT PRIMARY KEY,
atomic_devices_ids JSONTEXT NOT NULL, -- JSON list of atomic_devices.id that are in this cluster
--
attributes JSONTEXT, -- merged attributes from all atomics
specificity INTEGER DEFAULT 1, -- max specificity from atomics
model TEXT, -- best model name from attributes
manufacturer TEXT, -- best manufacturer from attributes
origins JSONTEXT, -- merged origins from all atomics
--
system_soft_merge BOOLEAN DEFAULT 0, -- 1 if profile created by system soft-merge AND has 2+ atomics, else 0
is_generic BOOLEAN DEFAULT 0, -- 1 if specificity < 2 and Apple
--
user_label TEXT,
notes TEXT,
--
created_at REAL,
updated_at REAL,
--
tags JSONTEXT DEFAULT "[]",
labels JSONTEXT DEFAULT "[]"
);
CREATE TABLE IF NOT EXISTS device_profile_comments (
id TEXT PRIMARY KEY,
device_profile_id TEXT,
comment TEXT,
created_at REAL,
updated_at REAL,
FOREIGN KEY(device_profile_id) REFERENCES device_profiles(id) ON DELETE CASCADE
);
CREATE TABLE IF NOT EXISTS event_assoc (
event_id TEXT,
atomic_device_id TEXT,
event_specificity INTEGER, -- 1=generic, 2=model+version, 3=hard_id
match_reason TEXT,
PRIMARY KEY (event_id, atomic_device_id),
FOREIGN KEY(event_id) REFERENCES events(id) ON DELETE CASCADE,
FOREIGN KEY(atomic_device_id) REFERENCES atomic_devices(id) ON DELETE CASCADE
);
CREATE TABLE IF NOT EXISTS device_instance_edges ( -- for device/event grouping
id_a TEXT,
id_b TEXT, -- dropped from main dataframe in level0
type TEXT,
provenance TEXT,
upload_id TEXT,
UNIQUE(id_a, id_b, type),
FOREIGN KEY(upload_id) REFERENCES uploads(id) ON DELETE CASCADE
);
CREATE TABLE IF NOT EXISTS device_instances (
id TEXT PRIMARY KEY,
upload_id TEXT,
--
platform TEXT,
manufacturer TEXT,
model TEXT,
client_name TEXT,
os_name TEXT,
os_type TEXT,
--
apple_masking TEXT,
first_seen REAL,
last_seen REAL,
last_seen_dt TEXT,
event_count INTEGER,
latest_os_version TEXT,
latest_client_version TEXT,
latest_client_ip TEXT,
--
os_versions TEXT, -- TODO jsonstring????
client_versions TEXT, -- -- TODO jsonstring????
client_ips TEXT, -- -- TODO jsonstring????
locations TEXT, -- -- TODO jsonstring????
--
created_at REAL,
--
FOREIGN KEY(upload_id) REFERENCES uploads(id) ON DELETE CASCADE
);
CREATE TABLE IF NOT EXISTS device_instance_events (
device_instance_id TEXT,
event_id TEXT,
PRIMARY KEY (device_instance_id, event_id),
FOREIGN KEY(device_instance_id) REFERENCES device_instances(id) ON DELETE CASCADE,
FOREIGN KEY(event_id) REFERENCES events(id) ON DELETE CASCADE
);
CREATE TABLE IF NOT EXISTS device_instance_raw_devices (
device_instance_id TEXT,
devices_raw_id TEXT,
PRIMARY KEY (device_instance_id, devices_raw_id),
FOREIGN KEY(device_instance_id) REFERENCES device_instances(id) ON DELETE CASCADE,
FOREIGN KEY(devices_raw_id) REFERENCES devices_raw(id) ON DELETE CASCADE
);
CREATE TABLE IF NOT EXISTS device_profiles_v2 (
id TEXT PRIMARY KEY,
model TEXT,
manufacturer TEXT,
os_type TEXT,
user_label TEXT,
notes TEXT,
created_at REAL,
updated_at REAL
);
CREATE TABLE IF NOT EXISTS device_profile_instances (
device_profile_id TEXT,
device_instance_id TEXT,
PRIMARY KEY (device_profile_id, device_instance_id),
FOREIGN KEY(device_profile_id) REFERENCES device_profiles_v2(id) ON DELETE CASCADE,
FOREIGN KEY(device_instance_id) REFERENCES device_instances(id) ON DELETE CASCADE
);
-----------------------------------------
-------- VIEWS --------
-----------------------------------------
-- view for Events Mappings
DROP VIEW IF EXISTS v_event_field_mappings;
CREATE VIEW IF NOT EXISTS v_event_field_mappings AS
-- static columns
SELECT 'id' AS field, 'text' AS type
UNION SELECT 'timestamp', 'timestamp'
UNION SELECT 'message', 'text'
UNION SELECT 'event_category', 'category'
UNION SELECT 'event_action', 'text'
UNION SELECT 'event_kind', 'category'
UNION SELECT 'platform', 'text'
UNION
-- dynamic from JSON attributes
SELECT DISTINCT key AS field, 'text' AS type
FROM events, json_each(events.attributes)
WHERE events.attributes IS NOT NULL AND events.attributes != '';
-- view for Auth Devices Mappings
DROP VIEW IF EXISTS v_device_field_mappings;
CREATE VIEW IF NOT EXISTS v_device_field_mappings AS
-- static columns
SELECT 'id' AS field, 'text' AS type
UNION SELECT 'entity_type', 'category'
UNION SELECT 'event_kind', 'category'
UNION SELECT 'event_category', 'category'
UNION SELECT 'platform', 'text'
UNION
-- dynamic from JSON attributes
SELECT DISTINCT key AS field, 'text' AS type
FROM devices_raw, json_each(devices_raw.attributes)
WHERE devices_raw.attributes IS NOT NULL AND devices_raw.attributes != '';
-- all event action types
CREATE VIEW IF NOT EXISTS v_event_actions AS
SELECT DISTINCT event_action
FROM events
WHERE event_action IS NOT NULL AND event_action != '';
DROP VIEW IF EXISTS v_device_profiles;
CREATE VIEW v_device_profiles AS
SELECT
dg.id AS profile_id,
json_group_array(json(ad.attributes)) AS attributes,
json_group_array(ad.specificity) AS specificity,
json(json_group_array(DISTINCT j2.value)) AS origins
FROM device_profiles dg
JOIN json_each(dg.atomic_devices_ids) as j ON 1=1
JOIN atomic_devices ad ON ad.id = j.value
JOIN json_each(ad.origins) as j2 ON 1=1
GROUP BY dg.id;
DROP VIEW IF EXISTS v_events2profile;
CREATE VIEW v_events2profile AS
SELECT
ea.event_id,
dp.id AS device_profile_id,
ea.match_reason,
ea.event_specificity
FROM event_assoc ea
JOIN device_profiles dp ON 1=1
JOIN json_each(dp.atomic_devices_ids) as j ON j.value = ea.atomic_device_id;
DROP VIEW IF EXISTS v_events2profile_indexed;
CREATE VIEW v_events2profile_indexed AS
SELECT
die.event_id,
json_group_array(
json_object(
'id', dp.id,
'model', COALESCE(dp.model, ''),
'user_label', COALESCE(dp.user_label, '')
)
) AS device_profiles_data
FROM device_instance_events die
JOIN device_profile_instances dpi ON die.device_instance_id = dpi.device_instance_id
JOIN device_profiles_v2 dp ON dpi.device_profile_id = dp.id
GROUP BY die.event_id;