spring cleanup

This commit is contained in:
grizzly 2025-06-08 18:47:32 -04:00
parent f5712a3a73
commit 848f8cb57d
95 changed files with 46734 additions and 2311 deletions

View file

@ -1,232 +0,0 @@
// Slide deck built with Polylux (Metropolis theme); tablex provides the tables.
#import "@preview/polylux:0.3.1": *
#import themes.metropolis: *
#import "@preview/tablex:0.0.5": tablex, hlinex, vlinex, colspanx, rowspanx
// Apply the Metropolis theme with a license footer on every slide.
#show: metropolis-theme.with(
footer: [CC BY-SA 4.0 Arthur Grisel-Davy]
)
// Global typography: Fira fonts, light body text, stronger bold, justified paragraphs.
#set text(font: "Fira Sans", weight: "light", size: 20pt)
#show math.equation: set text(font: "Fira Math")
#set strong(delta: 100)
#set par(justify: true)
// Title slide: authors, talk title, venue (passed via the date field), contact e-mail.
#title-slide(
author: [Arthur Grisel-Davy, Sebastian Fischmeister],
title: text(size: 30pt, weight: 500)[MAD: One-Shot Machine Activity Detector for Physics-Based Cyber Security],
subtitle: "",
date: "University of Waterloo",
extra: "agriseld@uwaterloo.ca"
)
//#slide(title: "Table of contents")[
// #metropolis-outline
//]
// Introduction: one figure revealed in five build steps via #only.
#slide(title: "Introduction")[
#only(1)[#figure(image("images/wein_p1.svg", height: 100%))]
#only(2)[#figure(image("images/wein_p2.svg", height: 100%))]
#only(3)[#figure(image("images/wein_p3.svg", height: 100%))]
#only(4)[#figure(image("images/wein_p4.svg", height: 100%))]
#only(5)[#figure(image("images/wein_p5.svg", height: 100%))]
]
// Formal problem statement: map each time-series sample to a pattern (or lambda, i.e. no pattern).
#slide(title: "Problem Statement")[
#align(center)[Given a #text(fill: blue, weight:400 )[discretized time series $t$] and a #text(fill: red, weight:400)[set of patterns $P=\{P_1, dots.h, P_n\}$], identify a mapping $m: NN arrow.r P union lambda$ such that every sample $t[i]$ maps to a pattern in $P union lambda$ with the condition that the sample #text(fill: purple, weight: 400)[matches] an occurrence of the pattern in $t$.]
]
//#slide(title: "Proposed Approach")[
// #only(1)[#figure(image("images/aproach_p1.svg", width: 100%))]
// #only(2)[#figure(image("images/aproach_p2.svg", width: 100%))]
// #only(3)[#figure(image("images/aproach_p3.svg", width: 100%))]
// #only(4)[#figure(image("images/aproach_p4.svg", width: 100%))]
// #only(5)[#figure(image("images/aproach_p5.svg", width: 100%))]
// #only(6)[#figure(image("images/aproach_p6.svg", width: 100%))]
// #only(7)[#figure(image("images/aproach_p7.svg", width: 100%))]
// #only(8)[#figure(image("images/aproach_p8.svg", width: 100%))]
// #only(9)[#figure(image("images/aproach_p9.svg", width: 100%))]
//]
// Core method in one slide: distance metric plus nearest-pattern decision rule.
// Fixes: title typo "Approcah" -> "Approach"; singular "the sample" to agree
// with the singular subject of the sentence.
#slide(title: "Proposed Approach")[
#align(center)[
#text(weight: "bold")[Metric:] The distance between a sample and a pattern is the minimum normalized distance between the pattern and any pattern-length substring that includes the sample.
#v(1cm)
#text(weight: "bold")[Decision:] Each sample receives the label of the closest training pattern.
]
]
//#slide(title: "2D Interpretation")[
//
// #only(1)[#figure(image("images/2d_p1.svg", width: 100%))]
// #only(2)[#figure(image("images/2d_p2.svg", width: 100%))]
// #only(3)[#figure(image("images/2d_p3.svg", width: 100%))]
// #only(4)[#figure(image("images/2d_p4.svg", width: 100%))]
// #only(5)[#figure(image("images/2d_p5.svg", width: 100%))]
//]
// Discussion prompt: whether forced labeling (no rejection option) is desirable.
#slide(title: "Question")[
#align(center)[Should the algorithm #text(weight: "bold")[always] choose a label?]
]
// 2D illustration of the decision regions (final build step of the earlier sequence).
#slide(title: "2D Interpretation")[
#figure(image("images/2d_p6.svg", width: 100%))
]
// The alpha parameter controls the rejection region size.
// Fix: subject-verb agreement "acquire" -> "acquires".
#slide(title: "Parameter "+sym.alpha)[
#figure(
image("images/areas.svg", width: 100%)
)
// NOTE(review): `lt.triple` renders as the "much less than" symbol (⋘);
// confirm that is the intended relation rather than plain `<` or `lt.eq`.
#align(center)[With $alpha lt.triple 2$, the algorithm acquires novelty-detection capability.]
]
// Illustration of how prediction quality is scored.
#slide(title: "Performance Metric")[
#figure(
image("images/metric.svg", width: 100%)
)
]
// Case study 1: summary of the evaluation datasets (length and pattern occurrences).
// Fix: header typo "Occurences" -> "Occurrences".
#slide(title: "Case Study 1")[
#align(center)[
#figure(
tablex(
columns: (auto, auto, auto),
auto-vlines: false,
repeat-header: false,
align: (left+horizon,right+horizon,right+horizon),
[#text(weight:"bold")[Dataset]], [#text(weight: "bold")[Length]], [#text(weight: "bold")[Number of Occurrences]],
[NUCPC-0], [22700], [11],
[NUCPC-1], [7307], [8],
[Generated], [15540], [18],
[WAP-ASUS], [26880], [18],
[WAP-LINKSYS], [22604], [18],
[REFIT-H4A4], [5366], [17],
[REFIT-H4A1], [100000], [142]
),
caption: "Results of the case study 1",
supplement: none,
)
]
]
// Case study 1: per-dataset accuracy results (full-height figure).
#slide(title: "Case Study 1 - Results")[
#figure(
image("images/dsd_acc.svg", height: 100%)
)
]
// Case study 2: overview of the rule-checking pipeline.
#slide(title: "Case Study 2")[
#image("images/rules_pipeline.svg", width:100%)
]
// Case study 2: experiment setup and the four monitored rules with the threats they detect.
#slide(title: "Case Study 2")[
#align(center)[
#image("images/2w_experiment.svg", width: 90%)
#tablex(
columns: (auto, auto, auto),
auto-vlines: false,
repeat-header: false,
align: (left+horizon,right+horizon,right+horizon),
[#text(weight:"bold")[Rule ID]], [#text(weight: "bold")[Rule]], [#text(weight: "bold")[Threat]],
[1], ["SLEEP" state only], [Machine takeover, Botnet, Rogue employee],
[2], [No "SLEEP" for more than 8m], [System malfunction],
[3], [One "REBOOT"], [APT, Backdoors],
[4], [No "HIGH" for more than 30s], [Crypto mining, Ransomware, Botnet],
)
]
]
// Case study 2: predicted activity labels over time.
#slide(title: "Case Study 2")[
#figure(
image("images/preds.svg", height: 100%)
)
]
// Case study 2: per-rule violation ratio and micro-F1 scores.
#slide(title: "Case Study 2 - Results")[
#align(center)[
#figure(
tablex(
columns: (auto, auto, auto),
auto-vlines: false,
repeat-header: false,
align: (left+horizon,right+horizon,right+horizon),
[#text(weight:"bold")[Rule]], [#text(weight: "bold")[Violation Ratio]], [#text(weight: "bold")[Micro-$F_1$]],
[Night Sleep], [0.33], [1.0],
[Work Hours], [0.3], [1.0],
[Reboot], [0.48], [1.0],
[No Long High], [0.75], [1.0],
),
caption: "Results of the case study 2",
supplement: none,
)
]
]
// Roadmap: the second bullet is revealed after a #pause.
#slide(title: "Future Work")[
- Automatic Training (Patterns Extraction) #pause
- Multivariate Support
]
// Section divider.
#focus-slide()[
Multivariate Measure
]
// Planned multivariate instrumentation: which PSU rails are (and are not) measured.
// Fix: misplaced comma "3.3V ,5V," -> "3.3V, 5V,".
#slide(title: "Multivariate Measure Overview")[
#grid(
columns: (auto,auto),
gutter: 3pt,
[#image("images/xpsu_illustration.svg", height:90%)],
[Points of Measure:
- CPU
- 3x Motherboard 3.3V, 5V and 12V
- GPU
- 3x Storage (MOLEX) 3.3V, 5V, and 12V
- Fans?
Not Points of Measure:
- Motherboard-Powered Components]
)
]
// Section divider.
#focus-slide()[
Log Verification
]
// Problem statement for verifying a log journal against a power time series.
// Fixes: "journal of event" -> "journal of events"; missing space in "periodand".
#slide(title: "Problem Statement")[
#align(center)[
Given a journal of events $J$ and a multivariate time series $t_s$ covering the same time period and machine, verify that no log was added or removed from the journal.
]
]
// Two candidate verification pipelines, revealed incrementally with #pause.
#slide(title: "Approaches")[
- Approach 1:
#list([Mine patterns from training journal], [apply DSD to each dimension], [Compare])
#pause
- Approach 2:
#list([Extract patterns from training journal],[Train time-series classification model on multivariate data], [Classify power patterns for each event journal entry])
]
// Open design questions for the data-collection experiment (incremental reveal).
// Fixes: capitalize "Windows" (OS name); "verifyable" -> "verifiable";
// "Whould dataset" -> "Should the dataset"; "Faking attack" -> "Faking an attack".
#slide(title: "Experiment Design / Data Collection")[
- What OS to consider?
- What log journal to consider? Linux is easier to collect, Windows is more realistic.
#pause
- What activity to simulate?
- Program to fake activity -> Reproducible, Easy
- Real user -> Realistic, Expensive
#pause
- What logs to verify?
- Previous work on merging similar logs into meta-events.
- Should consider all logs or limit to verifiable ones?
#pause
- Should the dataset present real attacks?
- Real attacks faking logs are difficult to perform.
- Faking an attack is easy (tamper with $J$) but less realistic.
]