#import "@preview/polylux:0.3.1": * #import themes.metropolis: * #import "@preview/tablex:0.0.5": tablex, hlinex, vlinex, colspanx, rowspanx #show: metropolis-theme.with( footer: [CC BY-SA 4.0 Arthur Grisel-Davy] ) #set text(font: "Fira Sans", weight: "light", size: 20pt) #show math.equation: set text(font: "Fira Math") #set strong(delta: 100) #set par(justify: true) #title-slide( author: [Arthur Grisel-Davy, Sebastian Fischmeister], title: text(size: 30pt, weight: 500)[MAD: One-Shot Machine Activity Detector for Physics-Based Cyber Security], subtitle: "", date: "University of Waterloo", extra: "agriseld@uwaterloo.ca" ) //#slide(title: "Table of contents")[ // #metropolis-outline //] #slide(title: "Introduction")[ #only(1)[#figure(image("images/wein_p1.svg", height: 100%))] #only(2)[#figure(image("images/wein_p2.svg", height: 100%))] #only(3)[#figure(image("images/wein_p3.svg", height: 100%))] #only(4)[#figure(image("images/wein_p4.svg", height: 100%))] #only(5)[#figure(image("images/wein_p5.svg", height: 100%))] ] #slide(title: "Problem Statement")[ #align(center)[Given a #text(fill: blue, weight:400 )[discretized time series $t$] and a #text(fill: red, weight:400)[set of patterns $P=\{P_1, dots.h, P_n\}$], identify a mapping $m: NN arrow.r P union lambda$ such that every sample $t[i]$ maps to a pattern in $P union lambda$ with the condition that the sample #text(fill: purple, weight: 400)[matches] an occurrence of the pattern in $t$.] 
] //#slide(title: "Proposed Approach")[ // #only(1)[#figure(image("images/aproach_p1.svg", width: 100%))] // #only(2)[#figure(image("images/aproach_p2.svg", width: 100%))] // #only(3)[#figure(image("images/aproach_p3.svg", width: 100%))] // #only(4)[#figure(image("images/aproach_p4.svg", width: 100%))] // #only(5)[#figure(image("images/aproach_p5.svg", width: 100%))] // #only(6)[#figure(image("images/aproach_p6.svg", width: 100%))] // #only(7)[#figure(image("images/aproach_p7.svg", width: 100%))] // #only(8)[#figure(image("images/aproach_p8.svg", width: 100%))] // #only(9)[#figure(image("images/aproach_p9.svg", width: 100%))] //] #slide(title: "Proposed Approach")[ #align(center)[ #text(weight: "bold")[Metric:] The distance between a sample and a pattern is the minimum normalized distance between the pattern and any pattern-length substring that includes the sample. #v(1cm) #text(weight: "bold")[Decision:] Each sample receives the label of the closest training pattern. ] ] //#slide(title: "2D Interpretation")[ // // #only(1)[#figure(image("images/2d_p1.svg", width: 100%))] // #only(2)[#figure(image("images/2d_p2.svg", width: 100%))] // #only(3)[#figure(image("images/2d_p3.svg", width: 100%))] // #only(4)[#figure(image("images/2d_p4.svg", width: 100%))] // #only(5)[#figure(image("images/2d_p5.svg", width: 100%))] //] #slide(title: "Question")[ #align(center)[Should the algorithm #text(weight: "bold")[always] choose a label?] ] #slide(title: "2D Interpretation")[ #figure(image("images/2d_p6.svg", width: 100%)) ] #slide(title: "Parameter "+sym.alpha)[ #figure( image("images/areas.svg", width: 100%) ) #align(center)[With $alpha lt.triple 2$, the algorithm acquires novelty-detection capability.] 
] #slide(title: "Performance Metric")[ #figure( image("images/metric.svg", width: 100%) ) ] #slide(title: "Case Study 1")[ #align(center)[ #figure( tablex( columns: (auto, auto, auto), auto-vlines: false, repeat-header: false, align: (left+horizon,right+horizon,right+horizon), [#text(weight:"bold")[Dataset]], [#text(weight: "bold")[Length]], [#text(weight: "bold")[Number of Occurrences]], [NUCPC-0], [22700], [11], [NUCPC-1], [7307], [8], [Generated], [15540], [18], [WAP-ASUS], [26880], [18], [WAP-LINKSYS], [22604], [18], [REFIT-H4A4], [5366], [17], [REFIT-H4A1], [100000], [142] ), caption: "Results of the case study 1", supplement: none, ) ] ] #slide(title: "Case Study 1 - Results")[ #figure( image("images/dsd_acc.svg", height: 100%) ) ] #slide(title: "Case Study 2")[ #image("images/rules_pipeline.svg", width:100%) ] #slide(title: "Case Study 2")[ #align(center)[ #image("images/2w_experiment.svg", width: 90%) #tablex( columns: (auto, auto, auto), auto-vlines: false, repeat-header: false, align: (left+horizon,right+horizon,right+horizon), [#text(weight:"bold")[Rule ID]], [#text(weight: "bold")[Rule]], [#text(weight: "bold")[Threat]], [1], ["SLEEP" state only], [Machine takeover, Botnet, Rogue employee], [2], [No "SLEEP" for more than 8m], [System malfunction], [3], [One "REBOOT"], [APT, Backdoors], [4], [No "HIGH" for more than 30s], [Crypto mining, Ransomware, Botnet], ) ] ] #slide(title: "Case Study 2")[ #figure( image("images/preds.svg", height: 100%) ) ] #slide(title: "Case Study 2 - Results")[ #align(center)[ #figure( tablex( columns: (auto, auto, auto), auto-vlines: false, repeat-header: false, align: (left+horizon,right+horizon,right+horizon), [#text(weight:"bold")[Rule]], [#text(weight: "bold")[Violation Ratio]], [#text(weight: "bold")[Micro-$F_1$]], [Night Sleep], [0.33], [1.0], [Work Hours], [0.3], [1.0], [Reboot], [0.48], [1.0], [No Long High], [0.75], [1.0], ), caption: "Results of the case study 2", supplement: none, ) ] ] #slide(title: "Future Work")[ - 
Automatic Training (Patterns Extraction) #pause - Multivariate Support ] #focus-slide()[ Multivariate Measure ] #slide(title: "Multivariate Measure Overview")[ #grid( columns: (auto,auto), gutter: 3pt, [#image("images/xpsu_illustration.svg", height:90%)], [Points of Measure: - CPU - 3x Motherboard 3.3V, 5V and 12V - GPU - 3x Storage (MOLEX) 3.3V, 5V, and 12V - Fans? Not Points of Measure: - Motherboard-Powered Components] ) ] #focus-slide()[ Log Verification ] #slide(title: "Problem Statement")[ #align(center)[ Given a journal of events $J$ and a multivariate time series $t_s$ covering the same time period and machine, verify that no log was added or removed from the journal. ] ] #slide(title: "Approaches")[ - Approach 1: #list([Mine patterns from training journal], [Apply DSD to each dimension], [Compare]) #pause - Approach 2: #list([Extract patterns from training journal],[Train time-series classification model on multivariate data], [Classify power patterns for each event journal entry]) ] #slide(title: "Experiment Design / Data Collection")[ - What OS to consider? - What log journal to consider? Linux is easier to collect, Windows is more realistic. #pause - What activity to simulate? - Program to fake activity -> Reproducible, Easy - Real user -> Realistic, Expensive #pause - What logs to verify? - Previous work on merging similar logs into meta-events. - Should we consider all logs or limit to verifiable ones? #pause - Should the dataset present real attacks? - Real attacks faking logs are difficult to perform. - Faking an attack is easy (tamper with $J$) but less realistic. ]