#import "@preview/polylux:0.3.1": *
#import themes.metropolis: *
#import "@preview/tablex:0.0.5": tablex, hlinex, vlinex, colspanx, rowspanx

#show: metropolis-theme.with(
  footer: [CC BY-SA 4.0 Arthur Grisel-Davy]
)

#set text(font: "Fira Sans", weight: "light", size: 20pt)
#show math.equation: set text(font: "Fira Math")
#set strong(delta: 100)
#set par(justify: true)

#title-slide(
  author: [Arthur Grisel-Davy, Sebastian Fischmeister],
  title: text(size: 30pt, weight: 500)[MAD: One-Shot Machine Activity Detector for Physics-Based Cyber Security],
  subtitle: "",
  date: "University of Waterloo",
  extra: "agriseld@uwaterloo.ca"
)

//#slide(title: "Table of contents")[
//  #metropolis-outline
//]

#slide(title: "Introduction")[
    #only(1)[#figure(image("images/wein_p1.svg", height: 100%))]
    #only(2)[#figure(image("images/wein_p2.svg", height: 100%))]
    #only(3)[#figure(image("images/wein_p3.svg", height: 100%))]
    #only(4)[#figure(image("images/wein_p4.svg", height: 100%))]
    #only(5)[#figure(image("images/wein_p5.svg", height: 100%))]
]


#slide(title: "Problem Statement")[
#align(center)[Given a #text(fill: blue, weight:400 )[discretized time series $t$] and a #text(fill: red, weight:400)[set of patterns $P=\{P_1, dots.h, P_n\}$], identify a mapping $m: NN arrow.r P union lambda$ such that every sample $t[i]$ maps to a pattern in $P union lambda$ with the condition that the sample #text(fill: purple, weight: 400)[matches] an occurrence of the pattern in $t$.]
]

//#slide(title: "Proposed Approach")[
//    #only(1)[#figure(image("images/aproach_p1.svg", width: 100%))]
//    #only(2)[#figure(image("images/aproach_p2.svg", width: 100%))]
//    #only(3)[#figure(image("images/aproach_p3.svg", width: 100%))]
//    #only(4)[#figure(image("images/aproach_p4.svg", width: 100%))]
//    #only(5)[#figure(image("images/aproach_p5.svg", width: 100%))]
//    #only(6)[#figure(image("images/aproach_p6.svg", width: 100%))]
//    #only(7)[#figure(image("images/aproach_p7.svg", width: 100%))]
//    #only(8)[#figure(image("images/aproach_p8.svg", width: 100%))]
//    #only(9)[#figure(image("images/aproach_p9.svg", width: 100%))]
//]


#slide(title: "Proposed Approach")[
#align(center)[
#text(weight: "bold")[Metric:] The distance between a sample and a pattern is the minimum normalized distance between the pattern and any pattern-length substring that includes the sample.
#v(1cm)
#text(weight: "bold")[Decision:] Each sample receives the label of the closest training pattern.
]
]

//#slide(title: "2D Interpretation")[
//    
//    #only(1)[#figure(image("images/2d_p1.svg", width: 100%))]
//    #only(2)[#figure(image("images/2d_p2.svg", width: 100%))]
//    #only(3)[#figure(image("images/2d_p3.svg", width: 100%))]
//    #only(4)[#figure(image("images/2d_p4.svg", width: 100%))]
//    #only(5)[#figure(image("images/2d_p5.svg", width: 100%))]
//]

#slide(title: "Question")[
#align(center)[Should the algorithm #text(weight: "bold")[always] choose a label?]
]

#slide(title: "2D Interpretation")[
    
    #figure(image("images/2d_p6.svg", width: 100%))
]

#slide(title: "Parameter "+sym.alpha)[
    #figure(
        image("images/areas.svg", width: 100%)
    )
#align(center)[With $alpha lt.triple 2$, the algorithm acquires novelty-detection capability.]
]

#slide(title: "Performance Metric")[
#figure(
        image("images/metric.svg", width: 100%)
    )
]


#slide(title: "Case Study 1")[

#align(center)[
#figure(
tablex(
    columns: (auto, auto, auto),
    auto-vlines: false,
    repeat-header: false,
    align: (left+horizon,right+horizon,right+horizon),
    [#text(weight:"bold")[Dataset]], [#text(weight: "bold")[Length]], [#text(weight: "bold")[Number of Occurrences]],
    [NUCPC-0], [22700], [11],
    [NUCPC-1], [7307], [8],
    [Generated], [15540], [18],
    [WAP-ASUS], [26880], [18],
    [WAP-LINKSYS], [22604], [18],
    [REFIT-H4A4], [5366], [17],
    [REFIT-H4A1], [100000], [142]
),
caption: "Datasets used in case study 1",
supplement: none,
)
]

]
#slide(title: "Case Study 1 - Results")[
#figure(
        image("images/dsd_acc.svg", height: 100%)
    )
]


#slide(title: "Case Study 2")[
#image("images/rules_pipeline.svg", width:100%)
]

#slide(title: "Case Study 2")[
#align(center)[
#image("images/2w_experiment.svg", width: 90%)

#tablex(
    columns: (auto, auto, auto),
    auto-vlines: false,
    repeat-header: false,
    align: (left+horizon,right+horizon,right+horizon),
    [#text(weight:"bold")[Rule ID]], [#text(weight: "bold")[Rule]], [#text(weight: "bold")[Threat]],
    [1], ["SLEEP" state only], [Machine takeover, Botnet, Rogue employee],
    [2], [No "SLEEP" for more than 8m], [System malfunction],
    [3], [One "REBOOT"], [APT, Backdoors],
    [4], [No "HIGH" for more than 30s], [Crypto mining, Ransomware, Botnet],
)
]
]


#slide(title: "Case Study 2")[
#figure(
        image("images/preds.svg", height: 100%)
    )
]

#slide(title: "Case Study 2 - Results")[
#align(center)[
#figure(
tablex(
    columns: (auto, auto, auto),
    auto-vlines: false,
    repeat-header: false,
    align: (left+horizon,right+horizon,right+horizon),
    [#text(weight:"bold")[Rule]], [#text(weight: "bold")[Violation Ratio]], [#text(weight: "bold")[Micro-$F_1$]],
    [Night Sleep], [0.33], [1.0],
    [Work Hours], [0.3], [1.0],
    [Reboot], [0.48], [1.0],
    [No Long High], [0.75], [1.0],
),
caption: "Results of the case study 2",
supplement: none,
)
]
]

#slide(title: "Future Work")[
- Automatic Training (Patterns Extraction) #pause
- Multivariate Support
]

#focus-slide()[
    Multivariate Measure
]

#slide(title: "Multivariate Measure Overview")[
    #grid(
        columns: (auto,auto),
        gutter: 3pt,
        [#image("images/xpsu_illustration.svg", height:90%)],
        [Points of Measure:
        - CPU
        - 3x Motherboard 3.3V, 5V and 12V
        - GPU
        - 3x Storage (MOLEX) 3.3V, 5V, and 12V
        - Fans?
    Not Points of Measure:
- Motherboard-Powered Components]
    )
]


#focus-slide()[
    Log Verification
]

#slide(title: "Problem Statement")[
    #align(center)[
        Given a journal of events $J$ and a multivariate time series $t_s$ covering the same time period and machine, verify that no log was added or removed from the journal.
        ]
]

#slide(title: "Approaches")[
- Approach 1: 
  #list([Mine patterns from training journal], [Apply DSD to each dimension], [Compare])
  #pause
- Approach 2:
  #list([Extract patterns from training journal],[Train time-series classification model on multivariate data], [Classify power patterns for each event journal entry])
]

#slide(title: "Experiment Design / Data Collection")[
- What OS to consider?
  - What log journal to consider? Linux is easier to collect, Windows is more realistic.
  #pause
- What activity to simulate?
  - Program to fake activity -> Reproducible, Easy
  - Real user -> Realistic, Expensive
  #pause
- What logs to verify?
  - Previous work on merging similar logs into meta-events.
  - Should we consider all logs or limit to verifiable ones?
  #pause
- Should the dataset present real attacks?
  - Real attacks faking logs are difficult to perform.
  - Faking an attack is easy (tamper with $J$) but less realistic.
]