fix alpha impact figure
This commit is contained in:
parent
87820c8e80
commit
60a01f8e46
2 changed files with 97 additions and 88 deletions
|
|
@ -7,8 +7,11 @@
|
||||||
viewBox="0 0 609.24652 216.63609"
|
viewBox="0 0 609.24652 216.63609"
|
||||||
version="1.1"
|
version="1.1"
|
||||||
id="svg5"
|
id="svg5"
|
||||||
inkscape:version="1.2.2 (1:1.2.2+202305151915+b0a8486541)"
|
inkscape:version="1.2.2 (b0a8486541, 2022-12-01)"
|
||||||
sodipodi:docname="2w_experiment.svg"
|
sodipodi:docname="2w_experiment.svg"
|
||||||
|
inkscape:export-filename="2w_experiment.pdf"
|
||||||
|
inkscape:export-xdpi="175.618"
|
||||||
|
inkscape:export-ydpi="175.618"
|
||||||
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||||
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||||
xmlns="http://www.w3.org/2000/svg"
|
xmlns="http://www.w3.org/2000/svg"
|
||||||
|
|
@ -25,12 +28,12 @@
|
||||||
inkscape:document-units="mm"
|
inkscape:document-units="mm"
|
||||||
showgrid="false"
|
showgrid="false"
|
||||||
inkscape:zoom="0.70710678"
|
inkscape:zoom="0.70710678"
|
||||||
inkscape:cx="1112.279"
|
inkscape:cx="1268.5496"
|
||||||
inkscape:cy="461.74073"
|
inkscape:cy="458.20519"
|
||||||
inkscape:window-width="1920"
|
inkscape:window-width="1920"
|
||||||
inkscape:window-height="1016"
|
inkscape:window-height="1056"
|
||||||
inkscape:window-x="1920"
|
inkscape:window-x="1920"
|
||||||
inkscape:window-y="27"
|
inkscape:window-y="0"
|
||||||
inkscape:window-maximized="1"
|
inkscape:window-maximized="1"
|
||||||
inkscape:current-layer="layer1" />
|
inkscape:current-layer="layer1" />
|
||||||
<defs
|
<defs
|
||||||
|
|
@ -503,12 +506,12 @@
|
||||||
style="font-size:10px;stroke-width:0.264583"
|
style="font-size:10px;stroke-width:0.264583"
|
||||||
x="39.374374"
|
x="39.374374"
|
||||||
y="257.96527"
|
y="257.96527"
|
||||||
id="tspan3081">2: Exactly one "reboot" occurence.</tspan><tspan
|
id="tspan3081">2: Exactly one "reboot" occurrence and no "high" occurrence.</tspan><tspan
|
||||||
sodipodi:role="line"
|
sodipodi:role="line"
|
||||||
style="font-size:10px;stroke-width:0.264583"
|
style="font-size:10px;stroke-width:0.264583"
|
||||||
x="39.374374"
|
x="39.374374"
|
||||||
y="270.46527"
|
y="270.46527"
|
||||||
id="tspan3091">3: There should not be "high" states for more than 30s.</tspan><tspan
|
id="tspan3091">3: There should not be "high" states for more than 2m.</tspan><tspan
|
||||||
sodipodi:role="line"
|
sodipodi:role="line"
|
||||||
style="font-size:10px;stroke-width:0.264583"
|
style="font-size:10px;stroke-width:0.264583"
|
||||||
x="39.374374"
|
x="39.374374"
|
||||||
|
|
@ -601,7 +604,10 @@
|
||||||
style="font-size:12.0059px;line-height:1.25;font-family:'STIX Two Text';-inkscape-font-specification:'STIX Two Text';letter-spacing:0px;word-spacing:0px;stroke-width:0.0794137"
|
style="font-size:12.0059px;line-height:1.25;font-family:'STIX Two Text';-inkscape-font-specification:'STIX Two Text';letter-spacing:0px;word-spacing:0px;stroke-width:0.0794137"
|
||||||
x="476.69379"
|
x="476.69379"
|
||||||
y="118.70661"
|
y="118.70661"
|
||||||
id="text3272"><tspan
|
id="text3272"
|
||||||
|
inkscape:export-filename="text3272.pdf"
|
||||||
|
inkscape:export-xdpi="175.618"
|
||||||
|
inkscape:export-ydpi="175.618"><tspan
|
||||||
sodipodi:role="line"
|
sodipodi:role="line"
|
||||||
id="tspan3270"
|
id="tspan3270"
|
||||||
style="stroke-width:0.0794137"
|
style="stroke-width:0.0794137"
|
||||||
|
|
@ -628,6 +634,6 @@
|
||||||
id="tspan3278"
|
id="tspan3278"
|
||||||
style="stroke-width:0.0794137"
|
style="stroke-width:0.0794137"
|
||||||
x="555.52734"
|
x="555.52734"
|
||||||
y="118.62257">0</tspan></text>
|
y="118.62257">4</tspan></text>
|
||||||
</g>
|
</g>
|
||||||
</svg>
|
</svg>
|
||||||
|
|
|
||||||
|
Before Width: | Height: | Size: 25 KiB After Width: | Height: | Size: 25 KiB |
161
DSD/qrs/main.tex
161
DSD/qrs/main.tex
|
|
@ -1,4 +1,3 @@
|
||||||
|
|
||||||
\documentclass[conference]{IEEEconf}
|
\documentclass[conference]{IEEEconf}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -33,7 +32,7 @@
|
||||||
\input{acronyms}
|
\input{acronyms}
|
||||||
\title{\textbf{\Large MAD: One-Shot Machine Activity Detector for Physics-Based Cyber Security\\}}
|
\title{\textbf{\Large MAD: One-Shot Machine Activity Detector for Physics-Based Cyber Security\\}}
|
||||||
|
|
||||||
\author{Arthur Grisel-Davy$^{1,*}$, Sebastian Fischmeister$^{2}$\\
|
\author{Arthur Grisel-Davy$^{1,*}$, Sebastian Fischmeister$^{1}$\\
|
||||||
\normalsize $^{1}$University of Waterloo, Ontario, Canada\\
|
\normalsize $^{1}$University of Waterloo, Ontario, Canada\\
|
||||||
\normalsize agriseld@uwaterloo.ca, sfishme@uwaterloo.ca\\
|
\normalsize agriseld@uwaterloo.ca, sfishme@uwaterloo.ca\\
|
||||||
\normalsize *corresponding author
|
\normalsize *corresponding author
|
||||||
|
|
@ -194,6 +193,7 @@ The pattern $\lambda$ is the \textit{unknown} pattern assigned to the samples in
|
||||||
\label{fig:overview}
|
\label{fig:overview}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
|
\pagebreak
|
||||||
\section{Proposed Solution: MAD}\label{sec:solution}
|
\section{Proposed Solution: MAD}\label{sec:solution}
|
||||||
\gls{mad}'s core idea separates it from other traditional sliding window algorithms.
|
\gls{mad}'s core idea separates it from other traditional sliding window algorithms.
|
||||||
In \gls{mad}, the sample window around the sample to classify dynamically adapts for optimal context selection.
|
In \gls{mad}, the sample window around the sample to classify dynamically adapts for optimal context selection.
|
||||||
|
|
@ -249,6 +249,7 @@ s_i = \underset{j\in[1,k]}{\arg\min}(sd(i,e_j) \textrm{ with } sd(i,e_j)<T_j)
|
||||||
\end{equation}
|
\end{equation}
|
||||||
In the case where no distance is below the threshold, the sample defaults to the \textit{unknown} state.
|
In the case where no distance is below the threshold, the sample defaults to the \textit{unknown} state.
|
||||||
|
|
||||||
|
|
||||||
\subsection{Algorithm}
|
\subsection{Algorithm}
|
||||||
The algorithm for \gls{mad} follows three steps:
|
The algorithm for \gls{mad} follows three steps:
|
||||||
|
|
||||||
|
|
@ -354,12 +355,83 @@ Thus the second part also terminates.
|
||||||
Finally, the third part uses the same loops as the second and also terminates.
|
Finally, the third part uses the same loops as the second and also terminates.
|
||||||
Overall, \gls{mad} always terminates for any finite time series and finite set of finite patterns.
|
Overall, \gls{mad} always terminates for any finite time series and finite set of finite patterns.
|
||||||
|
|
||||||
\textbf{Monotony of number of unknown sample}\agd{find better title}
|
\textbf{Influence of $\alpha$}
|
||||||
\agd{Explain that the number of unknown sample is monotonic as a function of alpha.
|
The shrink coefficient $\alpha$ is the only hyperparameter of the detector.
|
||||||
Also, a sample that is classified as unknown will always remain unknown if alpha decreases.}
|
Its default value is one.
|
||||||
|
$\alpha$ controls the threshold of similarity that a substring should cross to get qualified as a match to a pattern.
|
||||||
|
$\alpha$ takes its value in $\mathbb{R}_*^+$.
|
||||||
|
The default value for $\alpha$ is one.
|
||||||
|
This value follows the intuitive reasoning presented in Section~\ref{sec:solution}.
|
||||||
|
|
||||||
\section{Evaluation}
|
To better understand the influence of the shrink coefficient, the algorithm can be perceived as a 2D area segmentation problem.
|
||||||
The evaluation of \gls{mad} consists in the detection of the states for time series from various machines.
|
Let us consider the 2D plane where each pattern has a position based on its shape.
|
||||||
|
A substring to classify also has a position in the plane and a distance to each pattern (see bottom part of Figure~\ref{fig:overview}).
|
||||||
|
During classification, the substring takes the label of the closest pattern.
|
||||||
|
For any pattern $P_j$, the set of positions in the plane that are assigned to $P_j$ --- i.e., the set of positions for which $P_j$ is the closest pattern --- is called the area of attraction of $P_j$.
|
||||||
|
In a classic \gls{1nn} context, every point in the plane is in the area of attraction of one pattern.
|
||||||
|
|
||||||
|
This infinite area of attraction is not a desirable feature in this context.
|
||||||
|
Let us consider now a time series exhibiting anomalous or unforeseen behavior.
|
||||||
|
Some substrings in this time series do not resemble any of the provided patterns.
|
||||||
|
In an infinite area of attraction context, the anomalous points are assigned to a pattern, even if they poorly match it.
|
||||||
|
As a result, the behavior of the security rule can become unpredictable as anomalous points can receive a seemingly random label.
|
||||||
|
|
||||||
|
A more desirable behavior of the state detection system is to inform of the presence of unpredicted behavior.
|
||||||
|
This behavior naturally emerges when the areas of attraction of the patterns are limited to a finite size.
|
||||||
|
The shrink coefficient $\alpha$ --- through the modification of the threshold $T_j$ --- provides control over the shrink of the areas of attraction.
|
||||||
|
The lower the value of $\alpha$, the smaller the areas of attraction around each pattern.
|
||||||
|
Applying a coefficient to the thresholds produces a reduction of the radius of the area of attraction, not a homothety of the initial areas.
|
||||||
|
In other words, the shrink does not preserve the shape of the area.
|
||||||
|
For a value $\alpha < 0.5$, all areas become disks --- in the 2D representation --- and all shape information is lost.
|
||||||
|
|
||||||
|
The impact of the $\alpha$ coefficient on the classification is monotonic and predictable.
|
||||||
|
Because $\alpha$ influences the thresholds, changing $\alpha$ results in moving the transitions in the detected labels.
|
||||||
|
In other words, a lower value of $\alpha$ expands the unknown segments while a higher value shrinks them until they disappear.
|
||||||
|
Figure~\ref{fig:alpha_impact} illustrates the impact of $\alpha$ on the width of unknown segments.
|
||||||
|
The impact of $\alpha$ on the number of unknown samples is also monotonic.
|
||||||
|
|
||||||
|
\begin{proof}
|
||||||
|
We prove the monotonicity of the number of unknown samples as a function of $\alpha$ by induction.
|
||||||
|
The base case is $\alpha=0$.
|
||||||
|
In this case, the threshold for every pattern $P_j\in P$ is $T_j = \alpha\times ID_j = 0$.
|
||||||
|
With every $T_j=0$, no sample can have a distance below the threshold and every sample is labeled as \textit{unknown}.
|
||||||
|
|
||||||
|
For the induction case, let us consider $\alpha$ increasing from the value $\alpha_0$ to $\alpha_1 = \alpha_0 + \delta$ with $\delta \in \mathbb{R}_*^+$.
|
||||||
|
The increase of $\alpha$ induces the increase of every threshold $T$ from the value $T_0$ to $T_1$:
|
||||||
|
\begin{equation}
|
||||||
|
\alpha_0 < \alpha_1 \Rightarrow T_0 < T_1.
|
||||||
|
\end{equation}
|
||||||
|
|
||||||
|
For every value of every threshold $T$ we can define a set of all samples below the threshold as $S_T$.
|
||||||
|
When a threshold increases from $T_0$ to $T_1$, all the samples in $S_{T_0}$ also belong to $S_{T_1}$ by the transitivity of order in $\mathbb{R}_*^+$.
|
||||||
|
It is also possible for samples to belong to $S_{T_1}$ but not to $S_{T_0}$ if their distance falls between $T_0$ and $T_1$.
|
||||||
|
Hence, $S_{T_0}$ is a subset of $S_{T_1}$ and the cardinality of $S_T$ as a function of $T$ is monotonically non-decreasing.
|
||||||
|
|
||||||
|
We conclude that the number of unknown samples --- i.e., samples above every threshold --- as a function of $\alpha$ is monotonically non-increasing.
|
||||||
|
\end{proof}
|
||||||
|
|
||||||
|
|
||||||
|
Figure~\ref{fig:alpha} presents the number of unknown samples in the classification of the NUCPC-1 time series based on the value of $\alpha$.
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\centering
|
||||||
|
\includegraphics[width=0.49\textwidth]{images/alpha.pdf}
|
||||||
|
\caption{Evolution of the number of unknown samples based on the value of the shrink coefficient $\alpha$.}
|
||||||
|
\label{fig:alpha}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\centering
|
||||||
|
\includegraphics[width=0.49\textwidth]{images/alpha_impact.pdf}
|
||||||
|
\caption{Behavior of the classifier with different values of $\alpha$. A lower value of $\alpha$ expands the unknown sections (orange sections).}
|
||||||
|
\label{fig:alpha_impact}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
|
||||||
|
\pagebreak
|
||||||
|
\section{Case Study 1: Comparison with Other Methods}
|
||||||
|
The first evaluation of \gls{mad} consists in the detection of the states for time series from various machines.
|
||||||
We evaluate the performance of the proposed solution against other traditional methods to illustrate the capabilities and advantages of \gls{mad}.
|
We evaluate the performance of the proposed solution against other traditional methods to illustrate the capabilities and advantages of \gls{mad}.
|
||||||
|
|
||||||
\subsection{Performance Metrics}
|
\subsection{Performance Metrics}
|
||||||
|
|
@ -493,78 +565,6 @@ With both performances metrics combined, \gls{mad} outperforms the other methods
|
||||||
\label{fig:res}
|
\label{fig:res}
|
||||||
\end{figure*}
|
\end{figure*}
|
||||||
|
|
||||||
\subsection{Influence of $\alpha$}
|
|
||||||
The shrink coefficient $\alpha$ is the only hyperparameter of the detector.
|
|
||||||
Its default value is one.
|
|
||||||
$\alpha$ controls the threshold of similarity that a substring should cross to get qualified as a match to a pattern.
|
|
||||||
$\alpha$ takes its value in $\mathbb{R}_*^+$.
|
|
||||||
The default value for $\alpha$ is one.
|
|
||||||
This value follows the intuitive reasoning presented in Section~\ref{sec:solution}.
|
|
||||||
|
|
||||||
To better understand the influence of the shrink coefficient, the algorithm can be perceived as a 2D area segmentation problem.
|
|
||||||
Let us consider the 2D plane where each pattern has a position based on its shape.
|
|
||||||
A substring to classify also has a position in the plane and a distance to each pattern (see bottom part of Figure~\ref{fig:overview}).
|
|
||||||
During classification, the substring takes the label of the closest pattern.
|
|
||||||
For any pattern $P_j$, the set of positions in the plane that are assigned to $P_j$ --- i.e., the set of positions for which $P_j$ is the closest pattern --- is called the area of attraction of $P_j$.
|
|
||||||
In a classic \gls{1nn} context, every point in the plane is in the area of attraction of one pattern.
|
|
||||||
|
|
||||||
This infinite area of attraction is not a desirable feature in this context.
|
|
||||||
Let us consider now a time series exhibiting anomalous or unforeseen behavior.
|
|
||||||
Some substrings in this time series do not resemble any of the provided pattern.
|
|
||||||
In an infinite area of attraction context, the anomalous points are assigned to a pattern, even if they poorly match it.
|
|
||||||
As a result, the behavior of the security rule can become unpredictable as anomalous points can receive a seemingly random label.
|
|
||||||
|
|
||||||
A more desirable behavior of the state detection system is to inform of the presence of unpredicted behavior.
|
|
||||||
This behavior naturally emerges when the areas of attraction of the patterns are limited to a finite size.
|
|
||||||
The shrink coefficient $\alpha$ --- through the modification of the threshold $T_j$ --- provides control over the shrink of the areas of attraction.
|
|
||||||
The lower the value of $\alpha$, the smaller the areas of attraction around each sample.
|
|
||||||
Applying a coefficient to the thresholds produces a reduction of the radius of the area of attraction, not an homothety of the initial areas.
|
|
||||||
In other words, the shrink does not preserve the shape of the area.
|
|
||||||
For a value $\alpha < 0.5$, all areas become disks --- in the 2D representation --- and all shape information are lost.
|
|
||||||
|
|
||||||
The impact of the $\alpha$ coefficient on the classification is monotonic and predictable.
|
|
||||||
Because $\alpha$ influences the thresholds, changing $\alpha$ results in moving the transitions in the detected labels.
|
|
||||||
In other words, a lower value of $\alpha$ expands the unknown segments while a higher value shrinks them until they disappear.
|
|
||||||
Figure~\ref{fig:alpha_impact} illustrates the impact $\alpha$ on the width of unknown segments.
|
|
||||||
The impact of $\alpha$ on the number of unknown sample is also monotonic.
|
|
||||||
|
|
||||||
\begin{proof}
|
|
||||||
We prove the monotony of the number of unknown samples as a function of $\alpha$ by induction.
|
|
||||||
The base case is $\alpha=0$.
|
|
||||||
In this case, the threshold for every pattern $P_j\in P$ is $T_j = \alpha\times ID_j = 0$.
|
|
||||||
With every $T_j=0$, no sample can have a distance below the threshold and every sample is labeled as \textit{unknown}.
|
|
||||||
|
|
||||||
For the induction case, let us consider $\alpha$ increasing from the value $\alpha_0$ to $\alpha_1 = \alpha_0 + \delta$ with $\delta \in \mathbb{R}_*^+$.
|
|
||||||
The increasing of $\alpha$ induces the increase of every threshold $T$ from the value $T_0$ to $T_1$
|
|
||||||
\begin{equation}
|
|
||||||
\alpha_0 <\alpha_1 \rightarrow T_0 < T_1
|
|
||||||
\end{equation}
|
|
||||||
|
|
||||||
For every value of every threshold $T$ we can define a set of all samples below the threshold as $S_T$.
|
|
||||||
When a threshold increases from $T_0$ to $T_1$, all the samples in $S_{T_0}$ also belong in $S_{T_1}$ by the transitivity of order in $\mathbb{R}_*^+$.
|
|
||||||
It is also possible for samples to belong to $S_{T_1}$ but not to $S_{T_0}$ if their distance falls between $T_0$ and $T_1$.
|
|
||||||
Hence, $S_{T_0}$ is a subset of $S_{T_1}$ and the cardinality of $S_T$ as a function of $T$ is monotonically non-decreasing.
|
|
||||||
|
|
||||||
We conclude that the number of unknown samples --- i.e.,samples above every thresholds --- as a function of $\alpha$ is monotonically non-increasing.
|
|
||||||
\end{proof}
|
|
||||||
|
|
||||||
|
|
||||||
Figure~\ref{fig:alpha} presents the number of unknown samples in the classification of the NUCPC-1 time series based on the value of $\alpha$.
|
|
||||||
|
|
||||||
|
|
||||||
\begin{figure}
|
|
||||||
\centering
|
|
||||||
\includegraphics[width=0.49\textwidth]{images/alpha.pdf}
|
|
||||||
\caption{Evolution of the number of unknown samples based on the value of the shrink coefficient $\alpha$.}
|
|
||||||
\label{fig:alpha}
|
|
||||||
\end{figure}
|
|
||||||
|
|
||||||
\begin{figure}
|
|
||||||
\centering
|
|
||||||
\includegraphics[width=0.49\textwidth]{images/alpha_impact.pdf}
|
|
||||||
\caption{Behavior of the classifier with different values of $\alpha$. A lower value of $\alpha$ expands the unknown sections (orange sections)}
|
|
||||||
\label{fig:alpha_impact}
|
|
||||||
\end{figure}
|
|
||||||
|
|
||||||
\begin{figure*}
|
\begin{figure*}
|
||||||
\centering
|
\centering
|
||||||
|
|
@ -573,6 +573,9 @@ Figure~\ref{fig:alpha} presents the number of unknown samples in the classificat
|
||||||
\label{fig:areas}
|
\label{fig:areas}
|
||||||
\end{figure*}
|
\end{figure*}
|
||||||
|
|
||||||
|
\pagebreak
|
||||||
|
\section{Case Study 2: Attack Scenarios}
|
||||||
|
|
||||||
\section{Discussion}\label{sec:discussion}
|
\section{Discussion}\label{sec:discussion}
|
||||||
In this section we highlight specific aspects of the proposed solution.
|
In this section we highlight specific aspects of the proposed solution.
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue