gramarly

2023-06-22 19:17:00 -04:00 · 2023-06-22 19:17:00 -04:00 · f80998c742
commit f80998c742
parent 0ce1efe3f1
2 changed files with 151 additions and 29 deletions
--- a/BPV/qrs/glossary.typ
+++ b/BPV/qrs/glossary.typ
@ -0,0 +1,123 @@
+// Glossary code by Hugo Cartigny (BlueskyFR) 🍉
+
+#let glossary(indent-defs: false, doc) = {
+  // ✨ Applies the glossary show rules to `doc`. State: term text -> (ref-locs, def); items are displayed using level 99 headings
+  let glossary = state("wow", (:))
+  
+  // Hide the numbering for level 99 titles by rendering only their body as regular-weight text
+  show heading.where(level: 99): it => text(weight: "regular", it.body)
+  
+  let page-refs-color = rgb("#7630EA")
+  
+  show terms: list => {
+    let terms-grid = ()
+    // Add terms to glossary: one state entry per item, keyed by the term's text
+    for item in list.children {
+      glossary.update(v => {
+          v.insert(
+            item.term.text,
+            (
+              // Holds the list of the locations referencing the term
+              ref-locs: (),
+              // The actual term definition
+              def: item.description,
+            )
+          )
+  
+          // Return the new state with the added entry
+          v
+      })
+
+      if indent-defs {
+        // Term cell (indent-defs layout): level 99 heading followed by a label named after the term
+        terms-grid.push([
+          #heading(level: 99, numbering: "1")[*#item.term*]
+          #label(item.term.text)
+        ])
+  
+        // Definition cell: description followed by the referencing pages
+        terms-grid.push([
+          #item.description
+          // Pages where the term is referenced (taken from the final state, each linking to its location)
+          #show: text.with(page-refs-color)
+          #locate(loc => {
+            glossary.final(loc).at(item.term.text).ref-locs
+              .map(l => link(l, str(l.page)))
+              .join(", ")
+          })
+        ])
+      
+      } else [
+        // Display items directly one by one since
+        // we don't need to build a grid
+        
+        // Use a level 99 title so it doesn't conflict with regular ones
+        // and it can be referred to by @citations
+        #heading(level: 99, numbering: "1")[
+          *#item.term*:~~#item.description
+          // Pages where the term is referenced (taken from the final state, each linking to its location)
+          #show: text.with(page-refs-color)
+          #locate(loc => {
+            glossary.final(loc).at(item.term.text).ref-locs
+              .map(l => link(l, str(l.page)))
+              .join(", ")
+          })
+        ]
+        #label(item.term.text) \
+      ]
+    }
+
+    if indent-defs {
+      grid(
+        columns: (1fr, 4fr),
+        column-gutter: 2mm,
+        row-gutter: 8mm,
+        ..terms-grid
+      )
+    }
+  
+    // 🐛 Debug
+    //glossary.display()
+  }
+  
+  show ref: r => {
+    locate(loc => {
+      // Search for the source of the ref: query the element(s) carrying the target label
+      let term = str(r.target)
+      let res = query(r.target, loc)
+  
+      // If the source exists and is the glossary (heading level 99) — NOTE(review): .level is read without checking the element kind; confirm non-heading targets (e.g. figures) are safe
+      if res.len() > 0 and res.first().level == 99 {
+        let entry = glossary.at(loc).at(term)
+  
+        // Replace term by the user-specified supplement if not none
+        let custom-term = {
+          if r.citation.supplement != none { r.citation.supplement }
+          else { term }
+        }
+        
+        // If it is the first reference to the term (no ref location recorded yet), display its definition too
+        link(res.first().location(), {
+          if entry.ref-locs.len() == 0 [*#entry.def* (#custom-term)]
+          else [#custom-term]
+        })
+  
+        // Add location to the term's ref list if the current page
+        // is not already listed
+        glossary.update(v => {
+          // If this page is not in, push the loc in!
+          if v.at(term).ref-locs.all(l => l.page != loc.page()) {
+            v.at(term).ref-locs.push(
+              // Current page loc
+              loc.position()
+            )
+          }
+          v
+        })
+      }
+      else { r } // Otherwise just return the ref as it is
+    })
+  }
+
+  doc
+}
--- a/BPV/qrs/main.typ
+++ b/BPV/qrs/main.typ
@ -75,17 +75,16 @@
 // Add vertical space before enums, lists and tables (tables also get space after)
 #show enum: l =>{v(5pt)
  l
-  //v(5pt)
 }

 #show list: l =>{v(5pt)
  l
-  //v(5pt)
 }

 #show table: t=>{v(10pt)
  t
-  v(5pt)}
+  v(5pt)
+}


 = Introduction
@ -114,13 +113,13 @@ Because of the intrinsic properties of side-channel information, the integrity e
 A distance-based outlier detector that uses power traces of a nominal boot-up sequence can learn the expected pattern and detect any variation in a new boot-up sequence.
 This novel solution can detect various attacks centred around manipulating firmware.
 In addition to its versatility of detection, it is also easily retrofittable to almost any embedded system with @DC input and a consistent boot sequence.
-It requires minimal training examples and minor hardware modification in most cases, especially for @DC powered devices.
+It requires minimal training examples and minor hardware modification in most cases, especially for DC-powered devices.

 == Paper Organization
 We elaborate on the type of attacks that our method aims to mitigate in the threat model @threat and the technology we leverage to capture relevant information in Section @SCA.
-Secion~@bpv describe the proposed solution.
-Sections~@exp-network,~@exp-drone and~@aim present test cases that illustrates applications and variations of the @BPV.
-Finally, the paper finishes with Section @discussion that provides more insight on specific aspects of the proposed solution and Section~@conclusion for the conclusion.
+@bpv describes the proposed solution.
+@exp-network,~@exp-drone, and~@aim present test cases that illustrate applications and variations of the @BPV.
+Finally, the paper finishes with @discussion, which provides more insight into specific aspects of the proposed solution, and @conclusion for the conclusion.


 = Related Work
@ -158,7 +157,7 @@ All these methods illustrate the potential of power side channels for attacks, b
 After all, the lack of required interaction with the machine benefits the defense mechanism by making it more difficult to bypass.
 Following this idea, Clark et al. @wud proposed in 2013 a power consumption-based malware detector for medical devices.
 Hernandez et al. included power consumption with network data for malware detection @8855288.
-Electrical power consumption is especially appropriate for infering the machine activity for different reasons.
+Electrical power consumption is especially appropriate for inferring the machine's activity for different reasons.
 First, it is easy to measure in a reproducible manner.
 Then, it can be easy to get access to relevant power cables with little tampering from the machine when the power conversion from @AC to @DC power is performed outside the machine.
 It is also a common side channel to all embedded systems as they all consume electricity.
@ -202,7 +201,6 @@ Such alternatives can be found for computers @coreboot, routers @owrt @ddwrt @fr
 These alternative firmware are often open-source and provide more features, capabilities, and better performance, as they are updated and optimized by their community.
 Implementing alternative firmware on a machine could allow an attacker to gain control of it without necessarily alerting the end user.

-#agd[add a section about the capture process. Either here or in the discussion and reference it.]
 // = Side Channel Analysis<sca>
 // @SCA leverages the emissions of a system to gain information about its operations.
 // Side channels are defined as any involuntary emission from a system.
@ -253,7 +251,7 @@ The training sequence follows two steps.
 + The sequence computes the threshold as $"thresh" = 1.5 dot "IQR"(D)$ with IQR the Inter-Quartile Range of the distances set $D$.

 The @IQR is a measure of the dispersion of samples.
-It is based on the first and third quartiles and defined as $ "IQR" = Q_3 - Q_1$ with $Q_3$ the third quartile and $Q_1$ the first quartile.
+It is based on the first and third quartiles and defined as $ "IQR" = Q_3 - Q_1$ with $Q_3$ being the third quartile and $Q_1$ being the first quartile.
 This value is commonly used @han2011data to detect outliers as a similar but more robust alternative to the $3 sigma$ interval of a Gaussian distribution.
 To apply the @IQR to the time series, we first compute the average of the NORMAL traces.
 This average serves as a reference for computing the distance of each trace.
@ -279,7 +277,7 @@ If the new trace does not match any model, then it does not follow any of the no

 = Test Case 1: Network Devices<exp-network>

-To verify the performance of the proposed detector, we design an experiment that aims at detecting firmware modifications on different devices .
+To verify the performance of the proposed detector, we design an experiment that aims at detecting firmware modifications on different devices.
 Networking devices are a vital component of any organization, from individual houses to complete data centers @downtime.
 A network failure can result in significant downtime that is extremely expensive for data centers.
 Compromised network devices can also result in data breaches and @APT.
@ -297,9 +295,9 @@ This experiment illustrates the firmware verification capability of a side-chann

 == Experimental Setup<setup>
 Although this experiment is conducted in a controlled environment, the setup is similar to a real deployment (see @capture for more details).
-We gather data from the four networking equipment which are connected to a managed @PDU (see @capture for more details).
+We gather data from the four pieces of networking equipment, which are connected to a managed @PDU (see @capture for more details).
 This @PDU's output can be controlled by sending instructions on a telnet interface and enables turning each machine on or off automatically.
-Each machine will undergo firmware change or version change to represent a firmware attack.
+Each machine will undergo a firmware change or version change to represent a firmware attack.
 The changes are listed in @tab-machines.

 #figure(
@ -319,7 +317,7 @@ This experiment aims at simulating an attack situation by performing firmware mo
 For the switches, we flash different firmware versions provided by the @OEM.
 For wireless routers, their firmware is changed from the @OEM to different versions of #link("https://openwrt.org/")[OpenWrt].
 In this study, we consider the latest @OEM firmware version to be the nominal version, expected to be installed on the machine by default.
-Any other version or firmware represent an attack and is considered anomalous.
+Any other version or firmware represents an attack and is considered anomalous.

 == Experiment procedure
 To account for randomness and gather representative boot-up sequences of the device, we performed 500 boot iterations for each machine.
@ -338,7 +336,7 @@ We obtain the result per machine and per model.
 The training dataset is generated by injecting artificial anomalies, but the evaluation is performed on actual anomalous traces collected in a controlled environment.
 For each evaluation, a random set of $10$ consecutive traces is selected from the NORMAL label to serve as the seed for the anomaly generation.
 The anomaly generator returns a training dataset composed of normal traces on one side and anomalous artificial traces on the other.
-The models train using this dataset and are evaluated against a balanced dataset combining $M in [20,50]$ consecutive anomalous traces selected at random across all abnormal classes and as many nonimal traces.
+The models train using this dataset and are evaluated against a balanced dataset combining $M in [20,50]$ consecutive anomalous traces selected at random across all abnormal classes and as many nominal traces.
 The testing set is balanced between nominal and abnormal traces.
 The training requires only a few nominal traces.
 This evaluation is repeated $50$ times, and the $F_1$ score is computed for each iteration.
@ -482,12 +480,12 @@ The goal is not the reproduce exact anomalous traces but to generate a wide vari
 #figure(
  image("images/Bootup_traces_TPLINK.svg", width: 100%),
  caption: [
-    Example of TP-Link switch boot-up traces for different firmware versions. The anomalous firmware (FIRMWARE V2) present both a $y$ and $x$ shift.
+    Example of TP-Link switch boot-up traces for different firmware versions. The anomalous firmware (FIRMWARE V2) presents both a $y$ and $x$ shift.
  ],
 )<fig-boot-up_traces_TPLINK>

-@fig-boot-up_traces_TPLINK illustrate the domain knowledge extracted from this machine.
-The anomalies that the power trace exibit are a combination of types of transformations.
+@fig-boot-up_traces_TPLINK illustrates the domain knowledge extracted from this machine.
+The anomalies that the power trace exhibits are a combination of types of transformations.

 - The trace is shifted along the $y$ axis. In this case, the anomalous firmware consumes significantly more or less power than the normal one. This shift can affect the whole trace or only a part of it. This can be the result of different usage of the machine's components or a significant change in the firmware instructions.
 - The trace is delayed or in advance along the $x$ axis. The anomalous trace presents the same patterns and amplitude as the normal trace but at different points in time. This shift can occur when parts of the firmware are added or removed by updates.
@ -508,7 +506,7 @@ The possible transformations are:
  caption: [Overview of the @BPV model training and evaluation.],  
 )<fig-overview>

-The resulting dataset does not exactly resemble the anomalous traces that are collected but presents traces with the same range of distance to normal traces (see  @fig-Synthetic_vs_Normal_TPLINK).
+The resulting dataset does not exactly resemble the anomalous traces that are collected but presents traces with the same range of distances to normal traces (see  @fig-Synthetic_vs_Normal_TPLINK).
 To avoid introducing training biases, the dataset is balanced by generating new normal traces using the average and standard deviation if required.


@ -561,13 +559,13 @@ However, the lack of transferability of the proposed methods indicates that furt


 = Discussion<discussion>
-This section elaborate on some important aspects of this study.
+This section elaborates on some important aspects of this study.

 == Capture Process<capture>
 We use a hardware device referred to as the capture box @hidden placed in series with the primary power cable of the target device.
-The technology for measuring the current differ depending on the capture box's version.
-For test case 1 and 3, the box's shunt resistor generates a voltage drop representative of the global power consumption of the machine.
-For test case 2, a Hall effect sensor return a voltage proportional to the current.
+The technology for measuring the current differs depending on the capture box's version.
+For test cases 1 and 3, the box's shunt resistor generates a voltage drop representative of the global power consumption of the machine.
+For test case 2, a Hall effect sensor returns a voltage proportional to the current.
 For both versions, the voltage value is sampled at 10 KSPS.
 These samples are packaged in small fixed-size chunks and sent to a data aggregation server on a private @VLAN.
 The data aggregation server is responsible for gathering data from all of our capture boxes and sending it via a @VPN tunnel to a storage server.
@ -588,16 +586,16 @@ The complete dataset corresponding to this experiment is available online @datas

 == Support for Online Training
 In order for the @BPV to integrate in a realistic environment, the training procedure takes the rareness of the boot-up event into account.
-Once the measurement device is setup on the machine to protect, the streaming time series representing the power consumption serves as input for the bootup detection algorithm (see @sds).
+Once the measurement device is set up on the machine to protect, the streaming time series representing the power consumption serves as input for the bootup detection algorithm (see @sds).
 Each bootup event is extracted and added to a dataset of bootup traces.
 Once the dataset reaches the expected number of samples, the @BPV computes the threshold and is ready for validation of the next bootup.
-The complete training and validation procedures require no human interractions.
+The complete training and validation procedures require no human interactions.

-In the case of a multi-modal models, the training procedure require one human interraction.
+In the case of a multi-modal model, the training procedure requires one human interaction.
 Presented with the bootup samples, an operator can transform the model into a multi-modal model by separating the training samples into multiple modes.
-Once the separation is performed, the training procedure resumes without interraction and the next bootup samples are assigned to the closest mode.
+Once the separation is performed, the training procedure resumes without interaction, and the next bootup samples are assigned to the closest mode.

-Thanks to its low-complexity and support for multi-modes, the @BPV can adapt during training to changes in the training data and supports switching between single and multi-modes.
+Thanks to its low complexity and support for multi-modes, the @BPV can adapt during training to changes in the training data and supports switching between single and multi-modes.

 = Conclusion<conclusion>
 This study illustrates the applicability of side-channel analysis to detect firmware attacks.
@ -605,4 +603,5 @@ The proposed side-channel-based @IDS can detect firmware tampering from the powe
 Moreover, distance-based models leveraged in this study allow minimal training data and training time requirements.
 On a per-machine basis, anomaly generation can enhance the training set without additional anomalous data capture.
 Finally, deploying this technology to production networking equipment requires minimal downtime and hardware intrusion, and it is applicable to clientless equipment.
-This study illustrates the potential of independent, side-channel-based @IDS for the detection of low-level attacks that can compromise machines event before the operating system gets loaded.
+This study illustrates the potential of independent, side-channel-based @IDS for the detection of low-level attacks that can compromise machines even before the operating system gets loaded.
+