initial migration

2024-08-05 20:24:56 +02:00 · 2024-08-05 20:24:56 +02:00 · 805a34f937
commit 805a34f937
parent 63ff6b2009
30 changed files with 4963 additions and 9 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1 @@
+drafts/*
--- a/data/an-openioc-graph-a-different-kind-of-rule-scheme.md
+++ b/data/an-openioc-graph-a-different-kind-of-rule-scheme.md
@ -0,0 +1,240 @@
+Although I think that implementing a full-fledged
+XML-editor is too complex for an operational scenario, I
+believe the OpenIOC-format, which has been in the works at
+Mandiant for a couple of years now, is quite good. They also
+have the IOC Writer, which was launched at last summer's Black
+Hat. OpenIOC can export to other expression languages, such
+as Yara [1], as well.
+
+I have been thinking of a way to combine graph knowledge
+with exactly that for a while, an expressive detection
+language based on a graph. If combining two things you love,
+I have learned that it simply can't end badly, it must end
+with something amazing. Let's give it a try!
+
+So I went about it, starting off by importing a sample
+Maltego-graph to Titan on HBase [2]. I basically set out
+with five connected nodes in Maltego Tungsten. Nothing
+malicious, just a national newspaper.
+
+Running that through my Rexster migration script results in
+an equivalent graph on the Rexster server.
+
+It's nice considering if you'd like to put it in a larger
+context with millions or billions of vertices you would like
+to trigger on. That is out of bounds for Maltego, or your
+desktop system in general.
+
+
+## The OpenIOC Part
+
+If looking at the graphs above, you will probably agree that
+it isn't especially descriptive of certain incidents or other
+contextual data. But what if we could combine the graph with
+something like OpenIOC? Turns out that it's conceptually
+similar. The weakness of OpenIOC is that it doesn't scale
+when firing up an OpenIOC editor - like the one Mandiant
+have created. On the other hand, if you could traverse a
+graph with OpenIOC designed around the OpenIOC format..
+
+Let's create a basic writer as a demonstration, which
+operates on the root level (no nesting of rules in this
+example).
+
+    from ioc_writer import ioc_api
+    from lxml import etree as et
+
+    class IOC:
+        def __init__(self):
+            self.IOC = ioc_api.IOC(name='Test', description='An IOC generated from a Python script', author='Someone')
+
+            self.IOC.set_created_date()
+            self.IOC.set_published_date()
+            self.IOC.set_lastmodified_date()
+            self.IOC.update_name('test_rexster')
+            self.IOC.update_description('A Test')
+            self.id = self.IOC.iocid
+
+        def addNode(self,label,text,type,indicator,condition='is'):
+        IndicatorItem_node = ioc_api.make_IndicatorItem_node(condition, label, text, type, indicator)
+            current_guid = IndicatorItem_node.attrib['id']
+            print current_guid
+            self.IOC.top_level_indicator.append(IndicatorItem_node)
+
+        def __str__(self):
+            self.xml = et.tostring(self.IOC.root, encoding='utf-8', xml_declaration=True, pretty_print=True)
+            return self.xml
+
+This enables us to do something like this:
+
+    ioc = IOC()
+    ioc.addNode('test','Just a test','domain','vg.no')
+    print ioc
+
+Which will again return the XML of the IOC.
+
+    <?xml version='1.0' encoding='utf-8'?>
+    <OpenIOC xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns="http://openioc.org/schemas/OpenIOC_1.1" id="06fd70db-992c-4678-83e6-8f1b150e8bcf" last-modified="2014-01-28T07:15:09" published-date="2014-01-28T07:15:09">
+      <metadata>
+        <short_description>test</short_description>
+        <description>A Test</description>
+        <keywords/>
+        <authored_by>Someone</authored_by>
+        <authored_date>2014-01-28T07:15:09</authored_date>
+        <links/>
+      </metadata>
+      <criteria>
+        <Indicator id="fbbb2883-473a-4a1c-92c4-692e199adb61" operator="OR">
+          <IndicatorItem id="14a42d26-b056-4b2e-a327-7d6edb25457e" condition="is" preserve-case="false" negate="false">
+            <Context document="test" search="Just a test" type="mir"/>
+            <Content type="domain">vg.no</Content>
+            <IndicatorItem id="dff6e0c5-613b-4bea-8bad-bb7a36b3ccdf" condition="is" preserve-case="false" negate="false">
+              <Context document="test" search="Just a test" type="mir"/>
+              <Content type="ip">195.88.55.16</Content>
+            </IndicatorItem>
+          </IndicatorItem>
+        </Indicator>
+      </criteria>
+      <parameters/>
+    </OpenIOC>
+
+Reviewing the XML above you might notice that the scheme is
+pretty transferable to a graph, perhaps even simplifying
+the IOC XML. Be especially aware of the following tags and
+attributes:
+
+* Content
+* The IndicatorItem condition
+* The content type
+
+A nested IOC might look like this (relevant excerpt):
+
+        <Indicator id="b12f8c27-d168-49b5-bc75-cec86bf21d3f" operator="OR">
+          <IndicatorItem id="af4323dc-a967-4fe3-b62f-b461b90a3550" condition="is" preserve-case="false" negate="false">
+            <Context document="test" search="Just a test" type="mir"/>
+            <Content type="domain">vg.no</Content>
+            <IndicatorItem id="2ff639ca-dcec-4967-ac06-f54989bf3dc4" condition="is" preserve-case="false" negate="false">
+              <Context document="test" search="Just a test" type="mir"/>
+              <Content type="ip">195.88.55.16</Content>
+            </IndicatorItem>
+          </IndicatorItem>
+        </Indicator>
+
+The above implies that the domain vg.no needs to be
+accompanied with the IP-address ``195.88.55.16``.
+
+## Merging the Best of Two Worlds
+
+So now that we have had a look at the power in the structure
+of a graph and the power of expression in the OpenIOC
+XML-indicators, you might see why this is the best of two
+worlds.
+
+In the challenge of combining them both I perhaps
+oversimplified the nesting and used the two previously
+mentioned attributes in the graph, adding the content as the
+value of the node and the condition. We will also have to
+add the type attribute since that tells us what type of
+OpenIOC entry we have when reversing the process later
+on. We will have a small collision between Maltego and
+OpenIOC, since for instance an IP-address type will
+differ. So for now you will need two type attributes, one
+for Maltego and one for OpenIOC (if you plan to go both
+ways). This is left as an exercise for the reader.
+
+Creating an OpenIOC-compatible graph is a breeze:
+
+    from rexpro import RexProConnection
+
+    class Graph:
+        def __init__(self):
+            self.graph = RexProConnection('localhost',8184,'titan')
+
+        def addVertice(self,content,content_type,condition):
+            vertice_id = self.graph.execute("""
+                def v1 = g.addVertex([content:content,content_type:content_type,condition:condition])
+                return v1""", 
+                {'content':content, 'content_type':content_type, 'condition':condition})
+            return vertice_id
+
+        def addEdge(self,vid1,vid2,label):
+            edge = self.graph.execute("""
+                def v1 = g.v(vid1)
+                def v2 = g.v(vid2)
+                g.addEdge(v1, v2, label)
+                g.commit()""",{'vid1':vid1['_id'], 'vid2':vid2['_id'], 'label':label})
+
+    graph=Graph()
+    v1=graph.addVertice('vg.no','domain','is')
+    v2=graph.addVertice('195.88.55.16','ip','is')
+    graph.addEdge(v1,v2,'and')
+
+If you'd like to go the other way again in order to talk to
+other organisations perhaps, you will want to run the
+process in reverse:
+
+    from rexpro import RexProConnection
+
+    class RexsterIOC:
+        def __init__(self):
+            self.graph = RexProConnection('localhost',8184,'titan')
+
+            self.IOC = ioc_api.IOC(name='Test', description='A test IOC generated from Rexster', author='Someone')
+
+            self.IOC.set_created_date()
+            self.IOC.set_published_date()
+            self.IOC.set_lastmodified_date()
+            #IOC.add_link('help', self.baseurl + url)
+            self.IOC.update_name('test')
+            self.IOC.update_description('A Test')
+            self.id = self.IOC.iocid
+            self.lastId=None
+
+        def     addNode(self,label,text,type,indicator,condition='is',addToLast=False):
+            IndicatorItem_node = ioc_api.make_IndicatorItem_node(condition, label, text, type, indicator)
+
+            if addToLast and self.last:
+                self.last.append(IndicatorItem_node)
+            else:
+                self.IOC.top_level_indicator.append(IndicatorItem_node)
+
+            current_guid = IndicatorItem_node.attrib['id']
+            self.last  = IndicatorItem_node
+
+        def traverse(self,rootNodeId):
+            root=self.graph.execute("""return g.v(80284)""",{'vid':str(rootNodeId)})
+            self.addNode('test','Just a test',
+                root['_properties']['content_type'],
+                root['_properties']['content'],
+                root['_properties']['condition'])
+
+            one_level_out=self.graph.execute("""return g.v(vid).out""",{'vid':str(rootNodeId)})
+            for vertex in one_level_out:
+                self.addNode('test','Just a test',
+                    vertex['_properties']['content_type'],
+                    vertex['_properties']['content'],
+                    vertex['_properties']['condition'],addToLast=True)      
+
+        def __str__(self):
+            self.xml = et.tostring(self.IOC.root, encoding='utf-8', xml_declaration=True, pretty_print=True)
+            return self.xml
+
+    ioc = RexsterIOC()
+    ioc.traverse(80284) # the root node
+    print ioc
+
+One thing that you can now do is to store the indicators
+with the rest of your network data. This again will imply
+that the edges are created automatically without any need to
+actually run jobs to combine data for detecting stuff.
+
+That's my small concept demonstration. I think it's pretty
+cool!
+
+I've put the scripts in a Gist for you if you'd like to give
+it a try [3].
+
+
+[1] Yara: https://github.com/mandiant/ioc_writer/tree/master/examples/openioc_to_yara  
+[2] Importing a sample Maltego-graph to Titan on HBase: https://gist.github.com/tommyskg/8166472  
+[3] the scripts out there: https://gist.github.com/tommyskg/8671318  
--- a/data/apm-lock.md
+++ b/data/apm-lock.md
@ -0,0 +1,56 @@
+I have used OpenBSD for some time now and one of the things that I
+have had to work a bit on to get the way I like it, is locking the
+terminal upon apmd suspend. In other words locking the terminals
+when I close the lid.
+
+Since it is a bit of code, and I reuse it in other places, I
+created this as a separate helper script. Thus, my
+``/etc/apm/suspend``-reference is:
+
+```
+#!/bin/ksh
+
+lock.sh&
+sleep 3
+```
+
+The suspend file executes every time the lid is closed.
+
+Once upon a time I probably used different sources for this, but
+anyways the script that I currently use is two-fold. The first
+part locks all xenodm sessions with xlock:
+
+```
+CMD_LOCK="xlock"
+
+# get all currently running xenodm sessions
+XSESSION=$(ps -axo user,ppid,args|awk '/xenodm\/Xsession/ { print
+$1,$2}')
+
+# lock all logged in X sessions
+for SESSION in "$XSESSION"; do
+  _USER=$(echo $SESSION | cut -f1 -d' ')
+  _PPID=$(echo $SESSION | cut -f2 -d' ')
+  _DISPLAY=$(ps -p $_PPID -o args=|cut -d' ' -f2)
+  su - $_USER -c "export DISPLAY=\"$_DISPLAY\" && $CMD_LOCK" &
+done
+```
+
+The second part of the script kills all active consoles. This is
+the most important part for me, since I most often lock the
+screen, but forget to log off the consoles.
+
+```
+# kill open console TTYs
+OPEN_TTYS=$(who|awk '{print $2}'|fgrep ttyC)
+for _TTY in $OPEN_TTYS; do
+  T=$(echo $_TTY|sed 's/tty//');
+  TTY_PID=$(ps -t $T|fgrep -v COMMAND|fgrep "ksh (ksh)"|awk '{print $1}');
+  kill -9 $TTY_PID;
+done
+```
+
+Please also be aware that suspending the laptop will leave things
+in plaintext, in memory, so to truly be resistant to an evil maid
+vector you would need to power off the laptop when out of a
+controlled area.
--- a/data/avenger-openbsd68.md
+++ b/data/avenger-openbsd68.md
@ -0,0 +1,27 @@
+Those following me on the Fediverse have recently become familiar
+with an old-school program called Mail Avenger. 
+
+
+```
+mkdir ~/.avenger
+openssl rand -base64 8 | shasum | head -c16 > ~/.avenger/.macpass
+echo "" >> ~/.avenger/.macpass
+```
+
+
+
+```
+brew install berkeley-db4
+curl -O http://www.mailavenger.org/dist/avenger-0.8.5.tar.gz
+echo "b0fc3e2e03ed010e95e561367fce7b087968df7ea6056251eba95cad14d26d37 avenger-0.8.5.tar.gz" | shasum -a 256 --check
+tar xvzf avenger-0.8.5.tar.gz
+cd avenger-0.8.5
+./configure --with-db=/usr/local/Cellar/berkeley-db@4/4.8.30
+cd util
+make macutil && install macutil ~/.local/bin/
+```
+
+```
+macutil --expire=+2M --from "Tommy S" --fromexp "address expires" --sender "t+return+*@252.no"
+```
+
--- a/data/cognitive-automation.md
+++ b/data/cognitive-automation.md
@ -0,0 +1,105 @@
+There is a lot of hype around many things in cyber
+security. One concept that is not, is called Cognitive
+Automation (CA). CA can be explained by comparing it to
+traditional automation. That is, how tasks are automated:
+like alerts correlation. By using cognitive automation, the
+way the mind works is taken into account. I believe many
+security professionals will recognise the practical aspects
+of Schulte's model for "Complexity of automation vs
+effectiveness/safety" [1].
+
+I've written a post on this topic years ago ("The Role of
+Cognitive Automation in Information Security"), but
+unluckily that was lost in migration. It probably needed an
+update anyways, and I believe the cyber security field is
+more mature to receive this input now rather than at that
+point.
+
+Cognitive automation is strongly applied in the aerospace
+industry for instance. In aerospace, long ago, there was a
+realisation that the strengths of the human being are the
+ability to learn, instinct, problem reduction, ability of
+abstraction and several others. The machine’s strength is
+parallel processing, objectivity, long-term monitoring,
+complex planning and decision making and so on. Schulte
+describes this concept in detail, in his Man-Machine
+Cooperation model [1].
+
+In order to benefit from a similar model in cyber security
+there is a need to evolve the way data is extracted,
+preprocessed and prepared for human-machine interaction. As
+may be recognised at this point there is already technology
+available to provide parallel processing on the machine
+part. How a computing cluster would solve such a problem is
+the evident problem. In that regard, machine learning is the
+most promising technique to structure and classify the data
+which seems to scale really well. Efficiently ingesting,
+storing and preprocessing the data is the first stage of
+that challenge.
+
+Another detail that I would like to point out here, from the
+great book "The Multitasking Mind" by Salvucci and Taatgen,
+is how the human mind works with buffers (the aural, visual,
+declarative, goal, manual and problem buffers). A human can
+actually only handle one thing at once. So when analysts are
+tasked with several simultaneous tasks or roles, this will
+definitely produce bad quality results. This is really
+important for all cyber security seniors and designers to
+understand, so read the book.
+
+Back to how this applies in practical terms: when analysts
+manually analyse and decide by expert knowledge, classifying
+the attributes of full content data and e.g. create Yara
+and Snort signatures, it is a reasonable assumption that a
+number of relevant attributes are never evaluated as
+potential anomalies. This greatly increases the
+possibilities of the threat groups. In aerospace cognitive
+automation there is a concept called Mission Management,
+that is similar to the problem described here.
+
+Now for a practical example of how cognitive automation can
+work, this time paralleled with the approach taken by
+Netflix to movie recommenders. Let's say that you have
+stored the PDFiD [2] vector of all PDF documents over the
+last ten years, passing through a network. The vector
+structure will look like:
+
+```
+obj,endobj,stream,endstream,xref,trailer,startxref,/Page,/Encrypt,/JS,/JavaScript,/AA,/OpenAction,/JBIG2Decode
+```
+
+or:
+
+```
+1. 7,7,1,1,1,1,1,1,0,1,1,0,1,0
+[...]
+```
+
+If 500 PDF files pass through the systems each day on
+average, that will be 1,825,000 documents over those ten
+years. In addition qtime is a significant part of that
+vector - and other parameters could be file names and so on.
+
+Say an analyst receives a suspicious PDF file. That file may
+initially be hard for the analyst to classify. In such a case
+the system should propose other related files to look
+at. Practically speaking this saves the analyst cognitive
+capacity to use instinct, pattern recognition and creativity
+to classify the document. The machine on the other hand
+maintains objectivity, has great stress resistance, can
+retrieve a lot more information, and it can process and
+pivot on all those 10 years of documents as opposed to the
+analyst.
+
+Now that you have gotten an introduction to the world of
+cognitive automation, I hope this will drive a discussion on
+how we can take our field to the next level. I am confident
+that this means understanding and solving problems before
+attempting to buy our way out of them.
+
+
+[1] Schulte, D. A. 2002. Mission management and crew assistance for military aircraft: cognitive concepts and prototype evaluation.  
+[2] PDFiD: https://blog.didierstevens.com/2009/03/31/pdfid/  
+
+
+
--- a/data/converting-pst.md
+++ b/data/converting-pst.md
@ -0,0 +1,100 @@
+Some time ago I gave an introduction to converting Microsoft
+MSG files [1] to a readable RFC 2822 [2] format on Linux. In
+fact you will sometimes get an even kinkier format to work
+with: The Outlook Data File (PST) [3]. PST is a
+proprietary format used by Microsoft Outlook, and is the
+equivalent of the mbox on Linux.
+
+**Edit August 29th**: Also have a look at the more
+up-to-date [4].
+
+Even though PST files are a bit harder to read than single
+EML files, there is hope if you only have a Linux client:
+libpst, and more specifically readpst. For libpst you need
+three libraries: 
+
+* ``libgsf`` (i/o library that can read and write common file
+types and handle structured formats that provide
+file-system-in-a-file semantics)
+* boost (portable C++ source libraries) 
+* libpst
+
+On OS X you can install it by: 
+
+```
+brew install libgsf
+brew install boost
+brew install libpst
+```
+
+Now if you have a pst archive, like [5] for instance, you can
+convert it by:
+
+    mkdir export
+    readpst -M -b -e -o export "Personal Folders.pst"
+
+This should give an output like this:
+
+    Opening PST file and indexes...
+    Processing Folder "Deleted Items"
+    Processing Folder "Inbox"
+    Processing Folder "latest"
+    [...]
+    Processing Folder "Reports"
+    	"Reports" - 11 items done, 1 items skipped.
+    Processing Folder "Quotes"
+    	"Quotes" - 1 items done, 1 items skipped.
+    Processing Folder "Printer"
+    	"Printer" - 1 items done, 1 items skipped.
+    Processing Folder "Passwords"
+    	"Passwords" - 6 items done, 1 items skipped.
+    [...]
+    Processing Folder "Kum Team"
+    	"Kum Team" - 37 items done, 0 items skipped.
+    	"9NT1425(India 11.0)" - 228 items done, 1 items skipped.
+    Processing Folder "Jimmi"
+    	"Jimmi" - 31 items done, 0 items skipped.
+    	"Inbox" - 27 items done, 11 items skipped.
+    Processing Folder "Outbox"
+    Processing Folder "Sent Items"
+    	"Sent Items" - 0 items done, 1 items skipped.
+    Processing Folder "Calendar"
+    	"Calendar" - 0 items done, 6 items skipped.
+    Processing Folder "Contacts"
+    	"Contacts" - 0 items done, 1 items skipped.
+    [...]
+    Processing Folder "Drafts"
+    Processing Folder "RSS Feeds"
+    Processing Folder "Junk E-mail"
+    Processing Folder "quarantine"
+	    "My Personal Folder" - 13 items done, 0 items skipped.
+
+Which creates a directory structure like ``ls -l 'export/My
+Personal Folder'``:
+
+    drwxr-xr-x   2 -  staff   68 Aug 28 21:34 Calendar
+    drwxr-xr-x   2 -  staff   68 Aug 28 21:34 Contacts
+    drwxr-xr-x  29 -  staff  986 Aug 28 21:34 Inbox
+    drwxr-xr-x   2 -  staff   68 Aug 28 21:34 Journal
+    drwxr-xr-x   2 -  staff   68 Aug 28 21:34 Sent Items
+    drwxr-xr-x   2 -  staff   68 Aug 28 21:34 Tasks
+
+If you sample ``Inbox/Mails/``, you will find:
+
+    1.eml	10.eml	11.eml	12.eml	13.eml	14.eml	15.eml	16.eml	17.eml	2.eml	3.eml	4.eml	5.eml	6.eml	7.eml	8.eml	9.eml
+
+You can now continue with our previous post [6]. I'll also
+encourage you to have a look at the documentation of the
+Outlook PST format [7].
+
+
+[1] Converting Microsoft MSG files: /2013-10-08-msg-eml.html  
+[2] RFC 2822: http://tools.ietf.org/html/rfc2822  
+[3] The Outlook Data File (PST): http://office.microsoft.com/en-001/outlook-help/introduction-to-outlook-data-files-pst-and-ost-HA010354876.aspx  
+[4] libpff: /converting-pst-archives-in-os-xlinux-with-libpff  
+[5] Example PST file: http://sourceforge.net/projects/pstfileup/files/Personal%20Folders.pst/download  
+[6] Reading MSG and EML Files on OSX/Linux Command Line: :4443/forensics/reading-msg-files-in-linux-command-line/  
+[7] The outlook.pst format: http://www.five-ten-sg.com/libpst/rn01re05.html  
+
+
+
--- a/data/gpg-openssl.md
+++ b/data/gpg-openssl.md
@ -0,0 +1,126 @@
+## Key Takeaways
+
+* PGP is replaceable with native OpenSSL RSA public key crypto
+  and AES-256 keys.
+* This approach simplifies crypto operations, and only requires
+  OpenSSL which is widely available.
+* Existing PGP keys stored in GnuPG work with OpenSSL via `gpgsm`.
+
+## Introduction
+
+The rabbit hole mission of mine to get rid of PGP continues.
+
+Lately I have been looking into converting PGP keys from GnuPG to
+OpenSSL. This way I can send encrypted data to people not using my
+OpenSSL-only approach. After all, most people still depend on PGP
+and it is the format they publish their public keys in.
+
+## Exporting A PGP Public Key for Encryption Using OpenSSL
+
+A PGP key cannot be directly read by OpenSSL, but GPG can natively
+export to SSH and ssh-keygen to PKCS8:
+
+```
+gpg --export-ssh-key <key-id>! > /tmp/test.pub
+ssh-keygen -f /tmp/test.pub -e -m PKCS8 > /tmp/test.pem
+```
+
+The above pubkey can be used to encrypt data with OpenSSL as shown
+on my [contact page](https://contact.252.no):
+
+```
+KEY=`openssl rand -hex 32` IV=`openssl rand -hex 16`
+ENCRYPTED_KEY_B64=`openssl pkeyutl -encrypt -pubin -inkey /tmp/test.pem -pkeyopt rsa_padding_mode:oaep <<< $KEY|base64`
+BLOB=`openssl enc -aes-256-cfb -a -e -K ${KEY} -iv ${IV} -in some-file`
+echo "PKCS11-VAULT;aes-256-cfb;rsa_padding_mode:oaep;$ENCRYPTED_KEY_B64:$IV:$BLOB;" > encrypted.txt
+```
+
+The steps of the above are:
+
+1. Create an initialization vector [1] and an encryption key
+2. Encrypt the one-time key to test.pem (our exported PGP-key)
+3. Encrypt `some-file` using the key and IV using 256 bits AES in CFB-mode
+4. Format the output in my PV-format.
+
+Store `encrypted.txt` for decryption in the next section.
+
+## Exporting a PGP Private Key for Decryption Using OpenSSL
+
+This part is a bit more complex. For the sake of an example, let
+us say you received an encrypted blob with an IV and encrypted
+key, using the approach shown in the former section. You have the
+key stored in GnuPG.
+
+`gpgsm` can export your private key to p12, which is readable for
+OpenSSL [2].
+
+First list your secret keys in the GnuPG store: `gpg
+--list-secret-keys --with-keygrip`.
+
+Convert the key to X.509 by: `gpgsm --gen-key -o
+/tmp/temp.crt`. You need to fill the values requested:
+
+* Select "existing key"
+* Fill the keygrip from the GPG secret key listing. Make sure you
+  use the right key, since GPG generates several keys behind the
+  scenes (the encryption key)
+* Fill the cn (this needs to be on the format "cn=...") and e-mail
+* Accept the other values as empty and accept the creation
+
+Now import the certificate into `gpgsm`: `gpgsm --import
+/tmp/temp.crt`. When imported, find the key ID by: `gpgsm
+--list-keys`.
+
+Using the key ID, you can now export the key in p12-format. 
+
+```
+# $key holds the key ID found with `gpgsm --list-keys`; the same variable
+# names the p12 file that is written and then read, and the final key file
+gpgsm -o "/tmp/$key.p12" --export-secret-key-p12 "$key"
+openssl pkcs12 -in "/tmp/$key.p12" -nodes -nocerts|tail -n +5 > "/tmp/$key.key"
+```
+
+You only need to do the conversion once and now have your key in
+`/tmp/$key.key`. This should be secured accordingly, and have a
+password set as is offered in the guidance by gpgsm.
+
+The resulting `/tmp/$key.key` is usable for decrypting content
+encrypted by the public key. To decrypt the data in `encrypted.txt`:
+
+```
+IFS=';' read IDENTIFIER ALGORITHM PADDING_MODE ENCRYPTION_BLOBS SIGNATURE < encrypted.txt
+
+for BLOB in ${ENCRYPTION_BLOBS[@]}; do
+    IFS=':' read ENCRYPTED_KEY_B64 IV TEXTFILE_ENC <<< $BLOB
+    ENCRYPTED_KEY=`printf $ENCRYPTED_KEY_B64 | base64 -d`
+    decrypted=false
+    DECRYPTED_KEY=`echo $ENCRYPTED_KEY_B64 |base64 -d | openssl pkeyutl -decrypt -inkey /tmp/$key.key -pkeyopt ${PADDING_MODE} 2> /dev/null` && decrypted=true
+    if [ $decrypted != false ]; then
+        TEXTFILE_DEC=`printf %s "$TEXTFILE_ENC"|base64 -d|openssl enc -$ALGORITHM -d -K "$DECRYPTED_KEY" -iv "$IV" |base64`
+        break
+    fi
+done
+
+echo $TEXTFILE_DEC
+```
+
+The above format supports encryption to multiple parties. It:
+
+1. Reads the PV-format into variables
+2. Loops through the encryption blobs (one pass if one recipient)
+3. Decrypts the key with the private key generated from `gpgsm`
+4. Using the IV and decrypted key, decrypts the content, which is
+   eventually the same as in the previous section's `some-file`
+5. Prints the decrypted content
+
+## Conclusion
+
+It is possible to convert PGP keys to use with OpenSSL via `gpgsm`.
+
+Since OpenSSL is more widely distributed and installed than GnuPG,
+it is a method applicable in more environments.
+
+Using OpenSSL instead of GnuPG provides more options, and reduces
+the complexity of cryptography (since GnuPG has lots of options).
+
+[1] https://stackoverflow.com/questions/39412760/what-is-an-openssl-iv-and-why-do-i-need-a-key-and-an-iv  
+
+[2] https://superuser.com/a/1414277
--- a/data/graph-experiment.md
+++ b/data/graph-experiment.md
@ -0,0 +1,103 @@
+I currently maintain this threat database, and up until now I've
+generated the graph data for d3 using queries, and a lot of logic,
+in a MySQL-database. That is going to change pretty soon. You
+might also remember when we did Social Network Analysis and Object
+Attribution with Maltego 3 [1].
+
+In my seeking for understanding the Apache Hadoop ecosystem I all
+of a sudden got a brutal meeting with Java (Eclipse huh..). I also
+discovered that there is a whole world of libraries and applications
+previously unknown to me. One of them is the über-awesome Neo4j,
+which is a graph database originally built for Java - but guess
+what: It's got a REST API as well. As usual you don't have to
+write the Python code yourself, someone already wrote it for
+you. Note that it only does Python 2 for now [2,3].
+
+The coolest thing about Neo4j is Cypher [4]: Cypher is a "graph
+query language" as they put it themselves. With Cypher you can
+express what you look for in an entirely other way than you would
+do in a relational database, it's actually easy.
+
+And: You of course need the database running as well. If you use a
+Debian system like me you're in luck since they have an experimental
+version out there [5].
+
+Enough talk, here is a simple example of how you could go about it
+in regard to scripting the relations considering threat
+intelligence in order to connect groups to incidents. The goal
+would be to find peripherally connected groups.
+
+    from GraphConn.Connect import Graph
+    g = Graph()
+
+    # create groups
+    g.cGroup("ThreatA")
+    g.cGroup("ThreatB")
+    g.cGroup("ThreatC")
+
+    # create incidents
+    g.cIncident("IncA")
+    g.cIncident("IncB")
+    g.cIncident("IncC")
+
+    # relate groups in some way to each other through incidents
+    g.link("ThreatA","IncA")
+    g.link("ThreatA","IncB")
+    g.link("ThreatB","IncC")
+    g.link("ThreatC","IncA")
+    g.link("ThreatB","IncB")
+
+    # find all threats related to Threat A through incidents
+    print g.fRelated("ThreatA")
+
+You might find this simple, but if you've ever tried to do it in
+SQL you know why you'll need it. Also, remember that this scales
+indefinitely to other entity types as well.
+
+Here's the class used to generate the graph, for reference (feel
+free to copy it, produce something cool and post it back in the
+comment field):
+
+    from neo4jrestclient import client
+    from neo4jrestclient.client import GraphDatabase
+    from neo4jrestclient.query import Q
+
+    class Graph:
+        def __init__(self):
+            self.gdb = GraphDatabase("http://localhost:7474/db/data/")
+            self.nodes = []
+
+        def cGroup(self,name):
+            n = self.gdb.nodes.create(name=name, type='Group')
+            self.nodes.append(n)
+
+        def cIncident(self,name):
+            n = self.gdb.nodes.create(name=name, type='Incident')
+            self.nodes.append(n)
+
+        def link(self,n1,n2):
+            try:
+                l = (Q("name", iexact=n1)); n1 = self.gdb.nodes.filter(l)[0];
+                l = (Q("name", iexact=n2)); n2 = self.gdb.nodes.filter(l)[0];
+                return n1.relationships.create("Executed", n2)
+            except:
+                return False
+
+        def fRelated(self,query):
+            l = (Q("name", iexact=query))
+            n = self.gdb.nodes.filter(l)[0]
+            r = n.traverse()
+            for n2 in r:
+                for e in n2.traverse():
+                    r.append(e)
+            return list(r)
+
+I really hope you enjoy this as much as me right now. The Facebook
+Graph Search for the rest of us.
+
+
+[1] gopher://secdiary.com/0/post/sna-oa-maltego/index.txt
+[2] https://pypi.python.org/pypi/neo4jrestclient/
+[3] https://neo4j-rest-client.readthedocs.org/en/latest/elements.html
+[4] http://www.neo4j.org/learn/cypher
+[5] http://debian.neo4j.org/
--- a/data/graphs-scale.md
+++ b/data/graphs-scale.md
@ -0,0 +1,82 @@
+Following up on my post yesterday, I have also been looking at
+graphs the other way - from a scalable database to a manageable
+graph involving e.g. just one segment.
+
+There are currently two ways to do this:
+
+1) Exporting the graph, and 2) streaming the graph from and to the
+graph database. The first option is obviously the simple one, but
+doesn't always meet our needs. The latter option is often
+needed when multiple analysts work on the same graph.
+
+
+## Option 1: Exporting the Graph
+
+To achieve the first you can use the GraphML save function of
+Gremlin.
+
+    conf = new BaseConfiguration();
+    conf.setProperty("storage.backend","hbase");
+    conf.setProperty("storage.hostname","sandbox.hortonworks.com");
+    conf.setProperty("storage.port","2181");
+    g = TitanFactory.open(conf);
+    g.saveGraphML('test.graphml')
+
+This graph can again be opened in tools such as Gephi.
+
+You can also use the Gephi database API plugin for
+Rexster. There's a Blueprints repo [1] which extends that. Short
+how-to on how to get going with the Gephi development environment,
+from the wiki-pages of the plugin [2]:
+
+1. Get plugins from [3], and [4]
+2. Open Gephi, go to ``Tools > Plugins > Downloaded > "Add
+   Plugins..."``
+3. Press install and follow the guidance, at the end you should
+   restart Gephi
+4. Go to File > Import Database
+5. Add the Rexster configuration to ``/etc/graph/rexster.xml`` (if
+   issues arise when importing the database, look at [5])
+
+``rexster.xml`` should look like this:
+
+    <graph>
+        <graph-name>RexterGraph</graph-name>
+        <graph-type>com.tinkerpop.rexster.config.RexsterGraphGraphConfiguration</graph-type>
+        <graph-buffer-size>100</graph-buffer-size>
+        <graph-location>http://192.168.109.128:8182/graphs/titan</graph-location>
+    </graph>
+
+You should be left with something like this for instance in Gephi:
+
+![A Rexster Graph Import to Gephi, from a Titan database. The graph consists of a variety of segments, such as articles from a article-system and imported Maltego graphs](/static/img/data/rexster-import-gephi.png)
+
+A Rexster Graph Import to Gephi, from a Titan database. The graph
+consists of a variety of segments, such as articles, imported
+Maltego graphs and such.
+
+A Rexster Graph Import to Gephi, from a Titan database. The graph
+consists of a variety of segments, such as articles from a
+article-system and imported Maltego graphs
+
+Here's the cluster on the right there by the way. There's some
+interesting patterns inside there it seems, so I suspect it's from
+a Maltego graph:
+
+![](/static/img/data/gephi-cluster-maltego.png)
+
+## Option 2: The Gephi Streaming API
+
+For the other option I found the Gephi graph streaming API
+[6]. This one I currently found a little limited in that it can
+only provide collaboration between two Gephi instances using a
+Jetty web-server. It's pretty cool, but doesn't offer the
+integration I am looking for. I'll get back to this later.
+
+[1] https://github.com/datablend/gephi-blueprints-plugin
+[2] https://github.com/datablend/gephi-blueprints-plugin/wiki
+[3] https://github.com/downloads/datablend/gephi-blueprints-plugin/org-gephi-lib-blueprints.nbm
+[4]
+https://github.com/downloads/datablend/gephi-blueprints-plugin/org-gephi-blueprints-plugin.nbm
+[5] https://github.com/datablend/gephi-blueprints-plugin/issues/1
+[6] https://marketplace.gephi.org/plugin/graph-streaming/
--- a/data/indicators.md
+++ b/data/indicators.md
@ -0,0 +1,463 @@
+Over what have become some years, cyber security
+professionals have been working on optimising the sharing of
+information and knowledge. A lot of the efforts have
+recently been focused around intelligence- and data-driven
+teams. Today many of these discussions have ended up revolving
+around something related to the STIX format.
+
+> Don't use a lot where a little will do  
+> – Unknown origin
+
+This post features a perspective of the potential of today's
+standard-oriented approach for documenting indicator sets
+related to cyber security threat actors and incidents. It
+turns out we have a longer way to go than expected.
+
+For the purpose of this article, an indicator is a
+characteristic or evidence of something unwanted, or hostile
+if you'd like. I like to refer to the military term
+"Indicators & Warnings" in this regard. In other words, an
+indicator isn't necessarily limited to the cyber domain
+alone either. Physical security could be in an even worse
+condition than cyber security when it comes to expressing
+threat indicators. I'll leave the cross-domain discussion
+for another time.
+
+## Up Until Today
+
+Multiple standards have evolved and disappeared, and one
+that I have been in favor of previously is the OpenIOC 1.1
+standard. However, times are changing, and so are the
+terminology and breadth of how we are able to express the
+intrusion sets.
+
+Even though OpenIOC was a very good start, and still is as
+far as I am concerned, it has been far surpassed by Cybox and
+ultimately STIX [1] in popularity.
+
+STIX is a container, a quite verbose XML format (which is
+turning JSON in 2.0). Cybox is the artefact format [2], for
+malware you have MAEC [3] and so on. Basically it's a set of
+projects collaborating.
+
+This all sounds good, right? Not quite. Have a look at the
+OpenIOC to STIX repository on Github [4] and you will find
+that ``stuxnet.stix.xml`` is 202 lines of XML code for 18
+atomic indicators. OpenIOC on the other hand, is 91 lines,
+and that is a verbose format as well. In fact the overhead
+ratio of the STIX file is about 10:1, while OpenIOC is about
+5:1.
+
+To add to the mind-blowing inefficiency I have yet to see,
+on a regular basis, complex and nested expressions of an
+actor or a campaign in the STIX format.
+
+Before you continue, do a simple Google search for "STIX
+editor" and "cybox editor". Do it now, and while you are at
+it google for "openioc editor" as well. Hello guys, these
+standards have been going around for many years. So, how
+should we interpret that there aren't any user friendly
+approaches to using them? The closest I've come is through
+MISP, and that is generally speaking not using these
+standards for their internal workings either. This one on
+the MISP GitHub issue tracker says it all: STIX 2.x support
+(MISP) [5].
+
+I'm sure that some may disagree with the above statements,
+calling out the infancy of these formats. However, they
+can't be said to be new standards anymore. They are just too
+complex. One example of such is the graph-oriented relations
+implemented into the formats. Why not just let a graph
+database take care of these instead?
+
+This is not just a post to establish the current state. How
+would a better approach look?
+
+## What Is The Problem to Be Solved?
+
+Back to where things have gone since the OpenIOC 1.1/atomic
+indicator days. The most promising addition, in my opinion,
+is the MITRE PRE-ATT&CK and ATT&CK frameworks. The two
+frameworks build on a less structured approach than seen
+for atomic indicators (Lockheed's Kill-Chain). The latter
+can for instance be viewed in form of the Intelligence
+Pyramid.
+
+The Intelligence Pyramid's abstraction levels can be mapped
+against what it is supposed to support when it comes to
+indicators like the following:
+
+    | Level of abstraction  |    | Supports
+    |-----------------------|----|-------------
+    | Behavior              |    | Knowledge
+    |-----------------------|--->|-------------
+    | Derived               |    | Information
+    |-----------------------|--->|-------------
+    | Atomic                |    | Data
+
+The purpose of the abstraction layer is in this case to
+support assessments and measures at the corresponding
+contextual level. For instance a technical report tailored
+to an Incident Response Team (IRT) generally concerns
+Derived and Atomic indicators, while an intelligence report
+would usually be based on the Behavioural level.
+
+Having covered the abstraction layers, we can recognize that
+OpenIOC (or Cybox and MAEC) covers the bottom layers of
+abstraction, while MITRE (PRE-)ATT&CK in its current form is
+mostly about the Behaviour level.
+
+For Derived indicators there are primarily two
+well-established, seasoned and successful formats that have
+become standards through their widespread usage. This is,
+among other things, caused by the indicators and rules being
+effective, rapid, easy and pleasing to write.
+
+First we have Snort/Suricata rules and Lua scripts which were
+designed for network detection. For Snort/Suricata I'd say
+that most of what is detected of metadata today is probably
+expressible in OpenIOC (except for the magic that can be
+done with Lua). Second there is the Yara format which has
+become known for its applicability against malicious
+files. The simplicity of both formats is obviously due to
+their power of expression. Thus, I'd say that Yara and
+Snort/Suricata formats are the ones to look for when it comes
+to content and pattern detection.
+
+> Indicators should be easy and pleasing to write.
+
+To summarize the above, each of the formats can be mapped to
+an abstraction level:
+
+    | Level of abstraction  |    | Formats
+    |-----------------------|----|-------------
+    | Behavior              |    | MITRE (PRE-)ATT&CK
+    |-----------------------|--->|-------------
+    | Derived               |    | Suricata+Lua, Yara
+    |-----------------------|--->|-------------
+    | Atomic                |    | OpenIOC 1.1
+
+
+Going through my notes on how I document my own indicators I
+also found that I use the CVE database, datetimes,
+confidence, analyst comments for context and classification
+as well (the latter being irrelevant for detection).
+
+One of the major problems is: everything that is currently
+out there breaks the analyst workflow. You either need to
+log in to some fancy web interface, edit XML files (god
+forbid) or you would just jot down everything in a text
+file. The text file seems to be the natural fallback in
+almost any instance. I have even attempted to use the very
+good initiative by Yahoo, PyIOCe, and Mandiant's
+long-forgotten IOC Editor. These projects have both lost
+traction, as has almost every other initiative in this space. So
+that is right folks, the text editor is still the preferred
+tool in 2018, and let's face it: indicators should be
+pleasing to design and create - like putting your signature
+to an incident or a job well done.
+
+> an indicator set should be for humans and machines by
+  humans
+
+After all, the human is the one that is going to have to
+deal with the indicator sets at some point, and we are the
+slowest link. So let us not slow ourselves down more than
+necessary. At this point I would like to propose the golden
+rule of creating golden rules: an indicator set should be
+for humans and machines by humans.
+
+You may also have noticed that when all these standards
+suddenly are combined into one standard, they become less
+user-friendly. In other words, let us rather return to
+our common \*NIX roots where each tool had a limited set of
+tasks.
+
+Graphs are essential when writing indicators. Almost
+everything in the world around us can be modelled as a
+network, and infiltration and persistence in cyberspace is
+no exception. Thus, an indicator format needs to be
+representable in a graph, and guess what? Almost everything
+is, as long as it maintains some kind of structure.
+
+For graphs there are two ways of going about the problem:
+
+1) Implement the graph in the format  
+
+2) Make sure that you have a good graph backend and an
+automatable and traversable format available  
+
+For option 1, the graph in the format will increase the
+complexity significantly. Option 2 results in the opposite,
+but that does not mean that it can't be converted to a
+graph. To make an elaborate discussion short, this is what
+we have graph databases for, such as Janusgraph [6].
+
+
+## A Conceptual View
+
+Summarizing the above, I'd like to propose the following
+requirements for indicator formats:
+
+1) Indicator sets should be easy and inviting to create
+
+2) You should be able to start writing at any time, when you
+need it
+
+3) Unnecessary complexity should be avoided
+
+4) The format should be human readable and editable
+
+5) A machine should be able to interpret the format
+
+6) Indicator sets should be graph compatible
+
+With a basis in this article, I believe that the best
+approach is to provide a basic plain text format
+specification that inherits from the OpenIOC 1.1 and MITRE
+frameworks and references other formats where necessary.
+
+Let us imagine that we found an IP address in one
+situation. The IP-address was connected to a domain that we
+found using passive DNS. Further, it was found that a
+specific file was associated with that domain through a
+Twitter comment. Representing the given information in its
+purest (readable) form looks like the following:
+
+    // a test file
+    class                  tlp:white
+    date                   2018/02/18
+    ipv4          low      188.226.130.166
+      domain      med      secdiary.com
+      technique            PRE-T1146
+        filename  med      some_filename.docx
+        comment            found in open sources
+      
+To recap some of the previous points: the above format is
+simple, it can be written at any time based on knowledge of
+well known standards. The best of it all is that if you are
+heavily invested in specific formats, it can be converted to
+them all using a simple interpreter traversing the format.
+
+Further, such a format is easily converted into a tree and
+can be loaded into a graph for traversing and automated
+assessments. Each confidence value can be quantified
+(``low=0.33``, ``med=0.66``, ``high=1.0``). That said,
+simplicity in this case equals actionable indicators.
+
+    | v: 188.226.130.166 (0.33)    | match    | 
+    | e                            |          | 
+    | v: secdiary.com (0.66)       | no match | (0.33+0.66)/2=0.5
+    | e                            |          | 
+    | v: some_filename.docx (0.66) | match    | 
+    
+For networks vs hierarchies: a drawback of the latter, as
+mentioned in the former section, is the lack of
+e.g. multiple domains being connected to different other
+vertices. A practical solution goes as follows:
+
+    ipv4      low    188.226.130.166
+      domain  med    secdiary.com
+    domain    low    secdiary.com
+      ipv4      low    128.199.56.232
+
+The graph receiving the above indicator file should identify
+the domain as being a unique entity and link the two IP
+addresses to the same domain:
+
+    | v: 188.226.130.166 (0.33)
+    | e: 0.5
+    | v: secdiary.com (0.5)
+    | e: 0.33
+    | v: 128.199.56.232 (0.33)
+
+As for structuring the indicator format for machines in the
+practical aspect, consider the following pseudocode:
+
+    indicators = [(0,'ipv4','low','188.226.130.166'),...]
+    _tree = tree(root_node)
+    for indicator in indicators
+      depth = indicator[0]
+      _tree.insert(indicator,depth)
+      
+Now that we have the tree represented in code, it is
+trivially traversable when loading it into some graph:
+
+    method load_indicators(node,depth):
+      graph.insert(node.parent,edge_label,node)
+      for child in node.children
+        load_indicator(child,depth+1)
+    
+    load_indicators(tree,0)
+
+## Summary
+
+Hopefully I did not kill too many kittens with this
+post. You may or may not agree, but I do believe that most
+analysts share at least parts of my purist views on the
+matter.
+
+We are currently too focused on supporting standards and
+having everyone use as few of them as possible. I believe
+that energy is better used on getting more consistent in the
+way we document and actually exchange more developed
+indicator sets than the md5 hash- and domainlists that are
+typically shared today ("not looking at these kinds of files
+at all" - even though it's not the worst I've seen:
+``MAR-10135536-F_WHITE_stix.xml`` [7]).
+
+In the conceptual part of this article I propose a simple
+but yet effective way of representing indicators in a
+practical manner. Frankly, it is even too simple to be
+novel. It is just consistent and intuitive.
+
+PS! For the STIX example above, have a look at the following
+to get a feel for the actual content of the file (used one
+of the mentioned specimens to show the point):
+
+    class             tlp:white
+    date              2018/02/05
+
+    sha1          high    4efb9c09d7bffb2f64fc6fe2519ea85378756195
+      comment             NCCIC:Observable-724f9bfe-1392-456e-8d9b-c143af15f8d4
+      comment             did not convert all attributes
+      compiler            Microsoft Visual C++ 6.0
+      md5         high    3dae0dc356c2b217a452b477c4b1db06
+      date                2016-01-29T09:21:46Z
+      entropy     med     6.65226708818
+      #sections   low     5
+      intname     med     ProxyDll.dll
+      detection   med     symantec:Heur.AdvML.B
+
+The original document states those same indicators in no fewer than 119 lines
+with an overhead ratio of about 1:5 (it looks completely insane):
+
+    <stix:Observables cybox_major_version="2" cybox_minor_version="1" cybox_update_version="0">
+        <cybox:Observable id="NCCIC:Observable-724f9bfe-1392-456e-8d9b-c143af15f8d4">
+            <cybox:Object id="NCCIC:WinExecutableFile-bb9e38d1-d91c-4727-ab6a-514ecc0c02a2">
+                <cybox:Properties xsi:type="WinExecutableFileObj:WindowsExecutableFileObjectType">
+                    <FileObj:File_Name>3DAE0DC356C2B217A452B477C4B1DB06</FileObj:File_Name>
+                    <FileObj:Size_In_Bytes>336073</FileObj:Size_In_Bytes>
+                    <FileObj:File_Format>PE32 executable (DLL) (console) Intel 80386, for MS Windows</FileObj:File_Format>
+                    <FileObj:Hashes>
+                        <cyboxCommon:Hash>
+                            <cyboxCommon:Type xsi:type="cyboxVocabs:HashNameVocab-1.0">MD5</cyboxCommon:Type>
+                            <cyboxCommon:Simple_Hash_Value>3dae0dc356c2b217a452b477c4b1db06</cyboxCommon:Simple_Hash_Value>
+                        </cyboxCommon:Hash>
+                        <cyboxCommon:Hash>
+                            <cyboxCommon:Type xsi:type="cyboxVocabs:HashNameVocab-1.0">SHA1</cyboxCommon:Type>
+                            <cyboxCommon:Simple_Hash_Value>4efb9c09d7bffb2f64fc6fe2519ea85378756195</cyboxCommon:Simple_Hash_Value>
+                        </cyboxCommon:Hash>
+                        <cyboxCommon:Hash>
+                            <cyboxCommon:Type xsi:type="cyboxVocabs:HashNameVocab-1.0">SHA256</cyboxCommon:Type>
+                            <cyboxCommon:Simple_Hash_Value>8acfe8ba294ebb81402f37aa094cca8f914792b9171bc62e758a3bbefafb6e02</cyboxCommon:Simple_Hash_Value>
+                        </cyboxCommon:Hash>
+                        <cyboxCommon:Hash>
+                            <cyboxCommon:Type xsi:type="cyboxVocabs:HashNameVocab-1.0">SHA512</cyboxCommon:Type>
+                            <cyboxCommon:Simple_Hash_Value>e52b8878bd8c3bdd28d696470cba8a18dcc5a6d234169e26a2fbd9862b10ec1d40196fac981bc3c5a67e661cd60c10036321388e5e5c1f60a7e9937dd71fadb1</cyboxCommon:Simple_Hash_Value>
+                        </cyboxCommon:Hash>
+                        <cyboxCommon:Hash>
+                            <cyboxCommon:Type xsi:type="cyboxVocabs:HashNameVocab-1.0">SSDEEP</cyboxCommon:Type>
+                            <cyboxCommon:Simple_Hash_Value>3072:jUdidTaC07zIQt9xSx1pYxHvQY06emquSYttxlxep0xnC:jyi1XCzcbpYdvQ2e9g3kp01C</cyboxCommon:Simple_Hash_Value>
+                        </cyboxCommon:Hash>
+                    </FileObj:Hashes>
+                    <FileObj:Packer_List>
+                        <FileObj:Packer>
+                            <FileObj:Name>Microsoft Visual C++ 6.0</FileObj:Name>
+                        </FileObj:Packer>
+                        <FileObj:Packer>
+                            <FileObj:Name>Microsoft Visual C++ 6.0 DLL (Debug)</FileObj:Name>
+                        </FileObj:Packer>
+                    </FileObj:Packer_List>
+                    <FileObj:Peak_Entropy>6.65226708818</FileObj:Peak_Entropy>
+                    <WinExecutableFileObj:Headers>
+                        <WinExecutableFileObj:File_Header>
+                            <WinExecutableFileObj:Number_Of_Sections>5</WinExecutableFileObj:Number_Of_Sections>
+                            <WinExecutableFileObj:Time_Date_Stamp>2016-01-29T09:21:46Z</WinExecutableFileObj:Time_Date_Stamp>
+                            <WinExecutableFileObj:Size_Of_Optional_Header>4096</WinExecutableFileObj:Size_Of_Optional_Header>
+                            <WinExecutableFileObj:Hashes>
+                                <cyboxCommon:Hash>
+                                    <cyboxCommon:Type xsi:type="cyboxVocabs:HashNameVocab-1.0">MD5</cyboxCommon:Type>
+                                    <cyboxCommon:Simple_Hash_Value>e14dca360e273ca75c52a4446cd39897</cyboxCommon:Simple_Hash_Value>
+                                </cyboxCommon:Hash>
+                            </WinExecutableFileObj:Hashes>
+                        </WinExecutableFileObj:File_Header>
+                        <WinExecutableFileObj:Entropy>
+                            <WinExecutableFileObj:Value>0.672591739631</WinExecutableFileObj:Value>
+                        </WinExecutableFileObj:Entropy>
+                    </WinExecutableFileObj:Headers>
+                    <WinExecutableFileObj:Sections>
+                        <WinExecutableFileObj:Section>
+                            <WinExecutableFileObj:Section_Header>
+                                <WinExecutableFileObj:Name>.text</WinExecutableFileObj:Name>
+                                <WinExecutableFileObj:Size_Of_Raw_Data>49152</WinExecutableFileObj:Size_Of_Raw_Data>
+                            </WinExecutableFileObj:Section_Header>
+                            <WinExecutableFileObj:Entropy>
+                                <WinExecutableFileObj:Value>6.41338619924</WinExecutableFileObj:Value>
+                            </WinExecutableFileObj:Entropy>
+                            <WinExecutableFileObj:Header_Hashes>
+                                <cyboxCommon:Hash>
+                                    <cyboxCommon:Type xsi:type="cyboxVocabs:HashNameVocab-1.0">MD5</cyboxCommon:Type>
+                                    <cyboxCommon:Simple_Hash_Value>076cdf2a2c0b721f0259de10578505a1</cyboxCommon:Simple_Hash_Value>
+                                </cyboxCommon:Hash>
+                            </WinExecutableFileObj:Header_Hashes>
+                        </WinExecutableFileObj:Section>
+                        <WinExecutableFileObj:Section>
+                            <WinExecutableFileObj:Section_Header>
+                                <WinExecutableFileObj:Name>.rdata</WinExecutableFileObj:Name>
+                                <WinExecutableFileObj:Size_Of_Raw_Data>8192</WinExecutableFileObj:Size_Of_Raw_Data>
+                            </WinExecutableFileObj:Section_Header>
+                            <WinExecutableFileObj:Entropy>
+                                <WinExecutableFileObj:Value>3.293891672</WinExecutableFileObj:Value>
+                            </WinExecutableFileObj:Entropy>
+                            <WinExecutableFileObj:Header_Hashes>
+                                <cyboxCommon:Hash>
+                                    <cyboxCommon:Type xsi:type="cyboxVocabs:HashNameVocab-1.0">MD5</cyboxCommon:Type>
+                                    <cyboxCommon:Simple_Hash_Value>4a6af2b49d08dd42374deda5564c24ef</cyboxCommon:Simple_Hash_Value>
+                                </cyboxCommon:Hash>
+                            </WinExecutableFileObj:Header_Hashes>
+                        </WinExecutableFileObj:Section>
+                        <WinExecutableFileObj:Section>
+                            <WinExecutableFileObj:Section_Header>
+                                <WinExecutableFileObj:Name>.data</WinExecutableFileObj:Name>
+                                <WinExecutableFileObj:Size_Of_Raw_Data>110592</WinExecutableFileObj:Size_Of_Raw_Data>
+                            </WinExecutableFileObj:Section_Header>
+                            <WinExecutableFileObj:Entropy>
+                                <WinExecutableFileObj:Value>6.78785911234</WinExecutableFileObj:Value>
+                            </WinExecutableFileObj:Entropy>
+                            <WinExecutableFileObj:Header_Hashes>
+                                <cyboxCommon:Hash>
+                                    <cyboxCommon:Type xsi:type="cyboxVocabs:HashNameVocab-1.0">MD5</cyboxCommon:Type>
+                                    <cyboxCommon:Simple_Hash_Value>c797dda9277ee1d5469683527955d77a</cyboxCommon:Simple_Hash_Value>
+                                </cyboxCommon:Hash>
+                            </WinExecutableFileObj:Header_Hashes>
+                        </WinExecutableFileObj:Section>
+                        <WinExecutableFileObj:Section>
+                            <WinExecutableFileObj:Section_Header>
+                                <WinExecutableFileObj:Name>.reloc</WinExecutableFileObj:Name>
+                                <WinExecutableFileObj:Size_Of_Raw_Data>8192</WinExecutableFileObj:Size_Of_Raw_Data>
+                            </WinExecutableFileObj:Section_Header>
+                            <WinExecutableFileObj:Entropy>
+                                <WinExecutableFileObj:Value>3.46819043887</WinExecutableFileObj:Value>
+                            </WinExecutableFileObj:Entropy>
+                            <WinExecutableFileObj:Header_Hashes>
+                                <cyboxCommon:Hash>
+                                    <cyboxCommon:Type xsi:type="cyboxVocabs:HashNameVocab-1.0">MD5</cyboxCommon:Type>
+                                    <cyboxCommon:Simple_Hash_Value>fbefbe53b3d0ca62b2134f249d249774</cyboxCommon:Simple_Hash_Value>
+                                </cyboxCommon:Hash>
+                            </WinExecutableFileObj:Header_Hashes>
+                        </WinExecutableFileObj:Section>
+                    </WinExecutableFileObj:Sections>
+                </cybox:Properties>
+            </cybox:Object>
+        </cybox:Observable>
+
+
+
+[1] STIX: https://oasis-open.github.io/cti-documentation/  
+[2] Cybox example: https://github.com/CybOXProject/schemas/blob/master/samples/CybOX_IPv4Address_Instance.xml  
+[3] MAEC: https://maec.mitre.org/  
+[4] OpenIOC to STIX repository on Github: https://github.com/STIXProject/openioc-to-stix  
+[5] STIX 2.x support (MISP): https://github.com/MISP/MISP/issues/2046  
+[6] Janusgraph: http://janusgraph.org/  
+[7] MAR-10135536-F_WHITE_stix.xml: https://www.us-cert.gov/sites/default/files/publications/MAR-10135536-F_WHITE_stix.xml  
--- a/data/jnetpcap-tuning.md
+++ b/data/jnetpcap-tuning.md
@ -0,0 +1,84 @@
+There comes a time when programming that one will have to start
+paying attention to performance. While this is true in many cases,
+there are two places where it is especially important: parallel
+processing and packet captures. Even better if doing both
+at once. In this article we'll keep the latter in mind together
+with jNetPcap, a Java wrapper for libpcap able to do 60Kpps per
+instance.
+
+First of all I found an excellent post on performance tuning
+jNetPcap. There's also a good implementation example for moving to
+the much faster ``JBufferHandler`` [1].
+
+One should take note of the ring buffer, that is, how much memory
+you will have to temporarily store packets if there's a lot of
+traffic. Usually this may be e.g. 453k, while the maximum can be
+4M (for instance 4078 as it was in my case). For tuning this on
+RedHat one may use ``ethtool -g eth0``, and adjust it with
+``ethtool -G eth0 rx 4078``. Larger buffers result in higher
+throughput, but also higher latency (which is not that important
+when doing packet captures). More on ethtool and ring buffer
+adjustments here.
+
+When it comes to jNetPcap, the following is an example
+implementing it as an Apache Flume source [2]:
+
+    @Override
+    public void start() {
+        final ChannelProcessor channel = getChannelProcessor();
+
+        JBufferHandler<ChannelProcessor> jpacketHandler = new JBufferHandler<ChannelProcessor>() {
+
+            public void nextPacket(PcapHeader pcapHeader, JBuffer packet, ChannelProcessor channelProcessor) {
+            int size = packet.size();
+            JBuffer buffer = packet;
+            byte[] packetBytes = buffer.getByteArray(0, size);
+
+            Event flumeEvent = EventBuilder.withBody(packetBytes);
+            channel.processEvent(flumeEvent);
+            }
+        };
+
+        super.start();
+        pcap.loop(-1, jpacketHandler, channel);
+
+    }
+
+The above shows you a slightly different version than the most
+well-documented example (``PcapHandler``) [3]. You should choose
+the above one since it is much faster due to the packet
+referencing. I did a test on one site and the performance
+increased drastically in terms of improving packet loss on the
+software-side of things.
+
+Last but not least, in order to do software side performance
+monitoring, you might want to add a handler to capture statistics
+in jNetPcap. This is mentioned here in the jNetPcap forums as well
+[4]:
+
+> You can also use PcapStat to see if libpcap is dropping any
+> packets. If the buffer becomes full and libpcap can't store a
+> packet, it will record it in statistics. This is different from
+> the NIC dropping packets.
+
+This may be implemented in the configuration as shown here:
+
+    PcapStat stats = new PcapStat();
+    pcap = Pcap.openLive(device.getName(), SNAPLEN, Pcap.MODE_PROMISCUOUS, timeout, errbuf);
+    pcap.stats(stats);
+
+You can get the stats with the following:
+
+    System.out.printf("drop=%d, ifDrop=%d\n",stats.getDrop(), stats.getIfDrop());
+
+ 
+Hope this gets you up and running smoothly, tuning packet captures
+in chain with parallel computing is a challenge.
+
+To get some more context you may also like to have a look at the
+presentation that Cisco did on OpenSOC, that's how to do it.
+
+[1] http://jnetpcap.com/node/67
+[2] http://flume.apache.org/
+[3] http://jnetpcap.com/examples/dumper
+[4] http://jnetpcap.com/node/704
--- a/data/mac-mini-debian.md
+++ b/data/mac-mini-debian.md
@ -0,0 +1,173 @@
+There are a lot of guides on booting Linux on a Mac Mini, and the
+Mac Mini is absolutely great. There are also a lot of guides which
+take some unnecessary steps on the way from the native OS X
+experience to the bloated, and difficult-to-setup Linux on OS
+X. Some of them are good on certain points though.
+
+So, not surprisingly, I will tell you how to make it work with both
+a native EFI installation and the Broadcom BCM4366 up and running.
+
+Everything will be done on the command line, so this will work
+great on servers as well. Of course you won't run wifi on the work
+server though (!).
+
+First, take note that this will wipe almost everything Apple from
+your box except the Firmware. You may roll back through pressing
+the ALT-key while booting.
+
+Second, you should use Debian 8.0 "Jessie" (which is currently in
+RC1). This is important since Wheezy doesn't support the Broadcom
+chipset.
+
+Prerequisites for this article are:
+
+* A Mac Mini, tested on an OCT 2014 model
+* A keyboard
+* A USB memory stick of at least 2GB (speed is the key)
+
+## 1. Install Debian - and Change Boot Order
+
+You should create a bootable USB stick for your Debian
+installation. When you've downloaded the ISO, you can make it
+bootable without hassle through Unetbootin [1]. That one works on
+OS X 10.10 "Yosemite" as well.
+
+When you've got that one ready insert it into the Mini, holding
+the ALT-key while booting. You will get to the boot menu, choose
+the "EFI" one. This will initiate GRUB from the stick.
+
+Do the installation as you would on any other machine. Since your
+mac is still setup to boot to OS X, we need to change that next in
+order to make it point to the Debian installation instead.
+
+When rebooting, get into the boot menu by holding the ALT-key
+again. Select that same GRUB menu again, _BUT_ instead of choosing
+to install it you should now press "c" to get to the GRUB command
+line.
+
+It is now time to locate the boot directory [2] on the right
+disk. Vary X (disk) and Y (partition table) until you find the
+right combination:
+
+    grub> ls (hdX,gptY)/boot/grub
+
+That may for instance result in:
+
+    grub> ls (hd2,gpt2)/boot/grub
+    
+Set the ``root`` to that disk and partition table, and boot it:
+
+    grub> set root=(hd2,gpt2)
+    grub> ls -l (hd2,gpt2)
+    grub> linux /boot/vmlinux[...].efi.signed root=UUID=[uuid from above command]
+    grub> initrd /boot/initrd[...]
+    grub> boot
+
+You will now boot to the one you just installed. It is time to
+make it persistent and change the boot order with
+``efibootmgr``. First list your current settings by:
+
+    sudo efibootmgr
+
+Now change the boot order (may vary, point being that Debian
+should come first):
+
+    sudo efibootmgr -o 0,1
+
+Now reboot and enjoy the darkness without wifi.
+
+## 2. Get Wifi Up and Running (Offline)
+
+The current Broadcom chipset is quite new, so you'll need to step
+it up to Debian "Jessie" to get it working. Cutting this a bit
+short, you will probably need this part to be offline. Here is
+a small trick: you can get all those dependencies on a vmware
+installation (run the same image as the one you installed,
+remember to simulate that you don't have network on that virtual
+installation):
+
+    apt-get -qq --print-uris install build-essential linux-headers-$(uname -r) broadcom-sta-dkms patch bzip2 wpasupplicant | cut -d\' -f 2 > urls.txt
+
+This will produce a file of urls that are all the packages
+requested and its dependencies, get the stick, format it with
+FAT - and grab the packages to it:
+
+    wget -i urls.txt
+
+Unmounting that from the virtual installation, insert it into the
+physical installation:
+
+    cd /mnt/usb
+    dpkg -i *.deb
+
+Remove all modules that may conflict (and blacklist them in
+``/etc/modprobe.d/blacklist.conf``):
+
+    modprobe -r b44 b43 b43legacy ssb brcmsmac
+
+Load the Broadcom module:
+
+    modprobe wl
+    echo wl >> /etc/modules
+
+Everything that's left now is configuring and starting
+wpasupplicant:
+
+    wpa_passphrase <ssid> [passphrase] > /etc/wpa_supplicant.conf
+    wpa_supplicant -B -i wlan0 -c /etc/wpa_supplicant.conf
+
+To make it persistent enable the interface in
+``/etc/network/interfaces`` by appending:
+
+    auto wlan0
+    iface wlan0 inet dhcp
+        wpa-conf /etc/wpa_supplicant.conf
+        
+        
+If you have made an exception in your DHCP pool, you should also
+make it static (basic stuff, but anyways):
+
+    auto wlan0
+    iface wlan0 inet static
+        wpa-conf /etc/wpa_supplicant.conf
+        address 192.168.1.2
+        netmask 255.255.255.0
+        gateway 192.168.1.1
+        
+That's basically it. Enjoy the show!
+
+**Edit 1, FEB 7th 2015:** So I got to play with ``systemd``, since
+it turns out a service isn't a service the way it used to be. In
+order to start services in Debian "Jessie", you'll need to use
+``systemd``. Here's an example for ``znc`` [3]:
+
+    [Unit]
+    Description=An advanced IRC bouncer
+    After=network.target oidentd.socket
+     
+    [Service]
+    Type=simple
+    EnvironmentFile=/etc/conf.d/znc
+    User=znc
+    ExecStart=/usr/bin/znc -f $ZNC_OPTIONS
+    ExecReload=/bin/kill -HUP $MAINPID
+     
+    [Install]
+    WantedBy=multi-user.target 
+
+Also create the directory and drop the following line into
+``/etc/conf.d/znc``: ``ZNC_OPTIONS="-d /var/lib/znc"``
+
+**Edit 2, FEB 7th 2015:** To enable the Mac Mini to auto-restart
+after power failure set the following PCI value [4]:
+
+    setpci -s 0:1f.0 0xa4.b=0
+
+
+
+
+[1] http://unetbootin.sourceforge.net/  
+[2]
+http://askubuntu.com/questions/516535/how-can-i-use-the-installer-to-manually-boot-into-a-system-without-grub-installer  
+[3] https://gist.github.com/tlercher/3897561  
+[4] http://smackerelofopinion.blogspot.no/2011/09/mac-mini-rebooting-tweaks-setpci-s-01f0.html  
--- a/data/maltego-search.md
+++ b/data/maltego-search.md
@ -0,0 +1,52 @@
+I've previously been writing on how to read and process Maltego
+mtgx graph archives. When you start to get a directory with a lot
+of them you will probably be like me "Where did I see this thing
+again?"
+
+The solution can of course be done in Python like in my previous
+post, but let's try a more native solution this time, zipgrep:
+
+> zipgrep will search files within a ZIP archive for lines
+> matching the given string or pattern. zipgrep is a shell script
+> and requires egrep(1) and unzip(1L) to function. Its output is
+> identical to that of egrep(1).
+
+In my testing I had 20 files, and everything worked pretty well in
+regard to searching the files by e.g. ``zipgrep 1.2.3.4 \*.mtgx
+\*.graphml``. The problem here being that zipgrep doesn't seem to
+support printing the archive names, so thank you for
+that. Returning to the more basic zip tools, like zip cat was the
+solution in my case:
+
+    unzip -c \*.mtgx 2>&1 |egrep "(Archive: )|1.2.3.4"
+
+    Archive:  1.mtgx
+    Archive:  2.mtgx
+    Archive:  3.mtgx
+    Archive:  4.mtgx
+    Archive:  5.mtgx
+    Archive:  6.mtgx
+    Archive:  7.mtgx 
+    Archive:  8.mtgx
+    Archive:  9.mtgx
+    Archive:  10.mtgx
+    Archive:  11.mtgx
+    Archive:  12.mtgx
+    Archive:  13.mtgx
+    Archive:  14.mtgx
+    Archive:  15.mtgx
+    Archive:  16.mtgx
+              1.2.3.4
+    Archive:  17.mtgx
+              1.2.3.4
+    Archive:  18.mtgx
+    Archive:  19.mtgx
+    Archive:  20.mtgx
+
+A little Maltego archive insight helps us along speeding up the
+query, since the graphml file will always stay at
+``Graphs/Graph1.graphml``
+
+    unzip -c \*.mtgx Graphs/Graph1.graphml 2>&1 |egrep "(Archive: )|1.2.3.4"
+
+The latter results in the same results as given above.
--- a/data/matrix.md
+++ b/data/matrix.md
@ -0,0 +1,199 @@
+We  have all  been there  during security  operations. One  of the
+parties involved in  an incident or daily routine  is not prepared
+for thinking they could be compromised.
+
+Communications and  information sharing is one  of the fundamental
+things that you need to get right during a crisis.
+
+As now-retired FBI director James Comey put it to 60 minutes [1]:
+
+> There are two kinds of big companies in the United States. There
+> are those who've been hacked by  the Chinese and those who don't
+> know they've been hacked by the Chinese.
+
+The  following   question  always  arises:  How   do  we  maintain
+operational security  while still  being able to  communicate with
+all parties involved?
+
+In practical terms this requires a communications platform to:
+
+* Be independent of the service infrastructure
+* Provide traceability
+* Be resistant to resourceful threat actors
+* Have simple and secure identity management
+* Have cross-platform compatibility
+* Provide file-sharing  capabilities and ability to  give the user
+  an opportunity to express himself
+* Support video and audio exchanges
+* Be under  the control of the team using  it (the smallest circle
+  of trust)
+* Provide both end-to-end and transport layer encryption
+* Disposable server infrastructure
+
+This could have been  a bit too much to ask for  a couple of years
+ago, but today there are  at least two alternatives satisfying the
+above requirements:  Mattermost and the Matrix  ecosystem. For the
+remainder of this post I will  focus on how to establish an ad-hoc
+system with the tools provided by the Matrix project.
+
+## Setting Up An Out-of-Band Channel for Incident Handling with Matrix
+
+Getting started takes three steps:
+
+1. Establish a back-end server on Digital Ocean
+2. Serve the Riot front-end website
+3. Establish a recording capability with Matrix Recorder [2]
+
+For the two first points, it is clever to use an approach that can
+be  easily   reproduced  and  that  provides   exactly  the  same,
+secure-by-default  configuration  each  time.   Due  to  this  the
+preferred method  in this case  is to manage  the VPS that  can be
+established on anything with Debian  or CentOS with Ansible. There
+is     a    script     available    on     Github,    known     as
+matrix-docker-ansible-deploy  [3].  The   latter  has   also  been
+endorsed  by  the  Matrix  project  [4].  Both  1  and  2  can  be
+accomplished with ``matrix-docker-ansible-deploy``.
+
+So let's get started.
+
+### Basic DNS-service
+
+For this  example I created  a domain on namesilo.com  and pointed
+that to ``(ns1|ns2|ns3).digitalocean.com``. It would be unfortunate
+for the continuity of the service if a domain was taken offline or
+redirected  somewhere, but  due to  the end  to end  encryption in
+Matrix   it   would   not    compromise   the   content   of   the
+conversations. Now that  Digital Ocean has control  of the primary
+domain, make sure to add the following before continuing:
+
+    Type	Hostname	          Value	                       TTL	
+    A	    <domain>               <ip>                        600 
+    A	    riot.<domain>          <ip>                        600 
+    A	    matrix.<domain>        <ip>                        600 
+    SRV	    _matrix._tcp.<domain>  10 0 8448 matrix.<domain>   600 
+
+This  can take  some  time to  propagate, so  make  sure that  the
+DNS-infrastructure  is  readily  resolvable  before  you  continue
+deploying the services.
+
+### Configure
+
+Make    sure     to    grab     a    copy    of     the    current
+``matrix-docker-ansible-deploy`` by running:
+
+    git clone https://github.com/spantaleev/matrix-docker-ansible-deploy.git
+
+Create the following files:
+
+    inventory/host_vars/matrix.<domain>/vars.yml
+    inventory/hosts
+
+``vars.yml`` should look like this:
+
+    host_specific_matrix_ssl_support_email: <your-contact-email>
+    host_specific_hostname_identity: <domain>
+    matrix_coturn_turn_static_auth_secret: "<run pwgen -s 64 1>"
+    matrix_synapse_macaroon_secret_key: "<run pwgen -s 64 1>"
+
+The Ansible ``hosts`` file should be formatted like the following:
+
+    all:
+      children:
+        matrix-servers:
+          hosts:
+            matrix.<domain>:
+              ansible_user: root
+
+### Deploy and Execute
+
+Now that  your configuration files  and server are ready,  you can
+start deploying  the Matrix Synapse  server and start  serving the
+Riot HTML/JS client.
+
+First deploy the services (Riot and Matrix Synapse) by running:
+
+    ansible-playbook -i inventory/hosts setup.yml --tags=setup-main
+
+When that completes successfully, you can start the services by:
+
+    ansible-playbook -i inventory/hosts setup.yml --tags=start
+
+After starting the  services, the Riot web  interface is available
+on  ``https://riot.<domain>`` where  metadata  is  protected by  a
+Let's Encrypt certificate.
+
+The two primary endpoints you now have exposed to the WWW are:
+
+* The Matrix API which runs at https://matrix.<domain>
+* The Riot UI which runs at https://riot.<domain>
+
+Going  to   ``https://riot.<domain>``  brings  you  to   the  Riot
+logon-screen.
+
+### Adding Users
+
+Registration is  disabled by default  on the server, so  new users
+can be added by the following command:
+
+    ansible-playbook -i inventory/hosts setup.yml
+                     --tags=register-user
+		     --extra-vars='username=<first user>
+		                   password=<some password>
+				   admin=(yes|no)'
+
+It is better to use pseudonyms on  such a platform to make sure no
+information can be traced to a specific individual not involved in
+the case.  Each user needs  to verify his private  key fingerprint
+with the other participants.
+
+### Vital Steps to Take as an Administrator
+
+When  using  multiple  servers,  it  is  necessary  to  create  an
+``#control`` channel that is a fallback if a server hosting a room
+goes down.
+
+### Setup Matrix Recorder
+
+To make  sure that all  communications are stored  for traceability
+make  sure to  install  the  Matrix Recorder  (MR).  MR should  be
+installed locally and _not_ on the Matrix server.
+
+    git clone https://gitlab.com/argit/matrix-recorder.git
+    cd matrix-recorder/
+    npm install
+
+To execute  the recorder,  run the following.  The first  time you
+will be asked to enter the login credentials of the user.
+
+    $ node matrix-recorder.js <case-folder>
+    Loading olm...
+    Your homeserver (give full URL): https://matrix.<domain>
+    Your username at the homeserver: <username>
+    Your password at the homeserver: <password>
+    No of items to retrieve for initial sync: 1000
+    [...]
+
+View messages  as HTML by  running the Matrix  Recorder conversion
+script:
+
+    node recorder-to-html.js <case-folder>
+
+### Controlling Logins
+
+Access monitoring  can be done  in the  console by e.g.  ``tail -f
+/matrix/synapse/run/homeserver.log``.
+
+### The Power of Disposability
+
+At  some point  you have  finished the  information exchange.  The
+beauty of this setup is that it can now be safely deleted from the
+Digital Ocean droplet console.
+
+
+[1] James Comey and 60 minutes: https://www.cbsnews.com/news/fbi-director-james-comey-on-threat-of-isis-cybercrime/  
+
+[2] Matrix Recorder: https://matrix.org/docs/projects/other/matrix-recorder.html 
+
+[3] matrix-docker-ansible-deploy: https://github.com/spantaleev/matrix-docker-ansible-deploy 
+
+[4] Matrix project endorsement: https://matrix.org/blog/2018/06/01/this-week-in-matrix-2018-06-01/ 
--- a/data/microsoft-dominating-email.md
+++ b/data/microsoft-dominating-email.md
@ -0,0 +1,159 @@
+## Key Takeaways
+
+* While market dominance was formerly an issue discussed for
+  operating systems, the modern equivalent occurs in form of cloud
+  services, primarily from Microsoft, Amazon and Google.
+
+* Data from the Norwegian business registry mapped to email
+  records shows that Microsoft Office 365 has become a dominating
+  force amongst Norwegian private businesses and 61% of the
+  government.
+
+* Microsoft being a significant actor for email indicates that
+  Norwegian organisations are putting a lot more faith in
+  Microsoft. Today email as a service is bundled with direct
+  messaging and wikis.
+
+## Introduction
+
+In 2003 Dan Geer, Bruce Schneier and others wrote a paper named
+"How the Dominance of Microsoft's Products Poses a Risk to
+Security". It eventually cost Geer his job at AtStake.
+
+The paper revolves around Microsoft's dominance in operating
+systems and Geer has later given Microsoft credit for a better
+approach to security [2].
+
+In this article I am not going to reiterate on the points made by
+Geer et al. I think these are perfectly valid and easily
+transferrable to the current landscape. The whole paper is
+read-worthy, but I'd like to highlight one part:
+
+> Governments, and perhaps only governments, are in leadership
+> positions to affect how infrastructures develop. By enforcing
+> diversity of platform to thereby blunt the monoculture risk,
+> governments will reap a side benefit of increased market
+> reliance on interoperability, which is the only foundation for
+> effective incremental competition and the only weapon against
+> end-user lock-in. A requirement that no operating system be more
+> than 50% of the installed based in a critical industry or in a
+> government would moot monoculture risk. Other branches to the
+> risk diversification tree can be foliated to a considerable
+> degree, but the trunk of that tree on which they hang is a total
+> prohibition of monoculture coupled to a requirement of
+> standards-based interoperability.
+
+Azure is Windows in 2021. The walled gardens are somewhat
+redefined - but they are there in a similar fashion as Windows was
+in 2003. The Microsoft monopoly is technically broken, and there
+are now options from Amazon, Google and even Apple, but I would
+argue the monoculture is still present in shared approaches,
+infrastructure and concepts.
+
+I decided to have a closer look at the distribution from a
+representative dataset provided by an authoritative source in
+Norway; the business registry.
+
+## Taking a Close Look at The Data
+
+In Norway we have a public registry of organisations. This registry is
+categorised by standardised sector codes (typically "government",
+"private" and so on). Using the JSON-data provided by brreg.no, a
+list of websites can be extracted:
+
+ 1. Retrieve the organisation list from brreg.no [3]
+
+```
+    curl https://data.brreg.no/enhetsregisteret/api/enheter/lastned > enheter.gz
+    gzip -d enheter.gz
+```
+
+ 2. Reshape the JSON data by website URL, sector and business code.
+
+```
+    cat enheter | 
+	jq '[.[] | select(.hjemmeside != null) | {url:.hjemmeside, code:.naeringskode1.kode, sector:.institusjonellSektorkode.kode}]' > webpages.txt
+```
+
+ 3. Based on the URL, add the primary domain and resolve its MX
+   record and the MX primary domain to each JSON entity
+   
+ 4. Using the JSON-file generated above, populate the following
+   JSON dictionary. This is also a rough categorisation based on
+   the standard provided by Statistics Norway (I'm sure it could
+   be improved) [4]:
+
+```
+   {
+     "government":{"codes": [6100,6500,1110,1120], "total":0, "counts":{}},
+     "municipals":{"codes": [1510,4900,1520], "total":0, "counts":{}},
+     "finance":{"codes": [3200,3500,3600,4300,3900,4100,4500,4900,5500,5700,4900,7000], "total":0, "counts":{}},
+     "private":{"codes": [4500,4900,2100,2300,2500], "total":0, "counts":{}}
+   }
+```
+
+ 5. Generate CSV output based on each sector grouping above.
+
+
+## The Result
+
+The top vendor was, not surprisingly, Microsoft's outlook.com. For the
+120k sites, 98k resolved an MX record. Of these I will give an
+outlook.com summary as follows, as it would seem this is the
+dominating actor in all categories:
+
+* In government 61% are O365 users (1420/2317)
+
+* For municipals, the amount is 55% (688/1247)
+
+* For the diverse financial grouping, 21% uses O365 (4836/23125)
+
+* For the diverse private companies 38% uses O365 (14615/38129)
+
+Of the 98k sites Microsoft runs the email service for 21559
+organisations. For comparison Google MX domains account
+for about 5500.
+
+While the above is directly a measurement of who delivers email
+services, it also indicates that these organisations rely on
+other services, such as internal wikis and direct messaging.
+
+An overview of the top 10 vendors is shown below.
+
+![](static/img/data/mx_domains.png)
+
+
+
+## Sources of Errors
+
+Even though I believe the statistics above are representative they
+have some possible sources of error:
+
+1. The organisation isn't listed with URL in the organisation
+   registry or it uses a domain not associated with the primary
+   domain of its web address
+   
+2. The organisation uses an SMTP proxy
+
+3. The organisation has an inactive SMTP record
+
+I found that there are more than 1 million listed organisations in
+the brreg.no registry and 120k websites in the JSON data
+provided. This means this dataset represents at most 12% of the
+companies listed.
+
+Also, email doesn't represent a diverse infrastructure, but I
+believe it is an indicator of the current trends also for other
+cloud services in e.g. Azure, Google Compute Engine and so on.
+
+
+
+[1] CyberInsecurity: The Cost of Monopoly, Geer et al., 2003 -
+https://cryptome.org/cyberinsecurity.htm
+
+[2] Cybersecurity as Realpolitik by Dan Geer presented at Black
+Hat USA 2014: https://www.youtube.com/watch?v=nT-TGvYOBpI
+
+[3] https://data.brreg.no/enhetsregisteret/api/enheter/lastned
+
+[4] https://www.ssb.no/klass/klassifikasjoner/39
--- a/data/msg-eml.md
+++ b/data/msg-eml.md
@ -0,0 +1,58 @@
+Thought I’d share a neat little script-combo if you do your
+email analysis on Linux systems, or do automation. For the
+task you’ll need msgconvert.pl [1] and ripmime [2].
+
+MSG files are used by Microsoft Outlook, and are the natural
+fit in regard to malicious messages in organizations running
+Microsoft products. For reference you can find the
+specification for the Outlook Item File Format here.
+
+In this part you will require a file from Outlook, which you
+can acquire by selecting a message and drag it to the
+desktop or a new message. If you don’t do Outlook, you can
+just google for one [3].
+
+    msgconvert.pl <message>.msg
+    ripmime -i <message>.mime
+
+The above will first convert the MSG file to a mime
+file. The latter command will make sure to extract the
+objects in it, such as binary files or documents. The text
+files contain the content of the email and will be
+something like: textfile0
+
+If you need the headers you will find them at the top of the
+mime-file.
+
+Now to EML-files, which you will also often find when
+exporting email messages. EML is really just short for
+“E-mail”. In OS X Mail, Outlook Express, Thunderbird (and
+others) you are typically presented with EML/MIME-formatted
+documents, and it’s just a document which complies with RFC
+822 [4]. EML-files are easier to work on since you can
+open it in a text editor and read the essential information
+plain straight away.
+
+So what does that mean in regard to ripmime? It really just
+means that instead of calling the output from msgconvert.pl
+<message>.mime, you can name the file <message>.eml. In
+commands:
+
+    ripmime -i <message>.eml
+
+The above will output your mime parts.
+
+## OS X Specifics
+
+You may want to do the above on an OS X system as well. For
+this you can install ripmime via Homebrew [5].
+
+If you are exporting an eml from Apple Mail you may do so
+the same way as in Outlook: Just drag it where you want it.
+
+
+[1] https://www.matijs.net/software/msgconv/  
+[2] https://www.pldaniels.com/ripmime/  
+[3] https://www.google.com/search?q=filetype:msg&oq=filetype:msg#q=filetype:msg+outlook  
+[4] https://tools.ietf.org/html/rfc822  
+[5] https://brew.sh/index_nb  
--- a/data/new-format.md
+++ b/data/new-format.md
@ -0,0 +1,70 @@
+After being off the HTML grid for a while, using Hugo as a
+static site generator for Gopher, I grew tired of the
+upgrade and complexity issues with publishing new
+content. It all culminated with Hugo refusing to generate
+the site at all after the last update.
+
+Because of the Hugo failure I needed to create a new
+strategy, and not being willing to change to another complex
+generator system I went hunting for something else.
+
+I am happy with my current backend publishing setup, which
+uses git and a post-receive hook:
+
+    pwd=$(pwd)
+    if test -z "${pwd##*.git}"
+    then repo="$pwd"
+    else repo="$pwd/.git"
+    fi
+
+    git --work-tree=~/secdiary/content --git-dir=~/secdiary/content.git checkout -f
+    cd ~/secdiary
+    rm -r /var/www/secdiary.com/*
+    rm -r /var/gopher/*
+    cp -R html/* /var/www/secdiary.com/
+    cp -R gopher/* /var/gopher/
+    
+    cp ~/twtxt/content/twtxt.txt /var/www/secdiary.com/
+    
+    echo "\nBuild: " >> /var/gopher/index.gph
+    git --git-dir=~/secdiary/content.git log -1 --pretty="%H%n%ci" >> /var/gopher/index.gph
+
+I also publish twtxt messages in a similar way. My twtxt
+config looks like the following:
+
+    [twtxt]
+    nick = tommy
+    twtfile = ~/twtxt/twtxt.txt
+    twturl = http://secdiary.com
+    disclose_identity = False
+    character_limit = 140
+    character_warning = 140
+    post_tweet_hook = "cd ~/twtxt/ && git pull && git add twtxt.txt && git commit -m 'added new tweet' && git push"
+
+In addition to my twtxt feed, I am present on Mastodon,
+which led me to Solene's static site generator cl-yag
+[1,2]. I decided to generate the site client-side for
+now, but in the future I'll likely move this to the server
+for less complex workflows on my workstations. This also
+fits me well since I'll be moving more of my workflow to
+OpenBSD in the coming months.
+
+The layout of my new site is more or less shamelessly stolen
+from Solene as well. I plan to customize that to my liking as
+we go.
+
+And with that I am back in the WWW space, however in a
+limited format. I am currently reviewing my 50 current
+posts and will assess what can be of use in the future. This
+will involve some rewriting as well, since this space will
+be text-only out of respect for your time.
+
+I also enabled TLS on the site for those that would like to
+browse privately, as opposed to my current Gopher setup. The
+latter you may find on ``gopher://secdiary.com``.
+
+Feel free to reach out to me in the Fediverse. I'm there as
+@tommy@cybsec.network.
+
+[1] https://dataswamp.org/\~solene/2018-10-12-cl-yag-20181012.html  
+[2] git://bitreich.org/cl-yag  
--- a/data/novel-pdf-detection.md
+++ b/data/novel-pdf-detection.md
@ -0,0 +1,792 @@
+For some time now the Portable Document Format standard has
+been a considerable risk in regard to corporate as well as
+private information security concerns. Some work has been
+done to classify PDF documents as malicious or benign, but
+not as much when it comes to clustering the malicious
+documents by techniques used. Such clustering would provide
+insight, in automated analysis, to how sophisticated an
+attack is and who staged it.  A 100.000 unique PDF dataset
+was supplied by the Shadowserver foundation. Analysis of
+experiment results showed that 97% of the documents
+contained javascripts. This and other sources revealed that
+most exploits are delivered through such, or similar object
+types. Based on that, javascript object labeling gets a
+thorough focus in the paper.
+
+The scope of the paper is limited to extend the attribution
+research already done in regard to PDF documents, so that a
+feature vector may be used in labeling a given (or a batch)
+PDF to a relevant cluster. That as an attempt to recognize
+different techniques and threat agents.
+
+> Javascript is currently one of the most exploited PDF
+  objects. How can the PDF feature vector be extended to
+  include a javascript subvector correctly describing the
+  technique/style, sophistication and similarity to previous
+  malicious PDF documents. How does it relate to the term
+  digital evidence?  
+> — Problem statement
+
+The problem statement considers the coding styles and
+obfuscation techniques used and the related sophistication
+in the coding style. Last but most important, the statement
+involves how the current PDF document compares to others
+previously labeled. These are all essential problems when it
+comes to automated data mining and clustering.
+
+### A. Related Work
+
+Proposed solutions for malicious contra benign
+classification of PDF documents have been explicitly
+documented in several papers. Classification using support
+vector machines (SVM) was handled by Jarle Kittilsen in his
+recent Master's thesis1.
+
+Further, the author of this paper in his bachelor's thesis2
+investigated the possibility to detect obfuscated malware by
+analyzing HTTP data traffic known to contain malware. In
+regard, the findings were implemented, designed and tested
+in Snort. Some of the detection techniques will be used as a
+fundament for labeling in this paper.
+
+Even though much good work has been done in the era of
+analyzing malicious PDF documents, many of the resulting
+tools are based on manual analysis. To be mentioned is
+Didier Stevens who developed several practical tools, such
+as the PDF parser and PDFid. These tools are not only tools,
+but were the beginning of a structured way of looking at
+suspicious objects in PDF documents as well. To be credited
+as well is Paul Baccas at Sophos, who did considerable
+work on characterizing malicious contra benign PDF
+documents3.
+
+The paper will be doing research into the feature,
+javascript subvector of malicious PDF documents. To be able
+to determine an effective vector (in this experimental
+phase), it is essential that the dataset is filtered,
+meaning that the files must be malicious. As Kittilsen has
+done in regard to PDF documents, Al-Tharwa et al.2 has done
+interesting work to detect malicious javascript in browsers.
+
+## Background
+### A.1. The Feature Vector in Support of Digital Evidence
+
+Carrier and Spafford defined "digital evidence" as any
+digital data that contain reliable information that supports
+or refutes a hypothesis about the incident7. Formally, the
+investigation process consists of five parts and is
+specially crafted for maintaining evidence integrity, the
+order of volatility (OOV) and the chain of custody. This all
+leads up to the term forensic soundness.
+
+The investigation process consists of five phases. Note the
+identification and analysis phase.
+
+![Fig. 1: The investigation process. The investigation
+ process consists of five phases9. Note the identification
+ and analysis
+ phase](/images/2015/02/Theinvestigationprocess-e1380485641223.png)
+
+In this paper, forensic soundness is a notion previously
+defined10 as meaning: No alteration of source data has
+occurred. Traditionally this means that every bit of data is
+copied and no data added. The previous paper stated two
+elementary questions:
+
+* Can one trust the host where the data is collected from?
+* Does the information correlate to other data?
+
+When it comes to malicious documents, they are typically
+collected in two places:
+
+1. In the security monitoring logging, the pre-event phase
+2. When an incident has occurred and as part of the reaction to an
+   incident (the collection phase)
+
+Now, the ten thousand dollar question: When a malicious
+document gets executed on the computer, how is it possible
+to get indications that alteration of evidence has occurred?
+The answer is potentially the first collection point, the
+pre-event logging.
+
+In many cases, especially considering targeted attacks, it
+is not possible to state a PDF document as malicious in the
+pre-event phase. The reason for this is often the way the
+threat agent crafts his attack to evade the security
+mechanisms in the target using collected intelligence. Most
+systems in accordance to local legislation should then
+delete the content data. A proposition though is to store
+the feature vector.
+
+The reasoning behind storing a feature vector is quite
+simple: When storing hashes, object counts and the
+javascript subvector which we will return to later in the
+paper, it will be possible to indicate if the document
+features have changed. On the other side there is no
+identifiable data invading privacy.
+
+It is reasonable to argue that the measure of how similar
+one PDF document is to another, is also the measure of how
+forensically sound the evidence collected in a post-event
+phase is. How likely it is that the document acquired in the
+collection phase is the same as the one in the pre-phase is
+decided by the characteristics supplied by the feature
+vectors of both. Further, the feature-vector should be as
+rich and relevant as possible.
+
+![Fig. 2: Correlation by using the feature vector of the PDF
+ document. Illustration of a possible pre/post incident
+ scenario](/images/2015/02/Preandpost.png)
+
+### A.2. Identification as an Extension of Similarity
+
+The notion of similarity largely relates to the feature
+vector: How is it in large quantities of data possible to
+tell if the new PDF document carries similar characteristics
+like others of a larger dataset.
+
+In his work with semantic similarity and preserving hashing,
+M. Pittalis11 defined similarity from the Merriam-Webster
+dictionary:
+
+> Similarity: The existence of comparable aspect between two
+> elements  
+> – Merriam-Webster Dictionary
+
+The measure of similarity is important in regard to
+clustering or grouping the documents. When clustering
+datasets the procedure is usually in six steps, finding the
+similarity measure is step 2.
+
+1. Feature selection
+2. Proximity/similarity measure
+3. Clustering criterion
+4. Clustering algorithm
+5. Validation
+6. Interpretation
+
+In this paper the k-means unsupervised learning clustering
+algorithm was considered. This simple algorithm groups the
+number n observations into k clusters22. Each observation
+relates to the cluster with the nearest mean.
+
+Now, as will be seen over the next two sections, work done
+in the subject is mostly missing out on giving a valid
+similarity measure when it comes to classifying PDF
+documents as anything other than malicious or benign. So, to
+be able to cluster the PDF documents the feature vector will
+need a revision.
+
+As Pittalis introduced the concept of similarity, it is
+important to define one more term: Identification. According
+to the American Heritage Dictionary, identification is:
+
+> Proof or Evidence of Identity.
+> — The American Heritage Dictionary
+
+In our context this means being able to identify a PDF
+document and attribute it to e.g. a certain type of botnet
+or perhaps more correct a coding or obfuscation
+technique. In an ideal state this will give an indication to
+which threat agent is behind the attack. This is something
+that has not been researched extensively in regard to PDF
+documents earlier.
+
+### C. The Portable Document Format
+
+When it comes to the feature vector of the portable document
+format (PDF), it is reasonable to have a look at how PDF
+documents are structured. The PDF consists of objects, each
+object is of a certain type. As much research has been done
+on the topic previously, the format itself will not be
+treated any further in this paper12.
+
+![A simplified illustration of the portable document format](/images/2015/02/ObjectdescriptionPDF-2.png)
+
+When considering malicious PDF documents, relevant
+statistics has shown the following distribution of resource
+objects:
+
+**Known Malicious Datasets Objects** A table showing a
+number of interesting, selected features in malicious PDF
+documents compared with clean ones. Baccas used two datasets
+where one indicated slightly different results.
+
+    Dataset	Object Type	Clean (%)	Malicious (%)
+    The Shadowserver 100k PDF malicious dataset	 /JavaScript	            NA	    97%
+    --
+    Paul Baccas' Sophos 130k malicious/benign dataset3	/JavaScript	     2%   94%
+                /RichMedia	   0%	 0,26%
+                /FlateDecode	89%	  77%
+                /Encrypt	    0,91% 10,81%
+
+What can be seen of the table above is that when it comes to
+the distribution of objects in malicious files, most of them
+contain javascript. This makes it very hard to distinguish
+and find the similarity between the documents without
+considering a javascript subvector. The author would argue
+that this makes it a requirement for a javascript subvector
+to be included in the PDF feature vector to make it
+valid. In previous work, where the aim has been to
+distinguish between malicious and benign, this has not been
+an issue.
+
+### D. Closing in on the Core: The PDF Javascript Feature Subvector
+
+Javascript is a client-side scripting language primarily
+offering greater interactivity with webpages. Specifically
+javascript is not a compiled language, weakly-typed4 and has
+first-class functions5. In terms of rapid development, these
+features give great advantages. In a security perspective
+this is problematic. The following states a Snort signature
+to detect a javascript "unescape"-obfuscation technique2 (we
+will return to the concept of obfuscation later on):
+
+    alert tcp any any -> any any (msg:”Obfuscated unescape”; sid: 1337003; content:”replace”; pcre:”/u.{0,2}n.{0,2}e.{0,2}s.{0,2}c.{0,2}a.{0,2}p.{0,1}e’ ?.replace (/”;rev:4;)
+
+Traditionally javascript is integrated as a part of a
+browser. Seen from a security perspective, this opens for
+what is commonly known as client-side attacks. More
+formally: Javascript enables programmatic access to
+computational objects within a host environment. This is
+complicated as javascript comes in different flavors, making
+general parsing and evaluation complex6, as may be seen of
+the above signature. The flavors are often specific to the
+application. Today, most browsers are becoming more aligned
+due to the requirements of interoperability. Some
+applications, such as the widely deployed Adobe Reader has
+some extended functionality though, which we will be
+focusing on in this paper.
+
+Even though javascript may pose challenges to security, it
+is important to realize that this is due to
+complexity. Javascript (which is implemented through
+SpiderMonkey in Mozilla18-products and in Adobe Reader as
+well) builds on a standard named ECMA-262. The ECMA is a
+standardization body for Information and Communication
+Technology (ICT) and Consumer Electronics (CE)17. Thus,
+Javascript is built from the ECMAScript scripting language
+standard. To fully understand which functions are essential
+in regard to malicious Javascripts this paper will rely on
+the ECMAScript Language Specification19 combined with expert
+knowledge.
+
+### E. Introducing Obfuscation
+
+Harawa et al.8 describes javascript obfuscation by six elements:
+
+* Identifier reassignment or randomization
+* Block randomization
+* White space and comment randomization
+* Strings encoding
+* String splitting
+* Integer obfuscation
+
+Further, Kittilsen1 documented a javascript feature vector
+which states the following functions as potentially
+malicious: [function, eval_length, max_string, stringcount,
+replace, substring, eval, fromCharCode]. Even though his
+confusion matrix shows good results, there are some problems
+when it comes to evaluating these as is: Such characters are
+usually obfuscated. The following is an example from sample
+``SHA256:d3874cf113fa6b43e7f6e2c438bd500edea5cae7901e2bf921b9d0d2bf081201``:
+
+    if((String+'').substr(1,4)==='unct'){e="".indexOf;}c='var _l1="4c206f5783eb9d;pnwAy()utio{.VsSg',h&lt;+I}*/DkR%x-W[]mCj^?:LBKQYEUqFM';l='l';e=e()[((2+3)&#63;'e'+'v':"")+"a"+l];s=[];a='pus'+'h';z=c's'+"ubstr" [1];sa [2];z=c's'+"ubstr" [3];sa [2];z=c['s'+"ubstr"] [...]e(s.join(""));}
+
+The above example tells an interesting story about the
+attacker's awareness of complexity. In respect to Kittilsen's
+javascript feature vector the above would yield the
+following result: [0,x,x,x,0,0,0,0] (considerable results on
+the second to fourth, plus one count if we are to shorten
+substring to substr), in other words the features are to be
+found in the embedded, obfuscated javascript, but not in
+clear text. When it comes to eval_length, max_string and
+string_count we will return to those later in the paper.
+
+Deobfuscated, the script would look like:
+
+    var _l1="[...]";_l3=app;_l4=new Array();function _l5(){var _l6=_l3.viewerVersion.toString();_l6=_l6.replace('.','');while(_l6.length&4)_l6l='0';return parsetnt(_l6,10);function _l7(_l8,_l9){while(_l8.length+2&_l9)_l8l=_l8;return _l8.substring(0,_l9I2);function _t0(_t1){_t1=unescape(_t1);rote}a*=_t1.length+2;da*/ote=unescape('Du9090');spray=_l7(da*/ote,0k2000Rrote}a*);lok%hee=_t1lspray;lok%hee=_l7(lok%hee,524098);for(i=0; i & 400; ill)_l4xi-=lok%hee.substr(0,lok%hee.lengthR1)lda*/ote;;function _t2(_t1,len){while(_t1.length&len)_t1l=_t1;return _t1.substring(0,len);function _t3(_t1){ret='';for(i=0;i&_t1.length;il=2){b=_t1.substr(i,2);c=parsetnt(b,16);retl=String.froW[har[ode(c);;return ret;function _]i1(_t1,_t4){_t5='';for(_t6=0;_t6&_t1.length;_t6ll){_l9=_t4.length;_t7=_t1.char[odeAt(_t6);_t8=_t4.char[odeAt(_t6D_l9);_t5l=String.froW[har[ode(_t7m_t8);;return _t5;function _t9(_t6){_]0=_t6.toString(16);_]1=_]0.length;_t5=(_]1D2)C'0'l_]0j_]0;return _t5;function _]2(_t1){_t5='';for(_t6=0;_t6&_t1.length;_t6l=2){_t5l='Du';_t5l=_t9(_t1.char[odeAt(_t6l1));_t5l=_t9(_t1.char[odeAt(_t6));return _t5;function _]3(){_]4=_l5();if(_]4&9000){_]5='oluAS]ggg*pu^4?:IIIIIwAAAA?AAAAAAAAAAAALAAAAAAAAfhaASiAgBA98Kt?:';_]6=_l1;_]7=_t3(_]6);else{_]5='*?lAS]iLhKp9fo?:IIIIIwAAAA?AAAAAAAAAAAALAAAAAAAABk[ASiAgBAIfK4?:';_]6=_l2;_]7=_t3(_]6);_]8='SQ*YA}ggAA??';_]9=_t2('LQE?',10984);_ll0='LLcAAAK}AAKAAAAwtAAAALK}AAKAAAA?AAAAAwK}AAKAAAA?AAAA?gK}AAKAAAA?AAAAKLKKAAKAAAAtAAAAEwKKAAKAAAAwtAAAQAK}AUwAAA[StAAAAAAAAAAU}A]IIIII';_ll1=_]8l_]9l_ll0l_]5;_ll2=_]i1(_]7,'');if(_ll2.lengthD2)_ll2l=unescape('D00');_ll3=_]2(_ll2);with({*j_ll3;)_t0(*);Ywe123.rawValue=_ll1;_]3();
+
+Which through the simple Python script javascript feature
+vector generator (appendix 1), yields:
+
+    ['function: 9', 'eval_length: x', 'max_string: x', 'stringcount: x', 'replace: 1', 'substring|substr: 4', 'eval: 0', 'fromCharCode: 0']
+
+Harawa et al.'s 6 elements of javascript obfuscation are
+probably a better, or necessary supplemental approach to
+Kittilsen's work.
+
+There is a notable difference between deobfuscation and
+detecting obfuscation techniques. The difference consists of
+the depth of insight one might gain in actually
+deobfuscating a javascript as it will reveal completely
+different code while the obfuscation routines may be based
+on a generic obfuscator routine used by several threat
+agents. This is much like the issue of packers in regard to
+executables23.
+
+This section has shown the difficulties of balancing
+deobfuscation for a more detailed coding style analysis
+against a less specific feature vector by using abstract
+obfuscation detection.
+
+## Extracting and Analysing a PDF Feature Vector
+
+### A. Deobfuscation - Emerging Intentions
+
+Usually the most pressing question when an incident
+involving a PDF document occurs is: Who did it, and what are
+his intentions? This is also a consideration when further
+evolving the PDF feature vector. In the next figure is a
+model describing three groups of threat agents, where one
+usually stands out. Such as if a Stuxnet scale attack24
+involving a PDF document is perceived it will be associated
+with a cluster containing "group 1" entities.
+
+While Al-Tharwa et al.2 argues for no need for deobfuscation
+in regard to classification, deobfuscation is an important
+step in regard to finding a distinct feature vector. The
+issue is that in most situations it isn't good enough to
+tell if the document is malicious; one must also tell
+who, what, where and how it was created. In regard to being
+defined as valid digital evidence a rich feature vector (in
+addition to the network on-the-fly hash-sum) is part of
+telling. The latter also makes itself relevant when it comes
+to large quantities of data, where an analyst is not capable
+of manually analyzing and identifying hundreds to tens of
+thousands of PDF documents each day.
+
+![Fig. 4: The threat agent model. A model describing three
+ groups of attackers. These are necessary to filter and
+ detect in the collection
+ phase](/images/2015/02/threat-agent-model.png)
+
+### B. Technical Problems During Deobfuscation
+
+Normally most javascript engines, such as Mozilla's
+Spidermonkey15, Google V816 and others, tend to be
+javascript libraries for browsers and miss some basic
+functionality in regard to Adobe Reader which is the most
+used PDF reader. These engines are most often used for
+dynamic analysis of Javascripts and are a prerequisite when it
+comes to being able to completely deobfuscate javascripts.
+
+To prove the concepts of this article a static Python
+feature vector generator engine based on a rewritten version
+of the Jsunpack-n14 project is used. The application used in
+the paper is providing a vector based interpretation of the
+static script, meaning it is not run dynamically.
+
+Reliably detecting malicious PDF documents is a challenge
+due to the obfuscation routines often used. This makes it
+necessary to perform some kind of deobfuscation to reveal
+more functionality. Even if one managed to deobfuscate the
+script one time, there may be several rounds more before it
+is in clear text. This was a challenge not solvable in the
+scope of this article.
+
+Due to parsing errors under half of the Shadowserver 100k
+dataset was processed by the custom Jsunpack-n module.
+
+### C. Introducing Two Techniques: Feature Vector Inversion and Outer Loop Obfuscation Variable Computation
+
+As has been very well documented so far in the paper it is
+more or less impossible to completely automate a
+deobfuscation process of the PDF format. Obfuscation leaves
+many distinct characteristics though, so the threat agent on
+the other hand must be careful to not trigger anomaly
+alarms. There is a balance. This part of the article
+introduces two novel techniques, proposed to be applied to the
+javascript subvector to improve its reliability.
+
+#### C.1. Outer Loop Obfuscation Variable Computation (OLOVC)
+
+When the threat agent implements obfuscation, one of his
+weaknesses is being detected using obfuscation. When it
+comes to PDF documents, using javascripts alone is a
+trigger. Now, the threat agent is probably using every trick
+in the book, meaning the 6 elements of javascripts
+obfuscation8. The job of an analyst in such a matter will be
+to predict new obfuscation attempts and implement anomaly
+alerts using the extended PDF feature vector.
+
+Throughout this paper we will name this technique "Outer
+Loop Obfuscation Variable Computation". The term "outer
+loop" most often refers to round zero or the first of the
+deobfuscation routines. Variable computation is as its name
+states, a matter of computing the original javascript
+variable. As we have seen this may be done by either
+deobfuscating the script as a whole including its
+near-impossible-for-automation complexity, or use the
+original obfuscated data. We will have a further look at the
+latter option.
+
+Take for instance this excerpt from the "Introducing Obfuscation"-section:
+
+    z=c['s'+"ubstr"](0,1);s[a](z);z=c['s'+"ubstr"](1,1);s[a](z);z=c['s'+"ubstr"](2,1);s[a](z);z=c['s'+"ubstr"](3,1);s[a](z);z=c['s'+"ubstr"](4,1);s[a](z);z=c['s'+"ubstr"](5,1);s[a](z);z=c['s'+"ubstr"](6,1);s[a](z);z=c['s'+"ubstr"](7,1);s[a](z);z=c['s'+"ubstr"](8,1);s[a](z);z=c['s'+"ubstr"](9,1);s[a](z);z=c['s'+"ubstr"](10,1);s[a](z);z=c['s'+"ubstr"](11,1);s[a](z);z=c['s'+"ubstr"](12,1);s[a](z);z=c['s'+"ubstr"](13,1);s[a](z);z=c['s'+"ubstr"](12,1);s[a](z);z=c['s'+"ubstr"](12,1);s[a](z);z=c['s'+"ubstr"](14,1);s[a](z);z=c['s'+"ubstr"](12,1);[...](20,1);s[a](z);z=c['s'+"ubstr"](17,1);s[a](z);z=c['s'+"ubstr"](12,1);s[a](z);z=c['s'+"ubstr"](9,1);s[a](z);z=c['s'+"ubstr"](1,1);s[a](z);z=c['s'+"ubstr"](18,1);s[a](z);z=c['s'+"ubstr"](12,1);s[a](z);z=c['s'+"ubstr"](11,1);s[a](z);z=c['s'+"ubstr"](12,1);s[a](z);z=c['s'+"ubstr"](17,1);s[a](z);z=c['s'+"ubstr"](11,1);s[a](z);z=c['s'+"ubstr"](9,1);s[a](z);z=c['s'+"ubstr"](1,1);s[a](z);z=c['s'+"ubstr"](13,1);s[a](z);z=c['s'+"ubstr"](19,1);s[a](z);z=c['s'+"ubstr"](11,1);s[a](z);z=c['s'+"ubstr"](14,1);s[a](z);z=c['s'+"ubstr"](17,1);s[a](z);z=c['s'+"ubstr"](12,1);s[a](z);z=c['s'+"ubstr"](9,1);s[a](z);z=c['s'+"ubstr"](1,1);s[a](z);z=c['s'+"ubstr"](9,1);s[a](z);z=c['s'+"ubstr"](6,1);s[a](z);z=c['s'+"ubstr"](9,1);s[a](z);z=c['s'+"ubstr"](6,1);s[a](z);z=c['s'+"ubstr"](9,1);s[a](z);z=c['s'+"ubstr"](6,1);s[a](z);
+
+
+Harawa et al. defined the above obfuscation technique as
+"string splitting" (as seen in the section "Introducing
+obfuscation"). The following two obfuscation-extraction
+regular expressions were previously stated in the author's
+Bachelor's thesis2:
+
+    e.{0,2}v.{0,2}a.{0,2}l.{0,1}
+
+    u.{0,2}n.{0,2}e.{0,2}s.{0,2}c.{0,2}a.{0,2}p.{0,1}e
+
+Keep the two above statements and the previous code excerpt
+in mind. When breaking down the above expressions we
+introduce one more regular expression:
+
+    s.{0,4}u.{0,4}b.{0,4}s.{0,4}t.{0,4}r.{0,4}
+
+While searching for "substr" in the plain text
+will certainly fail, the above expression will match e.g.:
+
+    's'+"ubstr"
+
+Recall Kittilsen's javascript feature vector: ``[function,
+eval_length, max_string, stringcount, replace, substring,
+eval, fromCharCode]``. If extended by the above techniques,
+the results are somewhat different.
+
+Without string splitting detection:
+
+    ['function: 9', 'eval_length: x', 'max_string: 10849', 'stringcount: 1', 'replace: 1', 'substring|substr: 4', 'eval: 0', 'fromCharCode: 0']
+
+With outer loop obfuscation variable computation:
+
+    ['function: 0', 'eval_length: x', 'max_string: 67', 'stringcount: 2', 'replace: 0', 'substring: 0', 'substr: 3663', 'eval: 1', 'fromCharCode: 0']
+
+Additionally, rewriting and extending Kittilsen's feature
+vector by several other typically suspicious functions
+should give preferable results: ``[max_string, stringcount,
+function, replace, substring, substr, eval, fromCharCode,
+indexof, push, unescape, split, join, sort, length,
+concat]``
+
+This makes the following results in two random, but related, samples:
+
+    [SHA256:5a61a0d5b0edecfb58952572addc06f2de60fcb99a21988394926ced4bbc8d1b]:{'function': 0, 'sort': 0, 'unescape': 0, 'indexof': 0, 'max_string': 10849, 'stringcount': 2, 'replace': 0, 'substring': 0, 'substr': 1, 'length': 1, 'split': 2, 'eval': 0, 'push': 0, 'join': 1, 'concat': 0, 'fromCharCode': 0}
+
+    [SHA256:d3874cf113fa6b43e7f6e2c438bd500edea5cae7901e2bf921b9d0d2bf081201]:{'function': 0, 'sort': 0, 'unescape': 0, 'indexof': 0, 'max_string': 67, 'stringcount': 1, 'replace': 0, 'substring': 0, 'substr': 3663, 'length': 0, 'split': 0, 'eval': 0, 'push': 1, 'join': 1, 'concat': 0, 'fromCharCode': 0}
+
+It may perhaps not need a comment, but in the above results
+we see that there are two types of elements in the feature
+vector that stand out: max_string and two of the suspicious
+functions.
+
+Summarized, the "Outer Loop Obfuscation Variable Computation"
+may be used to, at least partially, defeat the malware
+author's obfuscation attempts. By running the somewhat
+complex regular expressions with known malicious obfuscation
+routines, the implementation result of the 100.000 PDF
+dataset may be seen in the following table: Dataset
+generalization by "outer loop obfuscation variable
+computation". Dataset aggregated by counting javascript
+variables and functions, OLOVC applied (due to errors in the
+jsunpack-n the total number of entities calculated is
+42736).
+
+    Word	    Count
+    function	  651
+    sort	     7579
+    unescape	    4
+    toLowerCase	 1
+    indexof	     8
+    max_string  42346
+    stringcount 41979
+    replace	    70
+    substring	  91
+    replace	    70
+    substring  	91
+    substr	  38952
+    length	   1512
+    split	    9621
+    eval	       77
+    push	      260
+    join	       91
+    inverse_vector 41423
+    concat	     86
+    fromCharCode   45
+
+By the counts in the above table it is shown that the
+selected feature vector has several very interesting
+features. On a sidenote: Even though some features have a
+larger quantity than others it should be mentioned that this
+is not necessarily the measure of how good that feature is,
+such is especially the case with the inverse vector, which we
+will become more familiar with in the next section. Also, as
+previously mentioned it is interesting to see the
+composition of multiple features to determine the origin of
+the script (or the script style if you'd like). The
+aggregation script is attached in appendix 2.
+
+The "Outer Loop Obfuscation Variable Computation" will
+require a notable amount of computational resources in
+high-quantity networks due to the high workload. In a way
+this is unavoidable since the threat agent's objective of
+running client-side scripts is to stress the resources of
+such systems.
+
+![Fig. 5: Illustration of Computational Complexity. The illustration shows the computational load on a network sensor in regard to different obfuscation techniques](/images/2015/02/Skjermbilde-2012-05-08-kl--20-43-04.png)
+
+### C.2. Feature Vector Inversion
+
+Threat agents go a long way in evading detection
+algorithms. The following thought is derived from a common
+misconception in database security:
+
+> A group of ten persons whose names are not to be revealed
+  is listed amongst a couple of thousands, in an
+  organization's LDAP directory. The group, let us name it X,
+  is not to be revealed and is therefore not named in the
+  department field.
+
+While the public may not search and filter directly on the
+department name, being X, an indirect search would be
+successful in revealing the group due to the ten persons being
+the only ones not associated with a department.
+
+The concept of searching indirectly may be applied to
+evaluating javascripts in PDF documents as well. We might
+start off with some of the expected characters found in
+benign javascript documents:
+
+    {'viewerVersion':1,'getPrintParams':1,'printd':1,'var':10,'getPageNthWord':1,'annot':2,'numPages':1,'new':3}
+
+The above is found by expert knowledge to be the variables
+and functions probably used in a benign javascript or other
+object. Many of these functions are used in interactive PDF
+documents, e.g. providing print buttons.
+
+A weight is added to each cleartext function/variable. After
+counting the words in the document a summarized variable
+named the inverted_feature_vector gives an integer. The
+higher the integer, the higher the probability of the
+javascript being benign.
+
+The inversed feature vector may be used as a signature and a
+whitelist indication database may be built of datasets. In
+the 100k malicious dataset the statistics showed that out of
+42475, 41423 had more than one occurrence of a known benign
+variable. This might seem like a less good feature, but the
+quantity is not the issue here, it is the weight of each
+variable. So: One may say that the higher the inverse vector
+is, the more likely it is that the PDF or javascript is
+benign. To clarify, next table shows variables fragmented by
+weight: Inverse vector separated by interval, the
+
+**Shadowserver 100k dataset** _The table shows that most
+malicious PDF files in the 100k Shadowserver dataset
+contain low-weighted scores when it comes to the inverted
+vector as a measure of how benign the scripts are._
+
+    Weight    interval	Instances	Instance percentage
+    <10	      15232	   35,6%
+    20<>9	    26852	 62,8%
+    30<>19	     136	~0%
+    40<>29	     148	~0%
+    50<>39	      87	~0%
+    60<>49	      28	~0%
+    >60	        253	   ~0%
+    Total	    42736	 -
+
+The inversion vector may as well be seen as a measure of the
+likeliness that the script is obfuscated. A quick look at
+the table shows that the characteristics of obfuscation are
+found in most PDF documents in the Shadowserver 100k
+dataset.
+
+Even though this part of the vector should be seen as an
+indication, analysts should be aware that threat agents may
+adapt to the detection technique and insert clear text
+variables such as the ones listed above in addition to their
+malicious javascripts. This latter would function as a
+primitive feature vector inversion jammer. In other words it
+should be seen in context with the other items of the
+javascript feature vector as well. Further, the concept
+should be further evolved to avoid such evasion. One
+technique is to segment the code before analyzing it (giving
+each code segment a score, finally generating an overall
+probability score), making it more difficult for the threat
+agent to utilize noise in his obfuscation.
+
+### D. Clustering
+
+Experience shows that in practically oriented environments
+security analysis is, at least partially, done in a manual
+manner. This saying that the detection is based on
+indicators or anomalies and the analysis of the detection
+results is performed manually by an analyst. Though this may
+possibly be the approach resulting in least false positives
+it is overwhelming in regard to analysis of all potentially
+PDF documents in a larger organization. The 100k PDF dataset
+used in this paper is evidence of such. So, how is it
+possible to automatically detect the interesting parts of
+the 100k PDF dataset? This question leads to the concept of
+data mining.
+
+The definition of data mining is the transformation of data
+to "meaningful patterns and rules".
+
+Michael Abernethy at IBM developerWorks20 covers data mining quite extensively.
+
+#### D.1. A Narrow Experiment and Results
+
+In this paper the goal is to achieve a view of the dataset
+in a way that is named "undirected" data mining: Trying to
+find patterns or rules in existing data. This is achieved
+through the feature vector previously presented.
+
+Up until now this paper has discussed how to generate a
+satisfactory feature vector and what makes the measure of
+similarity. Let us do an experiment using WEKA (Waikato
+Environment for Knowledge Analysis) for analyzing our
+feature vector.
+
+Appendix 3 describes the ARFF format found from our feature
+vector and two of the previously presented feature vectors
+(SHA256:
+``5a61a0d5b0edecfb58952572addc06f2de60fcb99a21988394926ced4bbc8d1b``,
+``d3874cf113fa6b43e7f6e2c438bd500edea5cae7901e2bf921b9d0d2bf081201``)
+and a random selection of 2587 parseable PDF-documents from
+the dataset.
+
+In this experiment the feature vector was produced from 200
+random samples from the 100k dataset. Interesting in that
+regard is that the subdataset loaded from originally
+contained 6214 samples, while our application only handled
+the decoding of under half. The feature vector was extracted
+in a CSV format, converted by the following WEKA Java class
+and loaded in WEKA:
+
+    java -classpath /Applications/weka-3-6-6.app/Contents/Resources/Java/weka.jar weka.core.converters.CSVLoader dataset.csv
+
+In the WEKA preprocessing, the results may be visualized:
+
+![Fig. 6: Results 1; PDF Feature Vector Distribution. A model
+ showing the PDF feature vector object distribution using
+ the 2587 parsable PDF
+ documents](/images/2015/02/Skjermbilde-2012-05-16-kl--13-17-20.png)
+
+### D.2. The complete dataset
+
+Next loading the complete feature vector dataset consisting
+of 42736 entities showed interesting results when
+clustering.
+
+![Fig. 7: Stringcount vs anomalies in the inverse
+ vector. Stringcount vs anomalies in the
+ inverse_vector. Using k-means algorithm and k=5. Medium
+ Jitter to emphasize the
+ clusters](/images/2015/02/Skjermbilde-2012-06-27-kl--11-40-19.png)
+
+The cluster process above also enables the possibility to
+look at the anomalies where the inverse_vector is high. For
+instance 9724 (the highest one in the Y-axis) the
+inverse_vector is 21510 which is a very clear anomaly
+compared to the rest of the clusters (the distance is
+far). This should encourage a closer look at the file based
+on the hash.
+
+The Shadowserver 100k ARFF dataset will be further evolved and may be found at the project GitHub page25.
+
+### E. Logging and Interpreting Errors
+
+Again and again while analyzing the 100k dataset the
+interpreter ran into parsing errors. Bad code one may say,
+but a fact is that the threat agents are adapting their code
+to evade known tools and frameworks. An example of this is
+a recent bug21 in Stevens PDF parser where empty PDF objects
+in fact created an exception in the application.
+
+So, what does this have to do with this paper? Creative
+threat agents can never be avoided, creating malicious code
+that avoids the detection routines. This makes an important
+point, being that the application implemented should be
+using strict deobfuscation and interpretation routines. When
+an error occurs, which will happen sooner or later, the file
+should be traceable and manually analyzed. This in turn
+should lead to an adaption of the application. Where the
+routines fail will also be a characteristic of the threat
+agent: What part of the detection routines does he try to
+evade? E.g. in the 100k dataset an error on the
+ascii85-filter occurred. The parsing error prevented the
+parser-module from outputting a feature vector, and was
+detected by error monitoring in log files.
+
+## Discussion and Conclusions
+
+In regard to being used standalone as evidence the feature
+vector will have its limitations, especially since it's hard
+to connect it to an event; it should be considered
+circumstantial.
+
+The PDF and ECMA standard are complex and difficult to
+interpret, especially when it comes to automation. As has
+been shown in this article a really hard problem is
+dynamically and generically executing javascripts for
+deobfuscation. This is also shown just in the Adobe Reader,
+where e.g. Adobe Reader X uses Spidermonkey 1.8, while
+previous more prevalent versions use version 1.7 of
+Spidermonkey. This often resulted in parsing errors, and
+again it will potentially cause a larger error rate in the
+next generation intrusion detection systems.
+
+It has been proved that a static analysis through a
+Jsunpack-n modification recovers good enough round-zero
+data, from a little less than half of the Shadowserver 100k
+dataset, to generate a characteristic of each file. The
+results were somewhat disappointing in regard to the
+extensive parsing errors. Parsing optimization and error
+correction making the script more robust and reliable should
+be covered in a separate report. Despite the latter a good
+foundation and enough data were given to give a clue for
+what to expect from the extended PDF feature vector. Also,
+the inverse vector with its weighting gives an individual
+score to each document, making it exceptionally promising
+for further research.
+
+In regard to OLOVC a certain enhancement would be to combine
+it with the work of Franke' and Petrovic' "Improving the
+efficiency of digital forensic search by means of constrained
+edit distance". Their concept seems quite promising and
+might provide valuable input to OLOVC.
+
+The dataset used in this article may contain certain flaws
+in its scientific foundation. No dataset flaws, but
+indications that some data originates from the same source, have
+been seen throughout this article. The reason is most
+probably that the dataset was collected over three
+continuous days. Linked to the behaviour of malware it is
+known that certain malware such as drive-by attacks has
+peaks in its spread as a function of time. It is therefore
+natural to assume that there are larger occurrences of PDF
+documents originating from the same threat agent. On the
+other side, in further research, this should be a measure of
+the effectiveness of the algorithms' ability to group the data.
+
+The Shadowserver 100k dataset only contains distinct
+files. It would be interesting to recollect a similar
+dataset with non-distinct hash-entries, and to cluster it by
+fuzzy hashing as well.
+
+Even though clustering is mentioned in the last part of this
+article, further extensive research should be done to
+completely explore the potential of using the current
+feature vector. In other words the scope of the article
+permitted for a manual selection of a feature vector and a
+more or less defined measure of similarity through the
+extended PDF feature vector.
+
+The project has a maintained GitHub page as introduced in
+the last section. This page should encourage further
+development into the extended PDF feature vector.
+
+If you'd like, please have a look at the GuC Testimon Forensic Laboratory [1].
+
+
+[1] GuC Testimon Forensic Laboratory: https://sites.google.com/site/testimonlab/
--- a/data/osquery.md
+++ b/data/osquery.md
@ -0,0 +1,211 @@
+In another post I wrote about how telemetry is a challenge [1] of
+a changing and more diverse and modern landscape. Recently I have
+reviewed some device inventory and endpoint detection tools that
+will add to the solution. In the future I will get back to my view
+on Mozilla InvestiGator (MIG) [2], but this post will focus on a
+telemetry collection tool that I have grown fond of: osquery [3].
+
+osquery was originally developed by Facebook for the purpose of
+[4]:
+
+> Maintaining real-time insight into the current state of your infrastructure[...]
+
+With osquery data is abstracted, in the operating system in which
+the agent runs, to a SQL-based interface. It contains a
+near-infinite amount of available data, which is perfect to a
+network defender. osquery can even parse native sqlite-databases,
+which there are lots of in macOS. It also works in a distributed
+mode like GRR and MiG. In practical terms this means that queries
+are distributed. On the other hand, events can be streamed as well
+when considering operational security.
+
+![Example of the hardware_events table when plugging in and then detaching a Yubikey](/static/img/data/osquery_hardware_events.png)
+
+Since 2014 osquery has been open sourced and now has a large
+community developing about every aspect of the tool. According to
+the briefs that are online, several major institutions, including
+Facebook, now use osquery in service networks.
+
+osquery is cross-platform, and now supports: Linux, FreeBSD,
+Windows and macOS. That is also some of what separates it from its
+alternatives, like sysmon.
+
+Posts about osquery that you should review before moving on:
+
+* Doug Wilson's excellent presentation on FIRST 2018
+  (security-usage focused) [5]
+* Managing osquery with Kolide (an osquery tls server) [6]
+* Another post on applying osquery for security [7]
+* Palantir on osquery [8]
+
+So that was a couple of links to get you started. The next section shows you how to quickly get a lab environment up and running.
+
+## Setup and Configuration
+
+### Prerequisites
+
+There are only two things that you need set up for the rest of this
+article if you are on macOS, which can both be easily installed
+using Homebrew [9]:
+
+    brew install go yarn
+
+Also you need to configure your Go-path, which can basically be:
+
+    echo "export GOPATH=$HOME/go" >> ~/.bash_profile
+
+### Server Setup
+
+Setup Docker image of Kolide Fleet [10]:
+
+     mkdir -p $GOPATH/src/github.com/kolide
+     cd $GOPATH/src/github.com/kolide
+     git clone git@github.com:kolide/fleet.git
+     cd fleet
+     make deps && make generate && make
+     docker-compose up
+
+Populate the database:
+
+    ./build/fleet prepare db
+    
+You are now ready to boot up the web UI and API server:
+
+    ./build/fleet serve --auth_jwt_key=3zqHl2cPa0tMmaCa9vPSEq6dcwN7oLbP
+
+Get enrollment secret and certificate from the Kolide UI at
+``https://localhost:8080`` after doing the registration process.
+
+![Kolide enrollment](/static/img/data/kolide-enrollment.png)
+
+### Client Setup
+
+Make the API-token (enrollment secret) persistent at the
+end-point:
+
+    echo {enrollment-secret} > /etc/osquery/enrollment.secret
+
+Define flags file in ``/private/var/osquery/osquery.flags``. This
+one the client uses to apply the centralised tls logging method,
+which is the API Kolide has implemented. It is also certificate
+pinned, so all is good.
+
+     --enroll_secret_path=/etc/osquery/enrollment.secret
+     --tls_server_certs=/etc/osquery/kolide.crt
+     --tls_hostname=localhost:8080
+     --host_identifier=uuid
+     --enroll_tls_endpoint=/api/v1/osquery/enroll
+     --config_plugin=tls
+     --config_tls_endpoint=/api/v1/osquery/config
+     --config_tls_refresh=10
+     --disable_distributed=false
+     --distributed_plugin=tls
+     --distributed_interval=10
+     --distributed_tls_max_attempts=3
+     --distributed_tls_read_endpoint=/api/v1/osquery/distributed/read
+     --distributed_tls_write_endpoint=/api/v1/osquery/distributed/write
+     --logger_plugin=tls
+     --logger_tls_endpoint=/api/v1/osquery/log
+     --logger_tls_period=10
+
+You can start the osquery daemon on the client by using the
+following command. At this point you should start thinking about
+packaging, which is detailed in the osquery docs [11].
+
+    /usr/local/bin/osqueryd --disable_events=false --flagfile=/private/var/osquery/osquery.flags
+
+osquery also has an interactive mode if you would like to test the
+local instance, based on a local configuration file:
+
+    sudo osqueryi --disable_events=false --config_path=/etc/osquery/osquery.conf
+
+To make the client persistent on macOS, use the following
+documentation from osquery [12].
+
+### Managing the Kolide Configuration
+
+For this part I found what worked best was using the Kolide CLI
+client [13]:
+
+    ./build/fleetctl config set --address https://localhost:8080
+    ./build/fleetctl login
+    ./build/fleetctl apply -f ./options.yaml
+
+The ``options.yaml`` I used for testing was the following. This
+setup also involves setting up the osquery File Integrity
+Monitoring (FIM) [14], which I wasn't able to get working by the
+patching curl command [15] in the docs. The config monitors
+changes in files under ``/etc`` and a test directory at
+``/var/tmp/filetest``.
+ 
+    apiVersion: v1
+    kind: options
+    spec:
+      config:
+        decorators:
+          load:
+          - SELECT uuid AS host_uuid FROM system_info;
+          - SELECT hostname AS hostname FROM system_info;
+        file_paths:
+          etc:
+            - /etc/%%
+          test:
+            - /var/tmp/filetest/%%
+        options:
+          disable_distributed: false
+          distributed_interval: 10
+          distributed_plugin: tls
+          distributed_tls_max_attempts: 3
+          distributed_tls_read_endpoint: /api/v1/osquery/distributed/read
+          distributed_tls_write_endpoint: /api/v1/osquery/distributed/write
+          logger_plugin: tls
+          logger_tls_endpoint: /api/v1/osquery/log
+          logger_tls_period: 10
+          pack_delimiter: /
+      overrides: {}
+
+## Next Steps
+
+Through this article we've reviewed some of the basic capabilities
+of osquery and also had a compact view on a lab-setup
+demonstrating centralised logging, to Kolide, using the tls API of
+osquery.
+
+A couple of things that I would have liked to see was support for
+OpenBSD [16], Android and iOS [17].
+
+The local setup obviously does not scale beyond your own
+computer. I briefly toyed with the idea that this would be a
+perfect fit for ingesting into a Hadoop environment, and not
+surprisingly there's a nice starting point over at the Hortonworks
+forums [18].
+
+There's a lot of open source information on osquery. I also found
+the Uptycs blog useful [19].
+
+[1] https://secdiary.com/2018-02-25-telemetry.html  
+[2] https://mig.mozilla.org  
+[3] https://osquery.io  
+[4] https://code.fb.com/security/introducing-osquery/  
+[5]
+https://www.first.org/resources/papers/conf2018/Wilson-Doug_FIRST_20180629.pdf  
+[6]
+https://blog.kolide.com/managing-osquery-with-kolide-launcher-and-fleet-b33b4536acb4  
+[7] https://medium.com/@clong/osquery-for-security-part-2-2e03de4d3721  
+[8] https://github.com/palantir/osquery-configuration  
+[9] https://brew.sh  
+[10]
+https://blog.kolide.com/managing-osquery-with-kolide-launcher-and-fleet-b33b4536acb4
+[11] https://osquery.readthedocs.io/en/2.1.1/installation/custom-packages/
+[12] https://osquery.readthedocs.io/en/stable/installation/install-osx/  
+[13]
+https://github.com/kolide/fleet/blob/master/docs/cli/setup-guide.md  
+[14]
+https://osquery.readthedocs.io/en/stable/deployment/file-integrity-monitoring/  
+[15]
+https://github.com/kolide/fleet/tree/master/docs/api#file-integrity-monitoring  
+[16] https://github.com/facebook/osquery/issues/4703  
+[17] https://github.com/facebook/osquery/issues/2815  
+[18]
+https://community.hortonworks.com/articles/79842/ingesting-osquery-into-apache-phoenix-using-apache.html  
+[19] https://www.uptycs.com/blog
--- a/data/privacy-report-2014.md
+++ b/data/privacy-report-2014.md
@ -0,0 +1,69 @@
+I read in a Norwegian news publication yesterday that more
+than 50% of Norwegians don't care about Internet and
+network surveillance [1]. In the original 60 page report
+(survey and report ordered by the Norwegian Data Protection
+Authority), named Privacy 2014 - The Current State and
+Trends ("Personvern 2014 - Tilstand og Trender"), 46% of the
+1501 participants state that they've gotten more concerned
+with privacy over the last 2-3 years.
+
+The follow up question that the survey presented was "How
+much do you care about privacy?". In the 1997 version of the
+survey 77% said they were "pretty engaged or very engaged"
+in privacy, while in 2013 there's an increase to 87%. Not as
+bad as the news publication wants it to be in other words. I
+guess what is referred to is mentioned in the section "The
+Chilling Effects in Norway", where more than half of the
+respondents state they haven't changed online behaviour
+after the revelations of the American surveillance
+methodologies. I think this correlates to the next section
+(below). Also, more than 45% state that they would have
+continued as normal if Norway were to start a massive
+surveillance campaign in collaboration with foreign
+intelligence.
+
+I read one section where asked "how much control of your own
+situation do you feel you have?". More than half of the
+respondents answered themselves, and 33% the government. The
+latter is pretty amazing in my opinion. It's obviously
+yourself that is responsible for your own situation. Seen in
+regard to that more than 78% wouldn't pay 20 bucks a month
+for privacy in online services it's even better.
+
+The report also has its own section dedicated to the
+Snowden revelations. Pretty interesting that 53% responded
+that they didn't care about the surveillance, it is
+unproblematic or that it's just plain
+necessary. Interesting, considering that it's another nation
+state than Norway we're talking about here. I could have
+understood it if it was our own government, but another
+country? Anyways, that's the facts.
+
+One question that I perhaps miss in the survey is "have you
+done anything to protect your online presence from
+surveillance?". One of the alternatives could for instance
+be: "I use end-to-end encryption, such as GPG". It was
+obviously not that technical a survey, and I can respect
+that - but at the same time I see that's where it has to
+end at some point. Thinking if I was employed in another
+type of occupation: I think people would have continued as
+normal if we get a mass-surveillance state because you get
+to a point of exhaustion due to the complexity of the
+technology and lack of knowledge on how to actually protect
+yourself. I also think that the hypothetical question of
+awareness of a mass-surveillance state would have had more
+chilling effects than people actually respond. The question
+actually reminds me of the Iron Curtain period, thinking
+that you are always surveilled.
+
+The survey can be read in full here [2] (Norwegian), and I
+think it's pretty good and thorough on the current state of
+privacy in Norway. The survey was delivered by Opinion
+Perduco. The 1997 survey was delivered by Statistics Norway.
+
+
+[1] http://translate.google.com/translate?sl=auto&tl=en&js=n&prev=_t&hl=en&ie=UTF-8&u=http%3A%2F%2Fwww.digi.no%2F926712%2Fhalvparten-gir-blaffen  
+[2] https://www.datatilsynet.no/Nyheter/2014/Personvern-2014-tilstand-og-trender-/  
+
+
+
--- a/data/relayd-multidomain.md
+++ b/data/relayd-multidomain.md
@ -0,0 +1,134 @@
+While running a relayd service for a multi-domain instance
+recently I quickly came into an issue with relayd routing.
+
+relayd(8) is the relay daemon in OpenBSD.
+
+I run two local services that I front with relayd:
+
+* service A
+* service B
+
+These two I define in relayd.conf(5):
+
+    ext_addr="<SOME-IP>"
+    honk_port="31337"
+    inks_port="31338"
+    table <serviceA> { 127.0.0.1 }
+    table <serviceB> { 127.0.0.1 }
+
+To make sure relayd logs sufficiently for traceability I apply the
+following options:
+
+    log state changes
+	log connection
+
+The next part of my relayd.conf is creating a configuration for
+the relay service ("protocols are templates defining settings and rules for relays"):
+
+    http protocol https { }
+
+For the service definition I make sure to add the remote address
+and local address:
+
+      match request header append "X-Forwarded-For" value "$REMOTE_ADDR"
+      match request header append "X-Forwarded-By" \
+        value "$SERVER_ADDR:$SERVER_PORT"
+
+A further important logging configuration comes next, and I make
+sure my relay logs the host, X-Forwarded-For, User-Agent, 
+Referer and url:
+    
+    match header log "Host"
+    match header log "X-Forwarded-For"
+    match header log "User-Agent"
+    match header log "Referer"
+    match url log
+
+For performance [1]:
+
+    tcp { nodelay, sack, socket buffer 65536, backlog 100 }
+
+Next I disable vulnerable ciphers:
+
+    tls no tlsv1.0
+    tls no tlsv1.1
+    tls tlsv1.2
+
+Sadly tlsv1.3 is still in -current, so we will have to wait for
+that.
+
+I configure keys as follows:
+
+    tls ca cert "/etc/ssl/cert.pem"
+    tls keypair serviceA.domain
+    tls keypair serviceB.domain
+
+Finally we use the tables defined initially to route traffic to
+the right internal service:
+
+    match request header "Host" value "serviceA.domain" forward to <serviceA>
+    match request header "Host" value "serviceB.domain" forward to <serviceB>
+
+And that is it for the service definition.
+
+In addition we define the relay ("relays will forward traffic
+between a client and a target server") as follows. The "protocol
+https" is the junction between the two parts of the config.
+
+    relay https_relay {
+      listen on $ext_addr port https tls
+      protocol https
+      
+      forward to <serviceA> port $honk_port check tcp
+      forward to <serviceB> port $inks_port check tcp
+    }
+
+The whole config:
+
+    ext_addr="159.100.245.242"
+    honk_port="31337"
+    inks_port="31338"
+    table <honk> { 127.0.0.1 }
+    table <inks> { 127.0.0.1 }
+
+    log state changes
+    log connection
+
+    http protocol https {
+      match request header append "X-Forwarded-For" value "$REMOTE_ADDR"
+      match request header append "X-Forwarded-By" \
+        value "$SERVER_ADDR:$SERVER_PORT"
+      match request header set "Connection" value "close"
+
+      match header log "Host"
+      match header log "X-Forwarded-For"
+      match header log "User-Agent"
+      match header log "Referer"
+      match url log
+
+      tcp { nodelay, socket buffer 65536, backlog 100 }
+
+      tls no tlsv1.0
+      tls no tlsv1.1
+      tls tlsv1.2
+      tls ca cert "/etc/ssl/cert.pem"
+
+      tls keypair cybsec.network
+      tls keypair inks.cybsec.network
+
+      match request header "Host" value "cybsec.network" forward to <honk>
+      match request header "Host" value "inks.cybsec.network" forward to <inks>
+    }
+
+    relay https_relay {
+      listen on $ext_addr port https tls
+      protocol https
+
+      forward to <honk> port $honk_port check tcp
+      forward to <inks> port $inks_port check tcp
+    }
+
+
+[1] https://calomel.org/relayd.html
+
+    
--- a/data/remote-forensics.md
+++ b/data/remote-forensics.md
@ -0,0 +1,159 @@
+Like  everything  else  in   information  security,  forensics  is
+constantly  evolving.    One  matter   of  special   interest  for
+practitioners  is doing  forensics on  remote computers,  not that
+it's entirely new.
+
+The use-case  is self-explanatory to  those working in  the field,
+but for the beginners I'll give a brief introduction.
+
+When you  get a case  on your desk and  it lights up  as something
+interesting, what do you do? Probably your first step is searching
+for known malicious indicators in network logs.  Finding something
+interesting on  some of the clients,  let's say ten in  this case,
+you decide to  put some more effort into explaining  the nature of
+the activity.  None of the clients is nearby, multiple of them are
+even on locations with 1Mbps upload speeds.
+
+The next phase would probably be a search in open sources, perhaps
+turning out in support of something fishy going on.  Now you'd like
+to examine  some of the client  logs for known hashes  and strings
+you found,  and the traditional  way to  go is acquiring  disk and
+memory images physically.  Or is it? That would  have easily taken
+weeks for ten clients.  In this case you are lucky  and you have a
+tool for performing remote forensics at hand. The tool was a major
+roll-out for your organization after a larger breach.
+
+What's new in remote forensics is that the tools begin to get more
+mature, and  by that  I would  like to  introduce two  products of
+which I find most relevant to the purpose:
+
+* Google Rapid Response (GRR) [1]
+* Mandiant for Incident Response (MIR) [2]
+
+Actually I haven't put the latter option to the test (MIR supports
+OpenIOC which is an advantage) - but I have chosen to take GRR
+for a spin for some time now. There are also other tools which may
+be of interest to you such  as Sourcefire FireAmp which I've heard
+performs well for end-point-protection.  I've chosen to leave that
+out  of this  presentation  since  this  is  about  a  different
+concept. Surprisingly the following will use GRR as a basis.
+
+For this  post there are  two prerequisites  for you to  follow in
+which I highly recommend to get the feel with GRR:
+
+* Setup a GRR server [3]. In  this post I've used the current beta
+  3.0-2, running all  services on the same  machine, including the
+  web server  and client roll-in  interface. There is  one install
+  script for the beloved Ubuntu here, but I couldn't get it easily
+  working on  other systems.  One exception  is Debian  which only
+  needed minor changes. If you  have difficulties with the latter,
+  please give me a heads-up.
+* Sacrifice one client (it won't  brick a production system as far
+  as I  can tell  either though)  to be  monitored. You  will find
+  binaries after packing the clients  in the GRR Server setup. See
+  the screenshot below for  details. The client will automatically
+  report in to the server.
+
+You can find the binaries by  browsing from the home screen in the
+GRR web GUI. Download and install the one of choice.
+
+A word of warning before you read  the rest of  this post:  The GRR
+website ~~is~~  was a little  messy and not entirely  intuitive. I
+found, after a lot of searching, that  the best way to go about it
+is reading the code usage examples in the web GUI, especially when
+it comes to  what Google named flows. Flows are  little plugins in
+GRR that may for  instance help you task GRR to fetch  a file on a
+specific path.
+
+Notice  the call  spec. This  can be  transferred directly  to the
+iPython  console.  Before  I started  off  I watched  a couple  of
+presentations  that Google  have delivered  at LISA.  I think  you
+should too if you'd like to see where GRR is going and why it came
+to be.  The one here gives  a thorough introduction on  how Google
+makes  sure  they  are  able  to  respond  to  breaches  in  their
+infrastructure [4].
+
+I would also  like to recommend a  presentation by  Greg Castle at
+BlackHat for reference  [5].  For usage and  examples Marley Jaffe
+at Champlain College has put up a great paper [6]. Have a look at
+the exercises at the end of it.
+
+What  is good  with  GRR is  that it  supports  the most  relevant
+platforms: Linux, Windows  and OS X. This is  also fully supported
+platforms at Google, so expect development to have a practical and
+long-term perspective.
+
+While  GRR  is  relevant,  it  is  also  fully  open  source,  and
+extensible.  It's  written in  Python with  all the  niceness that
+comes  with it.   GRR has  direct memory  access by  custom-built
+drivers. You will find support  for Volatility in there. Well they
+forked it into a new project named Rekall which is more suited for
+scale. Anyways it provides support for plugins such as Yara.
+
+If  you  are like  me  and  got  introduced to  forensics  through
+academia, you will like that GRR builds on Sleuthkit through pytsk
+for disk forensics (actually you  may choose what layer you'd like
+to stay  on). When you've retrieved  an item, I just  love that it
+gets  placed  in  a  virtual  file system  in  GRR  with  complete
+versioning.
+
+The virtual  filesystem where  all the  stuff you've  retrieved or
+queried  the  client  about  is stored  with  versioning for  your
+pleasure. In  addition to  having a way-to-go  console application
+GRR provides  a good web  GUI which  provides an intuitive  way of
+browsing about everything you can do  in the console.  I think the
+console is where Google would like you to live though.
+
+And so I  ended up  on the  grr_console which  is a  purpose-built
+iPython  shell, writing  scripts for  doing  what I  needed it  to
+do. Remember  that call spec  that I mentioned initially,  here is
+where  it gets  into  play. Below  you see  an  example using  the
+GetFile call spec (notice that  the pathspec in the flow statement
+says OS, this might as well have been ``REGISTRY`` or ``TSK``):
+
+
+    token = access_control.ACLToken(username="someone", reason="Why")
+
+    flows=[]
+    path="/home/someone/nohup.out"
+
+    for client in SearchClients('host:Webserver'):
+      id=client[0].client_id
+      o=flow.GRRFlow.StartFlow(client_id=str(id),
+      flow_name="GetFile", pathspec=rdfvalue.PathSpec(path=path, pathtype=rdfvalue.PathSpec.PathType.OS))
+      flows.append(o)
+
+    files=[]
+    while len(flows)>0:
+      for o in flows:
+        f=aff4.FACTORY.Open(o)
+        r = f.GetRunner()
+        if not r.IsRunning():
+          fd=aff4.FACTORY.Open(str(id)+"/fs/os%s"%path, token=token)
+          files.append(str(fd.Read(10000)))
+          flows.remove(o)
+
+If interested  in Mandiant IR (MIR)  and its concept, I'd  like to
+recommend another Youtube video by  Douglas Wilson, which is quite
+awesome as well [7].
+
+Update 2020: Today I wouldn't recommend MIR/FireEye HX, but rather
+something  like  LimaCharlie  [8]  due  to  the  lack  of  hunting
+capabilities in the HX platform.
+
+
+[1] https://github.com/google/grr
+
+[2] http://www.fireeye.com/products-and-solutions/endpoint-forensics.html
+
+[3] https://grr-doc.readthedocs.io/en/latest/installing-grr-server/index.html
+
+[4] https://2459d6dc103cb5933875-c0245c5c937c5dedcca3f1764ecc9b2f.ssl.cf2.rackcdn.com/lisa13/castle.mp4
+
+[5] GRR: Find All The Badness - https://docs.google.com/file/d/0B1wsLqFoT7i2Z2pxM0wycS1lcjg/edit?pli=1
+
+[6] Jaffe, Marley. GRR Capstone Final Paper
+
+[7] NoVA Hackers Doug Wilson - Lessons Learned from using OpenIOC: https://www.youtube.com/watch?v=L-J5DDG_SQ8
+
+[8] https://www.limacharlie.io/
--- a/data/signals-feeds.md
+++ b/data/signals-feeds.md
@ -0,0 +1,219 @@
+
+## Key Takeaways
+
+* It is possible to index and tag a high number of RSS, OTX and
+  Twitter articles on limited computational power in seconds
+* Building logic around timestamps is complex
+* Structuring the resulting data in a graph is meaningful.
+
+## Introduction
+
+Today I am sharing some details about one of the multi-year
+projects I am running. The project motivation is:
+
+> To stay up to date on cyber security developments within days.
+
+I didn't want a realtime alerting service, but an analysis tool to
+gather important fragments of data over time. These fragments
+make up the basis of my open source research. The curated
+information usually ends up on a channel like an NNTP feed,
+sometimes with added comments.
+
+My solution was to create a common interface to ingest and search
+content from third party sources. Achieving this is difficult, and
+requires some work, but I found it feasible.
+
+Going through some basic research I found that much of what
+happens on the web eventually ends up on one of the following
+three places (e.g. a mention):
+
+1. OTX
+2. Twitter
+3. RSS
+
+After some work I found that there were two things important to me
+in the first iteration:
+
+1. Being able to recognize the characteristics of the content
+2. Knowing the publish time of the data
+
+The primary problem was thus to build a program that scales with a
+large number of feeds.
+
+Going from there I built a prototype in Python, which I've now
+matured into a more performant Golang version. What follows from
+here is my experience from that work.
+
+The tested component list of the program I am currently running is:
+
+* Gofeed [1]
+* Badger [2]
+* Apache Janusgraph [3,4]
+* Apache Cassandra [5]
+* Go-Twitter [6]
+* Alienvault OTX API [7]
+* Araddon Dateparse [8]
+
+[1] https://github.com/mmcdole/gofeed  
+[2] https://github.com/dgraph-io/badger  
+[3] https://janusgraph.org  
+[4] https://docs.janusgraph.org/basics/gremlin/  
+[5] https://cassandra.apache.org  
+[6] https://github.com/dghubble/go-twitter/twitter  
+[7] https://github.com/AlienVault-OTX/OTX-Go-SDK/src/otxapi   
+[8] https://github.com/araddon/dateparse  
+
+
+
+## The Lesson of Guestimation: Not All Feeds Are Created Equal
+
+Timestamps are perhaps some of the more challenging things to
+interpret in a crawler and search engine. RSS is a loose standard,
+at least when it comes to implementation. This means that
+timestamps may vary: localized, invalid per the RFC standards,
+ambiguous, missing and so on. Much like the web otherwise. Luckily
+without javascript.
+
+The goal is simply about recognizing which timestamp is the most
+correct one. A feed may contain one form of timestamp, while a
+website may indicate another one. To solve this I use and compare
+two levels of timestamping:
+
+* The feed published, updated and all items' individual timestamps
+* The item and website last modified timestamps
+
+Looking back, solving the first level of timestamping was
+straight forward. These timestamps are present in the feed and for
+RSS the logic to build a list of timestamps would look like this:
+
+
+    /* First we check the timestamp of all
+    *  feed items (including the primary).
+    *  We then estimate what is the newest
+    *  one */
+    var feedElectedTime time.Time
+    var ts = make(map[string]string)
+    ts["published"] = feed.Published
+    ts["updated"] = feed.Updated
+    var i=0
+    for _, item := range feed.Items {
+	    ts[strconv.Itoa(i)] = item.Published
+	    i++
+	    ts[strconv.Itoa(i)] = item.Updated
+	    i++
+    }
+    feedElectedTime, _, err = tsGuestimate(ts, link, false)
+
+The elected time can be used to compare with a previous feed
+checkpoint to avoid downloading all items again. Using the above
+logic I was also able to dramatically increase the success rate of
+the program, since it requires a valid timestamp. The
+`tsGuestimate` logic is something for a future post.
+
+Further the item/website timestamps require a similar method, but in
+addition I found it an advantage to do a HTTP HEAD request to the
+destination URL to combine with the timestamps available from the
+feed. The central and important aspect here is to abort retrieval
+if an item already exists in the database, as this dramatically
+increases the processing speed in each run.
+
+False timestamps are a problem. I noticed that websites publish
+feeds with dynamic timestamps, which means that when you retrieve
+the feed it adds the timestamp of now. This obviously creates
+resource-intensive operations since the whole feed is then at risk
+for re-indexing each run.
+
+
+## Noise Reduction: Recognizing Content Characteristics
+
+Retrieving content is possible in several ways. For recognizing the
+content I opted for and have success/good coverage using
+regex. This is also some of the good things of curating articles,
+since this means experience with questions such as "why did I miss
+this article?" evolves into a new iteration of the program input.
+
+For instance, to stay on top of targeted cyber operations, I found
+that much used phrases in articles were "targeted attack" and
+"spear phishing". So based on that I deployed the following
+keyword search (regular expression) which applies to every new
+item ingested:
+
+    "targeted":"(?i)targeted\\satt|spear\\sp",
+
+So a new article containing "targeted attack" in the body or title
+is tagged with a hotword "targeted". Another hotword could be
+"breach".
+
+Perhaps not surprising this data can be modelled in a graph like
+follows.
+
+    Tweet ─> URL in tweet ┌─> Targeted
+	                      └─> Breach
+
+## A Practical Example
+
+Traversing a news graph, we can go from the hotword "targeted", to
+all items and articles for the past days linked to the hotword.
+
+I use Gremlin for querying. An example is shown below (some
+details omitted):
+
+    keyw="targeted"
+    _date="2021-02-10"
+    g.V().hasLabel('hotword').has('title',keyw).as("origin_hw").
+      in().in().hasLabel('article:m').has('timestamp',gte(_date)).order().by('timestamp',asc).as('article').
+      .select("origin_hw","article").by(values('title','timestamp'))
+
+The procedure above summarized:
+
+1. Find the node with the keyword "targeted"
+2. Find all articles (for instance a tweet) that are two steps out
+   from the keyword (since these may be linked via a content node)
+3. Get title and timestamp from hotword and tweet
+
+Using a match, which was incidentally not a tweet but an article,
+from a RSS feed, we find the following:
+
+    ==>{origin_hw=targeted, article=WINDOWS KERNEL ZERO-DAY EXPLOIT (CVE-2021-1732) IS USED BY BITTER APT IN TARGETED ATTACK}
+
+Retrieving the article with Gremlin, we can decide the source:
+
+    gremlin > g.V().has('title','WINDOWS KERNEL ZERO-DAY EXPLOIT (CVE-2021-1732) IS USED BY BITTER APT IN TARGETED ATTACK').valueMap()
+
+
+    =>{link=[https://www.reddit.com/r/netsec/.rss], 
+    title=[WINDOWS KERNEL ZERO-DAY EXPLOIT (CVE-2021-1732) IS USED BY BITTER APT IN TARGETED ATTACK], 
+    src=[Reddit - NetSec], 
+    src_type=[rss],
+    sha256=[8a285ce1b6d157f83d9469c06b6accaa514c794042ae7243056292d4ea245daf],
+    added=[2021-02-12 10:42:16.640587 +0100 CET],
+    timestamp=[2021-02-10 20:31:06 +0000 +0000], 
+    version=[1]}
+
+    ==>{link=[http://www.reddit.com/r/Malware/.rss], 
+    title=[WINDOWS KERNEL ZERO-DAY EXPLOIT (CVE-2021-1732) IS USED BY BITTER APT IN TARGETED ATTACK], 
+    src=[Reddit - Malware], 
+    src_type=[rss],
+    sha256=[69737b754a7d9605d11aecff730ca3fc244c319f35174a7b37dd0d1846a823b7],
+    added=[2021-02-12 10:41:48.510538 +0100 CET],
+    timestamp=[2021-02-10 20:35:11 +0000 +0000],
+    version=[1]}
+
+In this instance the source was two Reddit posts which triggered
+the keyword in question and others about a targeted incident in
+China. Additionally this triggered a zero day hotword.
+
+
+## Summary
+
+Through this post I have shown some key parts of how to build a
+feed aggregator that can scale to thousands of feeds on a single
+computer, with update times in seconds.
+
+I have also given a brief view on how Janusgraph and similar
+systems can be used to model such data in a way which makes it
+possible to search, find and eventually stay up to date on
+relevant information to cyber security.
+
+When in place such a system may save hours per day since the data
+is normalised and searchable in one place.
--- a/data/ssh-ca-proxyjump.md
+++ b/data/ssh-ca-proxyjump.md
@ -0,0 +1,228 @@
+## Key Takeaways
+
+* SSH has a key-signing concept that in combination with a
+  smartcard provides a lean, off-disk process
+* A SSH-CA provides the possibility of managing access
+  without a central point of failure
+* The use of SSH Jumphost is an easier way to tunnel
+  sessions end-to-end encrypted, while still maintaining
+  visibility and control through a central point
+
+## Introduction
+
+This post is an all-in-one capture of my recent discoveries with
+SSH. It is an introduction for a technical audience.
+
+It turns out that SSH is ready for a zero trust and
+microsegmentation approach, which is important for
+management of servers. Everything described in this post is
+available as open source software, but some parts require a
+smartcard or two, such as a Yubikey (or a Nitrokey if you
+prefer open source. I describe both).
+
+I also go into detail on how to configure the CA key without
+letting the key touch the computer, which is an important
+principle.
+
+The end-result should be an architecture providing a better
+overview of the infrastructure and a second logon-factor
+independent of phones and OATH.
+
+## SSH-CA
+
+My exploration started when I read a 2016-article by
+Facebook engineering [1]. Surprised, but concerned with the
+configuration overhead and reliability I set out to test the
+SSH-CA concept. Two days later all my servers were on a new
+architecture.
+
+SSH-CA works predictably like follows:
+
+                                           [ User generates key on Yubikey ]
+                                                            |
+                                                            |
+                                                            v
+    [ ssh-keygen generates CA key ] --------> [ signs pubkey of Yubikey ]
+                    |                           - for a set of security zones
+                    |                           - for users
+                    |                                       |
+                    |                                       |
+                    |                                       v
+                    v                         pubkey cert is distributed to user
+    [ CA cert and zones pushed to servers ]     - id_rsa-cert.pub
+      - auth_principals/root (root-everywhere)
+      - auth_principals/web (zone-web)
+
+The commands required in a nutshell:
+
+    # on client
+    $ ssh-keygen -t rsa
+    
+    # on server
+    $ ssh-keygen -C CA -f ca
+    $ ssh-keygen -s ca -I <id-for-logs> -n zone-web -V +1w -z 1 id_ecdsa.pub
+    
+    # on client
+    cp id_ecdsa-cert.pub ~/.ssh/
+
+Please refer to the next section for a best practice storage
+of your private key.
+
+On the SSH server, add the following to the SSHD config:
+
+    TrustedUserCAKeys /etc/ssh/ca.pub
+    AuthorizedPrincipalsFile /etc/ssh/auth_principals/%u
+
+What was conceptually new for me was principals and
+authorization files per server. This is how it works:
+
+1. Add a security zone, like zone-web, during certificate
+   signing - "ssh-keygen * -n zone-web *". Local username does
+   not matter
+2. Add a file per user on the SSH server, where zone-web
+   is added where applicable -
+   e.g. "/etc/ssh/auth_principals/some-user" contains "zone-web"
+3. Login with the same user as given in the zone file - "ssh some-user@server"
+
+This is the same as applying a role instead of a name to the
+authorization system, while something that IDs the user is
+added to certificate and logged when used.
+
+This leaves us with a way better authorization and
+authentication scheme than authorized_keys that everyone
+uses. Read on to get the details for generating the CA key
+securely.
+
+
+## Keeping Private Keys Off-disk
+
+An important principle I have about private keys is to
+rather cross-sign and encrypt two keys than to store one on
+disk. This was challenged for the SSH-CA design. Luckily I found
+an article describing the details of PKCS11 with ssh-keygen
+[2]:
+
+> If you're using pkcs11 tokens to hold your ssh key, you
+> may need to run ssh-keygen -D $PKCS11_MODULE_PATH
+> ~/.ssh/id_rsa.pub so that you have a public key to
+> sign. If your CA private key is being held in a pkcs11
+> token, you can use the -D parameter, in this case the -s
+> parameter has to point to the public key of the CA.
+
+Yubikeys on macOS 11 (Big Sur) require the yubico-piv-tool
+to provide PKCS#11 drivers. It can be installed using
+Homebrew:
+
+    $ brew install yubico-piv-tool
+    $ PKCS11_MODULE_PATH=/usr/local/lib/libykcs11.dylib
+
+Similarly the procedure for Nitrokey is:
+
+    $ brew cask install opensc
+    $ PKCS11_MODULE_PATH=/usr/local/lib/opensc-pkcs11.so
+
+Generating a key on-card for Yubikey:
+
+    $ yubico-piv-tool -s 9a -a generate -o public.pem
+
+For the Nitrokey:
+
+    $ pkcs11-tool -l --login-type so --keypairgen --key-type RSA:2048
+
+Using the exported CA pubkey and the private key on-card a
+certificate may now be signed and distributed to the user.
+
+    $ ssh-keygen -D $PKCS11_MODULE_PATH -e  > ca.pub
+
+    $ ssh-keygen -D $PKCS11_MODULE_PATH -s ca.pub -I example -n zone-web -V +1w -z 1 id_rsa.pub
+    Enter PIN for 'OpenPGP card (User PIN)': 
+    Signed user key .ssh/id_rsa-cert.pub: id "example" serial 1 for zone-web valid from 2020-10-13T15:09:00 to 2020-10-20T15:10:40
+
+The same concept goes for a user smart-card, except that it is
+plug and play as long as you have the gpg-agent
+running. When the id_rsa-cert.pub (the signed certificate of
+e.g. a Yubikey) is located in ~/.ssh, SSH will find the
+corresponding private key automatically. The workflow will
+be something along these lines:
+
+    [ User smartcard ] -----------> [ CA smartcard ]
+             ^          id_rsa.pub          |
+             |                              | signs
+             |------------------------------|
+               sends back id_rsa-cert.pub
+
+
+## A Simple Bastion Host Setup
+
+The other thing I wanted to mention was the -J option of
+ssh, ProxyJump.
+
+ProxyJump allows a user to confidentially, without risk of a
+man-in-the-middle (MitM), tunnel the session through a
+central bastion host end-to-end encrypted.
+
+Having end-to-end encryption for an SSH proxy may seem
+counter-intuitive since it cannot inspect the
+content. I believe it is the better option due to:
+
+* It is a usability compromise, but also a security
+  compromise in case the bastion host is compromised.
+* Network access and application authentication (and even
+  authorization) goes through a hardened point.
+* In addition the end-point should also log what happens on
+  the server to a central syslog server.
+* A bastion host should always be positioned in front of the
+  server segments, not on the infrastructure perimeter.
+
+A simple setup looks like the following:
+
+    [ client ]  ---> [ bastion host ] ---> [ server ]
+
+
+Practically speaking a standalone command will look like
+follows:
+
+    ssh -J jump.example.com dest.example.com
+
+
+An equivalent .ssh/config will look like:
+
+     Host j.example.com
+     HostName j.example.com
+     User sshjump
+     Port 22
+
+     Host dest.example.com
+     HostName dest.example.com
+     ProxyJump j.example.com
+     User some-user
+     Port 22
+
+With the above configuration the user can compress the
+ProxyJump SSH-command to "ssh dest.example.com".
+
+## Further Work
+
+The basic design shown above requires one factor which is
+probably not acceptable in larger companies: someone needs
+to manually sign and rotate certificates. There are some
+options mentioned in open sources, where it is normally to
+avoid having certificates on clients and having an
+authorization gateway with SSO. This does however introduce
+a weakness in the chain.
+
+I am also interested in using SSH certificates on iOS, but
+that has turned out to be unsupported in all apps I have
+tested so far. It is however on the roadmap of Termius,
+hopefully in the near-future. Follow updates on this subject
+on my Honk thread about it [4].
+
+For a smaller infrastructure like mine, I have found the
+manual approach to be sufficient so far.
+
+
+[1] Scalable and secure access with SSH: https://engineering.fb.com/security/scalable-and-secure-access-with-ssh/  
+[2] Using a CA with SSH: https://www.lorier.net/docs/ssh-ca.html  
+[3] Using PIV for SSH through PKCS #11:
+https://developers.yubico.com/PIV/Guides/SSH_with_PIV_and_PKCS11.html  
+[4] https://cybsec.network/u/tommy/h/q1g4YC31q45CT4SPK4  
--- a/data/ssh-certs-apple-t2.md
+++ b/data/ssh-certs-apple-t2.md
@ -0,0 +1,94 @@
+## Key Takeaways
+
+* SSH certificates can be used with the Apple T2 chip on
+  macOS as an alternative to external smart cards,
+  authenticated with a fingerprint per session.
+* The Mac T2 chip serves as an extra security layer by creating
+  private keys in the secure enclave.
+* The CA can be stored on an external smartcard, only
+  signing for access in a limited period - again limiting
+  the exposure.
+
+## Introduction
+
+Over the past days I have been going down a deep, deep
+rabbit hole of SSH proxy jumping and SSH certificates
+combined with smart cards.
+
+After playing around with smart cards for SSH, I recognized
+that not only external smart cards such as the Yubikey or
+Nitrokey are a possible lane to go down.
+
+Mac computers come with a security chip called T2. This chip is
+also known to host something Apple calls Secure Enclave [1]. In
+the Secure Enclave you can store keys.
+
+It will probably not serve as an equally secure solution as with
+external smart cards, but it is a better balance for usability.
+
+The T2 is permanently stored in hardware on one host only,
+so the access needs to be signed on a per-host basis. As
+such I would say the T2 and external smart cards complement
+each other.
+
+Always having the key available will bring two additional
+vulnerabilities:
+
+* If compromised, the key is always available logically
+* Separation of equipment and key is not possible e.g. in a
+  travel situation
+
+With a central pubkey directory tied to an identity
+(automated), the T2 can be of better use for an enterprise
+setup.
+
+## Setting up a Private Key in Secure Enclave
+
+While fiddling around I found sekey on Github [2]. The
+project seems abandoned, but it is the secure enclave that
+does the heavy lifting.
+
+The short and easy setup is:
+
+    $ brew cask install sekey
+    $ echo "export SSH_AUTH_SOCK=$HOME/.sekey/ssh-agent.ssh" >> ~/.zshrc
+    $ echo "IdentityAgent ~/.sekey/ssh-agent.ssh" >> ~/.ssh/config
+    $ source ~/.zshrc
+
+A keypair can now be generated in the secure enclave by:
+
+    $ sekey --generate-keypair SSH
+    $ sekey --list-keys
+
+Now export the public key of the curve generated on-chip:
+
+    $ sekey --export-key <id> > id_ecdsa.pub
+
+Using the trick we found in our recent venture into using
+smart cards for signing the key, we can use PKCS#11 without
+compromising security [3]. In this case I use a Nitrokey:
+
+    $ brew cask install opensc
+    $ PKCS11_MODULE_PATH=/usr/local/lib/opensc-pkcs11.so
+    $ ssh-keygen -D $PKCS11_MODULE_PATH -e > ca.pub
+    $ ssh-keygen -D $PKCS11_MODULE_PATH -s ca.pub -I example -n zone-web -V +1h -z 1 id_ecdsa.pub
+    Enter PIN for 'OpenPGP card (User PIN)': 
+    Signed user key id_ecdsa-cert.pub: id "example" serial 1 for zone-web valid from 2020-10-14T20:26:00 to 2020-10-14T21:27:51
+    cp id_ecdsa-cert.pub ~/.ssh/
+
+If you now try to ssh into a server using the given
+certificate authority as shown in the SSH-CA post [3],
+access should be granted with a fingerprint.
+
+## A Word of Caution
+
+The T2 has some vulnerabilities shown recently [4]. Make
+sure to include these in your risk assessment of using
+it. If you won't go down the smart card route it will still
+be better than storing the key on disk.
+
+
+[1] https://support.apple.com/guide/security/secure-enclave-overview-sec59b0b31ff/web  
+[2] https://github.com/sekey/sekey  
+[3] https://secdiary.com/2020-10-13-ssh-ca-proxyjump.html  
+[4] https://inks.cybsec.network/tag/t2  
--- a/data/telemetry.md
+++ b/data/telemetry.md
@ -0,0 +1,250 @@
+Telemetry for cyber security is currently at a
+crossroads. While past methods have been efficient by being
+based on network monitoring, the current revolution in
+encryption and the distributed workspace makes it
+insufficient to solely rely on network monitoring. Through
+this post we are going to focus on the current challenges.
+
+> Telemetry is an electrical apparatus for measuring a
+> quantity (such as pressure, speed, or temperature) and
+> transmitting the result especially by radio to a distant
+> station  
+> – Merriam-Webster
+
+Telemetry, a term mostly used by AV-vendors, has become
+broadly applied as services change from centralised to
+decentralised and geographically spread. Yesterday an employee
+would work at his desk from 9-5 and then go home, while
+today's modern worker moves around the office area and can
+basically work from anywhere in the world when they feel
+like it.
+
+In cyber security, telemetry can generally be categorised
+in: 1) Network-centric and 2) endpoint-based. A complete
+telemetry profile is essential for being able to monitor
+security events and to execute retrospective
+analysis. Through my recent article on indicators [1] I
+proposed a structure for indicators organised in three
+levels of abstraction. In this article a telemetry profile
+means something that covers a degree of these three levels.
+
+    | Level of abstraction  |    | Formats
+    |-----------------------|----|-------------
+    | Behavior              |    | MITRE (PRE-)ATT&CK
+    |-----------------------|--->|-------------
+    | Derived               |    | Suricata+Lua, Yara
+    |-----------------------|--->|-------------
+    | Atomic                |    | OpenIOC 1.1
+    
+    
+## The Challenges
+
+There are generally two problems that need to be fully
+solved when collecting data for cyber security:
+
+* The use of encryption from end-to-end
+* Workers and thereby the defended environment are or will be distributed
+
+As of February 2017 the web was 50% encrypted [2]. Today
+that number [3] is growing close to 70%.
+
+For defense purposes, it is possible to identify malicious
+traffic, such as beaconing, through metadata analysis. There
+have been some developments on detecting anomalies in
+encrypted content lately - namely the fingerprinting of
+programs using SSL/TLS. In the future I believe this will be
+the primary role of network-based detection. This is
+actually a flashback to a pre-2010 monitoring environment
+when full content was rarely stored and inspected by
+security teams.
+
+An additional element to consider is the previous debate
+about public key pinning, which has now evolved into
+Expect-CT [4]. This means that man in the middle (MitM)
+techniques are going to be a no-no at some point. Yes, that
+includes your corporate proxy as well.
+
+There is one drawback and dealbreaker with the above for
+security teams: it requires access to the datastream used by
+the endpoints to be fully effective.
+
+VPNs are going away as more resilient and modern network
+architectures will become dominating. The most promising
+challenger at the moment is the Beyondcorp [5] (based on
+zero trust) architecture proposed by Google more than six
+years ago. A zero trust architecture means that clients will
+only check in to the corporate environment at the points
+that _they_ need or are in the vicinity of corporate
+resources. Other activity, such as browsing on external
+websites, is actually no longer going via the corporate
+infrastructure or its monitored links. Additionally, the
+endpoint is easily the most common infiltration vector.
+
+To be honest, the Beyondcorp model reflects to a larger
+extent how humans actually interact with computers. Humans
+have never been confined to the perimeter of the enterprise
+network. This may be some of the reason for organisations
+being in a currently defeatable state as well. The only ones
+to confine themselves to the enterprise network are
+ironically the network defenders.
+
+> The only ones to confine themselves to the enterprise network are
+> ironically the network defenders.
+
+The battle of controlling the technology evolution is not
+completely lost though, it is a matter of changing the
+mindset of where data or telemetry is collected. Yesterday
+it was at the corporate proxy or in the corporate
+environment - today it is on the endpoint and during the
+connections to valuable resources.
+
+For endpoints, the primary challenges currently faced are:
+
+* Maintaining the integrity of locally stored and buffered data
+* The availability and transport of data to a centralised logging instance
+* Confidentiality of the data in transport or at rest
+* Data source consistency for central correlation of information from several
+  host sources
+* Raising the stakes on operational security in a cat and mouse
+  chase between intruders and defenders
+  
+Remote logging is a subject that has gained much publicity
+previously, so we are not going into depth about that here.
+  
+### Existing Tooling For Endpoints
+
+This section was not originally a part of the scope of this
+article, but I'd like to establish a baseline of parts of
+the available tooling to handle the above issues. I also
+believe it touches some of the endpoint challenges.
+
+For the purpose of this article, we define the following
+well-known computer abstraction stack:
+
+1. Hardware
+2. Operating System
+3. Application
+
+Hardware verification and logging is currently a more or
+less unexplored field, with primarily only one tool
+available to my knowledge. That tool is Chipsec [6] which has
+been of interest and integrated into the Google Rapid
+Response (GRR) [7] project for some time.
+
+Operating system logs are well understood today, and many
+organisations manage logging from the host operating system
+properly.
+
+There are increasingly good event streaming and agent-based
+systems available, such as LimaCharlie [8], Sysmon [9] and
+Carbon Black [10]. The media focus of these platforms is on
+the more trendy term "hunting", but their real purpose is
+OS-level logging and pattern matching.
+
+Further, distributed forensic platforms are available from
+FireEye (HX) and an open source equivalent from Google named
+GRR. GRR has been featured extensively on this site
+previously. Common for these is that they do not stream
+events, but rather store information on the endpoint.
+
+Application layer logging is extremely challenging. The
+logging mechanism in this regard needs to be connected to
+the structure of the application itself, and there are a lot
+of applications. Further, many application developers do
+not focus on logging.
+
+Application logging is important and could be seen as the
+technical contextual information provided by the
+endpoint. Exposed applications that are important in terms
+of coverage:
+
+* Browsers
+* Email Readers
+* Application Firewalls (if you have one)
+* Instant Messaging Clients
+* Rich Document editors, such as Excel, Word, Powerpoint
+
+These applications are important since they are the first
+point of contact for almost any technical threat. Done
+right, application logs will be at a central location before
+the intruder manages to get a foothold on the client. Thus,
+the risk of data being misrepresented in the central system
+is highly reduced (integrity).
+
+Taking browsers and Microsoft Office as an example, there
+are some options readily available:
+
+* Firefox HTTP and DNS logging: mozilla.org [11]
+* Office Telemetry logging: Office Telemetry Log [12]
+
+The above examples are not security focused as far as I
+could tell, more often they are debug oriented. However, the
+same data is often what we are after as well (such as: did
+the document have a macro? or what is the HTTP header?).
+
+The dependency on the application developers to create
+logging mechanisms is quite a challenge in this
+arena. However, I believe the solution in cases where
+applications do not log sufficiently is to take advantage
+of plugins. Most modern applications support plugins to
+some extent.
+
+To summarise the tooling discussion, we can populate the
+computer abstraction layers with the mentioned tools.
+
+    | Level of abstraction  |    | Tools
+    |-----------------------|----|-------------
+    | Application           |    | Browser, Email and so on
+    |-----------------------|--->|-------------
+    | Operating System      |    | LC, CB, Sysmon, 
+    |-----------------------|--->|-------------
+    | Hardware              |    | Chipsec
+
+## Conclusions: How Do We Defend in The Future?
+
+In this article we have defined a structure and discussed in
+short one of the most prominent challenges faced by
+enterprise defenders today: how do we defend in the future?
+
+Technology. This is the point where technology alone is no
+longer the sole solution to defending a network. Modern
+network architectures mean that defenders need to be able
+to fully comprehend and use the human nature as sensors. It
+is also about building intuitive systems which make the
+necessary data and information available to the
+defenders. In my mind technology has never been the sole
+solution either, so the technology evolution is for the
+greater good.
+
+It seems obvious and unavoidable to me that network
+defenders must start looking outside the perimeter, just as
+intruders have done for many years already. This means
+adapting the toolsets available and lobbying for an
+architecture that reflects how humans actually use
+technology resources. Most people have owned private
+equipment for many years (surprise), and the line between
+employee and enterprise is blurred and confusing when
+reality now sinks in.
+
+This means, in the technology aspect, that an emphasis must
+be put on the endpoints - and that network monitoring must
+again be about the metadata of the activity. In short:
+collect metadata from networks and content from endpoints.
+
+Only this way will we, in the future, be able to create a
+full telemetry profile from each device under our
+responsibility.
+
+
+[1] Article on indicators: /indicators/  
+[2] 50% encrypted: https://www.eff.org/deeplinks/2017/02/were-halfway-encrypting-entire-web  
+[3] that number: https://letsencrypt.org/stats/  
+[4] Expect-CT: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Expect-CT  
+[5] Beyondcorp: https://cloud.google.com/beyondcorp/  
+[6] Chipsec: https://github.com/chipsec/chipsec  
+[7] Google Rapid Response (GRR): https://github.com/google/grr-doc/blob/master/publications.adoc  
+[8] LimaCharlie: https://github.com/refractionPOINT/lce_doc/blob/master/README.md  
+[9] Sysmon: https://www.rsaconference.com/writable/presentations/file_upload/hta-w05-tracking_hackers_on_your_network_with_sysinternals_sysmon.pdf  
+[10] Carbon Black: http://the.report/assets/Advanced-Threat-Hunting-with-Carbon-Black.pdf  
+[11] mozilla.org: https://developer.mozilla.org/en-US/docs/Mozilla/Debugging/HTTP_logging  
+[12] Office Telemetry Log: https://msdn.microsoft.com/en-us/library/office/jj230106.aspx  
--- a/data/travel.md
+++ b/data/travel.md
@ -0,0 +1,361 @@
+Travelling with  electronic devices  is a  challenge, and  this is
+certainly the  case if you do  not have a travel  program for your
+employees, where  you must tinker  with a new  setup on a  case by
+case basis. The  complexity of the matter is  tough, even when it
+comes to resources, as it requires full time attention.
+
+Some  organisations  choose  to  ignore  the  problem  altogether,
+others  again do  not fully  respect their own threat  model. The
+latter may be just  as dangerous, as it may lead  to a false sense
+of security for the travellers.
+
+This article is  about establishing a technical  laptop setup that
+can be  re-used with ease.  Thus, other operational  and strategic
+aspects are left out. The  information presented revolves around
+organisations, but  might as  well apply for  a private  travel of
+exposed individuals.
+
+## Main Drivers
+
+With that  out of the way:  multiple overall factors are  left for
+consideration.  The following  factors  are the  main drivers  and
+equally important when  developing a technical model  of an abroad
+operation.
+
+*  Threat resiliency.  Equipment  on travel  can  really never  be
+  secured well enough, but it can be hardened to the degree that a
+  threat actor needs to risk exposure to compromise it
+* Usability  for the traveller. Equipment  that feels inconvenient
+  will be avoided by the traveller at some point
+* Usability for the supporting  organisation (both security and IT
+  operations). Such setups may require  much time and attention to
+  develop and if  there are an increasing number  of travellers to
+  high risk areas the setup needs to scale
+*  Cost.  A  travel  program is  a  balance  between  environment,
+  security  and  cost.  If   the  cost  and  environmental  impact
+  surpasses the value that needs to be secured, the travel program
+  misses some of its  value. Critical infrastructure organisations
+  are a different ball game than other industries on this point.
+
+When it comes to threats, the  most prominent one is the evil maid
+infiltration vector - which  is basically someone gaining physical
+access to  a computer.  Motherboard recently  published an article
+on how a  malicious party could add a backdoor  to a Dell (example
+used) laptop in less than 5 minutes [1].
+
+Other examples of relevant techniques used against travellers are:
+electronic eavesdropping using  cell networks, physical monitoring
+of  hotel rooms  (e.g.  camera  surveillance), malicious  charging
+stations  and   so  on.  More  details   on  general  infiltration
+techniques can  be found  in the  Mitre ATT&CK's  "Initial Access"
+category (each described on their Wiki) [2,3].
+
+## Conceptual Overview
+
+Now that we have reviewed the main drivers, the question is if you
+can protect against the given threat model in an easily achievable
+way. To assess that we will first have to take a look at a conceptual
+model for  travel. Taking  a top-down  approach, the  travel setup
+will in most cases consist of two components:
+
+1. The devices used for travel
+2. The server side infrastructure
+
+There  are arguments  for a  standalone operation,  but the  legal
+ramification and  practical impact of  sending an employee  into a
+hostile environment with anything but local encryption is risky at
+best. To note: that is, if the user will actually produce or carry
+anything of value. If not, a standalone setup may in some cases be
+argued for.
+
+Tactical no-brainers when travelling are the following:
+
+1.  The system  should disclose  as little  as possible  about the
+   traveller's pattern of activity and content
+2. As little information as possible  should be at rest on devices
+   at risk
+3. It should come at a  high cost to compromise the end-point both
+   for physical and technical exploitation
+4. The  equipment should never  be connected to  an organisation's
+   service infrastructure directly before, during or after travel
+5.  The system  should not  be obviously  provocative to  locals -
+   e.g. during airport inspections.
+
+As far  as I have  found, there  is currently  one  desktop system
+that sufficiently  meets these  criteria  -  and that  is ChromeOS
+which  comes with  sane  default settings,  has  a really  minimal
+configuration  and  is  usable  to an  average  person.   However,
+ChromeOS is not  a mobile operating system - and  for that purpose
+iOS and Android are a better fit even though they  do not tick off
+all the above boxes.
+
+With that  in mind  the following  model, that  I have  named "The
+Tactical  Travel Protection  Model",  provides  a hardened,  basic
+infrastructure setup  that uses cloud  providers to hide  in plain
+sight.
+
+![The Tactical Travel Protection Model shows the concept of a full stack travel
+setup](/static/img/data/tactical_travel_protection_model.png)
+
+The model is further detailed in the following section.
+
+## Scalability and Technical Implementation
+
+With the conceptual model shown in the last section, it is time to
+dive into implementation in a  practical situation.  The beauty of
+the model  is its  modularity, so  a component -  such as  a cloud
+server, can  easily be  put in a  local and  physically controlled
+location.  Thus, please consider  the technologies mentioned as an
+example -  the power  of the  model comes to  play when  you start
+switching things up.
+
+### Server Side Components
+
+Consider the availability of external services in all parts of the
+process.  Ideally  a travel  device should store  information only
+outside  the regional  location of  a traveller.   Balance storage
+with requirements of availability.  An  example of such is that an
+enforced VPN connection  may not always be  available, which would
+practically leave an SFTP link exposed or down.
+
+For  the example  technologies  used  in the  model  shown in  the
+previous section, the following sections show the use.
+
+#### Cloud Policy, Provisioning, Device and User Management
+
+The reason  we really need to  use a device management  service is
+the  scalability of  deployment. Using  a standalone  approach may
+work and provide some additional  security due to the independence
+of each device, but it is inevitable in the long run if you handle
+even a low amount of travels.
+
+In this  case, especially due  to using  ChromeOS, G Suite  is the
+most straightforward choice. It is important to focus the solution
+on  managing  devices  when   speaking  of  travels,  not  pushing
+sensitive  configuration  files  and  so on.   If  encountering  a
+compromise of the G Suite  administrative account - it is possible
+to push threat actor-controlled applications and configurations to
+devices.  Due to this it is  essential to clean out the management
+domain or create a new, untraceable one once in a while.
+
+G Suite is  a granular solution. Examples  of recommended policies
+are: enforced  use of security  tokens and the disabling  of other
+two factor authentication options, screen  lock upon lid close and
+so on.
+
+When testing G Suite and ChromeOS  I figured that it is easiest to
+provision  VPN  configuration  files (``.onc``)  and  certificates
+manually. For iOS the same goes with ``.mobileconfig``. Doing this
+adds another protective layer.
+
+#### VPN
+
+For VPN, my  experience is that the most reliable  option is using
+native  supported VPN  clients in  the operating  system used  for
+travel. In  this case  it is  ChromeOS with  OpenVPN and  iOS with
+IPSec. This adds  a bit to the complexity as  iOS does not support
+OpenVPN which  runs most reliably  in some countries  that censor
+the  Internet. However,  ChromeOS does.  The solution  to this  is
+using two VPS nodes for tunneling traffic:
+
+1. OpenVPN service through ansible-openvpn-hardened [4]
+2.  IPSec  service  through  algo  [5].   Lenny  Zeltser  created  a
+   deployment-guide on algo recently [6]
+
+Again:  to  reduce exposure  through  centrality,  you should  not
+provision   device-specific    keys   from    central   management
+consoles. Also, make sure to  use certificates by any service that
+needs to connect to the Internet.
+
+**OpenVPN**:
+
+Configure     according      to     the     README      on     the
+``ansible-openvpn-hardened``  Github page.   When  you deploy  the
+OpenVPN server, you will be left  with a file named something like
+``<user-id>@<random-word>.preregistration-pki-embedded.ovpn``   in
+the ``fetched_credentials/<domain>``  directory.  Just  like Apple
+has  its ``mobileconfig``  format, the  Chromium Project  uses the
+Open Network  Configuration (ONC)  [7]. In  order to  convert this
+format to a  working configuration file, use  ovpn2onc.py [8] like
+the following.
+
+    python3 reference/convert.py --infile *-pki-embedded.ovpn --outfile vpn_configuration.onc --name my_vpn
+
+This     results     in     a     configuration     file     named
+``vpn_configuration.onc``. ChromeOS will not give you any feedback
+here, so make sure to read  through everything to get it right the
+first  time. If  you  end  up troubleshooting,  I  found that  the
+Chromium  project  does have  some  working  examples  [9].  Import
+``vpn_configuration.onc`` in Chrome as shown in the next section.
+
+Due to  the hardened  setup, be  particularly strict  to configure
+with  an OS  version according  to the  repo README.  For instance
+Debian 8.10 won't work.
+
+**Algo**: Has great docs as-is.
+
+#### SFTP
+
+An  SFTP service  is  simple to  manually  deploy.  However,  when
+scalability and  hardening  matter  it  is  best   to  automate  the
+deployment.  Through testing available  Ansible scripts I ended up
+with Johan  Meiring's ansible-sftp [10]. Again,  the configuration
+is  self-explanatory.  You should  however note  that public
+keys   should   be   put   in   a   ``files/``   directory   under
+``ansible-sftp``    root.    These    can   be    generated   with
+``ssh-keygen``, the private keys need to be stored somewhere else
+for manual transfer to the laptop accessing it.
+
+Since  this is  a  traveller setup  you should  seek  to create  a
+disconnect between cloud  drives and rather use  local storage and
+SFTP. Disable OneDrive in Office  365 Business and Google Drive in
+G Suite.
+
+#### Deploying an Out-of-Band (OOB) Channel
+
+Communications  is king  and  perhaps one  of  the most  important
+things you configure.
+
+I described using Matrix and Riot for OOB recently [11].
+
+#### Security Keys
+
+Nowadays, strong  authentication is  so easy that  everyone should
+use  it. In  a  hostile  environment it  is  hygiene. Google  uses
+Yubikeys and  Feitian tokens in their  authentication services and
+so  should a  traveller [12,13,14].  This eliminates  some of  the
+uncertainty  when authenticating  against  remote  servers and  is
+something the  traveller can keep  on-body at all times.  For this
+setup  not  every  service   can  maintain  usability  when  using
+tokens. Those services - such as a mounted SFTP share - should use
+certificates.
+
+
+### Client Side Components
+
+So why a Chromebook?
+
+*  Has a  minimal  configuration.   Everything you  do  is in  the
+  browser
+* You get granular control through G Suite
+*  Based on  the Linux-kernel,  which means  it is  different from
+  Windows and may require some extra effort from a threat actor
+* A lot of work has gone  in to the user interface in ChromeOS, so
+  it will feel familiar and intuitive to users
+* ChromeOS has a lot of security features built-in [15], such as:
+  Secure Boot, Security Key login and so on.
+
+G Suite  will help you a  little bit on  the way when it  comes to
+configuration  control.   However,  it requires  some  client-side
+configuration.
+
+The client side consists of components.  I chose to model these as
+five layers:
+
+The  Traveller. The  most important  asset on  the travel  is most
+likely  your human  traveller. This  asset will  have some  values
+assigned to  it, such  as security keys,  credentials and  his own
+knowledge.   Anonymise information  stored here.  In other  words,
+make sure to use an identifier and not the traveller's real name.
+
+Device  and  information.  When   selecting  devices  and  putting
+information  on it  you have  entered the  device and  information
+exposure  layer.  This  will  typically consist  of  all  hardware
+peripherals, such as cameras, and  content such as calls made from
+a handset. Other things to consider here for ChromeOS is deploying
+PGP  and its  keys with  Mailvelope  and Office  from Google  Play
+Store.
+
+Content. It was actually kind of interesting to model this from an
+iOS and ChromeOS  perspective, because ChromeOS keeps  most of its
+applications in the browser while iOS has native apps on line with
+Chrome. This again means that  the exposure surface of ChromeOS is
+more uniform than on iOS.
+
+Native applications. This is  the actual applications installed in
+the operating  system directly. For  iOS this has  larger exposure
+with  native  applications  for   e.g.  communications,  while  on
+ChromeOS you  will basically  only install an  SFTP plugin  to the
+file system and use Chrome for a travel.
+
+Transport.  When  travelling to a hostile  environment, tunnel all
+communications to and from the system as far as possible. Both iOS
+and ChromeOS have sufficient mechanisms here as we reviewed in the
+previous section. For encryption keys:
+    
+1. Transfer  encryption keys stored  in the ``.p12`` file  and the
+   configuration to the Chromebook
+2.          Install           encryption          keys          in
+   ``chrome://settings/certificates``. Use  the "Import  and Bind"
+   option to install the certificate to TPM
+3.     Import      the     VPN     configuration      (ONC)     in
+   ``chrome://net-internals/#chromeos``
+
+That is basically it.
+
+## Conclusion
+
+The art of  balancing threat resiliency, usability and  cost is an
+intriguing problem.
+
+The technology out there, presented in  this article, is in no way
+designed to  survive in hostile environments  when considering the
+capabilities of  nation state threat actors.  Fundamental security
+mechanisms are  lacking in  this regard,  and only  companies like
+Microsoft,  Google  and Apple  can  provide  the basis  to  change
+those. We can however slow these actors down considerably.
+
+An important  aspect to consider,  in order to compensate  for the
+above  weaknesses, is  that  organisations need  to  handle  these
+problems on an operational and strategic level as well.
+
+Using cloud environments  is  a solid choice  for travel. However,
+when considering threat actors that are able to gain access to the
+hosts  of those  environments they  are not  sufficient. To  solve
+this, the  most valuable services  may be  moved in-house or  to a
+hardened cloud environment. End-to-end encryption is also required
+when using cloud  services, such as when using  the included inbox
+of G Suite.
+
+Please keep in mind that The Tactical Travel Protection Model is
+a  core model.  This  article  does not  cover  every aspect.   An
+example  of   such  is  encryption  and   protection  of  external
+peripherals  and  memory  devices and  operational  and  strategic
+considerations.
+
+Organisations  have yet  to  prove a  working  model resilient  to
+capable adversaries.  Hopefully this  article will be a foundation
+to discuss variations and weaknesses in the community.
+
+
+
+[1] https://motherboard.vice.com/en_us/article/a3q374/hacker-bios-firmware-backdoor-evil-maid-attack-laptop-5-minutes
+
+[2] https://mitre.github.io/attack-navigator/enterprise/
+
+[3] https://attack.mitre.org/wiki/Initial_Access
+
+[4] https://github.com/bau-sec/ansible-openvpn-hardened
+
+[5] https://github.com/trailofbits/algo
+
+[6] https://zeltser.com/deploy-algo-vpn-digital-ocean/
+
+[7] https://www.chromium.org/chromium-os/chromiumos-design-docs/open-network-configuration
+
+[8] https://gist.github.com/tommyskg/6d0eeecc5bab65a49d72f5b16e086976
+
+[9] https://chromium.googlesource.com/chromium/src/+/32352ad08ee673a4d43e8593ce988b224f6482d3/chromeos/test/data/network
+
+[10] https://github.com/johanmeiring/ansible-sftp
+
+[11] https://secdiary.com/2018-07-11-matrix.html
+
+[12] https://krebsonsecurity.com/2018/07/google-security-keys-neutralized-employee-phishing/
+
+[13] https://www.yubico.com/product/yubikey-4-series/#yubikey-4c
+
+[14] https://ftsafe.com/onlinestore/product?id=3
+
+[15] http://dhanus.mit.edu/docs/ChromeOSSecurity.pdf
+
--- a/data/vantage.md
+++ b/data/vantage.md
@ -0,0 +1,222 @@
+## Key Takeaways
+
+* Monitoring the technology infrastructure is a key element for
+  situational awareness in both security and IT operations.
+* A 2020 infrastructure should use a modern application layer
+  reverse proxy such as Pomerium in front of all services. Leave
+  all clients outside.
+* The threat landscape should be the focus when shaping a
+  defendable infrastructure.
+
+<small><i>Disclaimer: If you have outsourced all your equipment
+and information to "the cloud", this post is a sanity check of the
+relationship with your vendor. The primary audience of this post
+is everyone willing to invest in people and knowledge to provide a
+best possible defense for their people and processes, and the
+technology supporting them.</i></small>
+
+## Introduction
+
+I cannot start to imagine how many times Sun Tzu must have been
+quoted in board rooms around the world:
+
+> If you know the enemy and know yourself, you need not fear the
+> result of a hundred battles. If you know yourself but not the
+> enemy, for every victory gained you will also suffer a
+> defeat. If you know neither the enemy nor yourself, you will
+> succumb in every battle.
+
+However much repeated, the message has not come across. Why is
+that?  Because this is a hard problem to solve. It is in the
+intersection between people as a culture and technology.
+
+If all used reverse proxies in a sensible way I would probably
+have a lot less to do at work. Time and time again it turns out
+that organisations do not have configuration control over their
+applications and infrastructure, and the reverse proxy is a
+central building block in gaining it. To an extent everything is
+about logs and traceability when an incident occurs.
+
+
+
+## Beyondcorp and The Defendable Infrastructure
+
+The lucky part of this hard-to-solve problem is that Google has
+already prescribed one good solution in its Beyondcorp whitepapers
+[1].
+
+But this was in some ways described in the Norwegian Armed Forces
+before that in its five architecture principles for a defendable
+infrastructure. These were published by its former Head of Section
+Critical Infrastructure Protection Centre [2]:
+
+1. Monitor the network for situational awareness
+2. A defender must be able to shape the battleground to have
+   freedom of movement and to limit the opponent's freedom of
+   movement
+3. Update services to limit vulnerability exposure
+4. Minimize the infrastructure to limit the attack
+   surface
+5. Traceability is important to analyze what happened
+
+I know that Richard Bejtlich was an inspiration for the defendable
+infrastructure principles, so the books written by him are relevant
+[4,5].
+
+Defendable infrastructure is a good term, and also used in a 2019
+Lockheed article which defines it well [3]:
+
+> Classical security engineering and architecture has been trying
+> to solve the wrong problem. It is not sufficient to try to build
+> hardened systems; instead we must build systems that are
+> defendable. A system’s requirements, design, or test results can’t
+> be declared as "secure." Rather, it is a combination of how the
+> system is designed, built, operated, and defended that ultimately
+> protects the system and its assets over time. Because adversaries
+> adapt their own techniques based on changing objectives and
+> opportunities, systems and enterprises must be actively defended.
+
+The development of these architecture principles happened before
+2010, so the question remains how they apply in 2020. We may get
+back to the other principles in later posts, but the rest of this
+article will focus on monitoring in a 2020-perspective.
+
+## Monitoring - a Central Vantage Point
+
+One thing that has developed since 2010 is our understanding of
+positioning monitoring capabilities and the more mainstream
+possibility of detection on endpoints. The historical focus of
+mature teams was primarily on the network layer. While the network
+layer is still important as an objective point of observation the
+application layer has received more attention. The reason for it
+is the acceptance that often it is where exploitation happens and
+the capabilities as commercial products have emerged.
+
+With that in mind a shift in the understanding of a best practice
+of positioning reverse proxies has occurred as well. While the
+previous recommendation was to think: defend inside-out. The focus
+is now to defend outside-in.
+
+The meaning of defending outside-in, is to take control of what
+can be controlled: the application infrastructure. In all
+practicality this means to position the reverse proxy in front of
+your server segment instead of the whole network, including
+clients.
+
+
+                                                   [ Application A ]
+    [ Client on-prem ]                                    |
+	                 ] ---> [ Reverse proxy ] ---> [  App gateway  ]
+	[ Client abroad  ]              ^                     |
+	                         risk assessment       [ Application B ]
+	
+
+Previously, for some reason, we put the "client on-prem" on the
+other side of the reverse proxy, because we believed we could
+control what the user was doing. Today, we know better. This is
+not a trust issue, it is a matter of prioritizing based on the
+asset value and the defending capacity.
+
+A reverse proxy is also a central vantage point of your
+infrastructure. In a nutshell if you are good at detecting security
+incidents at this point, you are in a good position to have
+freedom of movement - such as channeling your opponent.
+
+The modern reverse proxy has two integration capabilities that
+legacy proxies do not:
+
+* Single sign-on (SSO), which provides strong authentication and
+  good identity management
+* Access control logic (Google calls this the access control
+  engine)
+
+In fact, Google in 2013 stated it uses 120 variables for a risk
+assessment in its access control logic for Gmail [6]. In
+comparison most organisations today use three: username, password
+and in half the instances a token.
+
+> Every time you sign in to Google, whether via your web browser
+> once a month or an email program that checks for new mail every
+> five minutes, our system performs a complex risk analysis to
+> determine how likely it is that the sign-in really comes from
+> you. In fact, there are more than 120 variables that can factor
+> into how a decision is made.
+
+I imagine that Google uses the following factors for comparison to
+the sole username/password approach (they state some of these in
+their article):
+
+- Geo-location with an algorithmic score of destination of last
+  login to current location was part of this. The k-means distance
+  could be a good fit.
+- Source ASN risk score
+- Asset subject to access
+- User role scored against asset subject to access
+- Device state (updated, antivirus installed and so on)
+- Previous usage patterns, like time of day
+- Other information about the behavioural patterns of relevant threats
+
+Another nice feature of a reverse proxy setup this way is that it
+minimizes the exposure and gives defenders the possibility to
+route traffic the way they see fit. For instance, it would be hard
+for an attacker to differentiate between a honeypot and a
+production system in the first place. One could also challenge the
+user in cases where in doubt, instead of plainly denying access as
+is sometimes done.
+
+One challenge is what protocols need support. The clear ones
+are:
+
+* HTTP
+* SSH
+* Application gateways between micro-segments
+
+I have scoped out the details of micro-segmentation from this
+post. Micro-segmentation is the basic idea of creating a fine mesh
+of network segments in the infrastructure so that no asset can
+communicate with another by default. The rest is then routed
+through e.g. a gateway such as Pomerium, or in high-performance
+cases an application gateway - which may be a gateway for a
+specific binary protocol. The reason is control of all activity
+between services, being able to shape and deny access in the
+terrain.
+
+Even though this post is not about implementation I will leave you
+with some examples of good open source starting points: Pomerium
+is a reverse proxy with the SSO-capability, and the default
+capabilities of SSH takes you far (ssh-ca and JumpHost). 
+
+              -----------> [ syslog server ] <------------
+             |                    |                       |
+    		 |                    |                       |
+     o		 |                    |                       |
+    /|\ [ Client ] -------> [ example.com ] <-----> [ app001.example.com ]
+    / \      |      https    - pomerium        |
+    		 |	     |       - SSH JumpHost    |
+    		 |       |                         |
+    		 |       |                         |
+          [ HIDS ]   |-------------------> [ NIDS ]
+    
+           Figure 1: Conceptual Defendable Infrastructure Overview
+
+
+Now that a checkpoint is established in front of the infrastructure,
+the rest is a matter of traceability, taking the time to
+understand the data to gain insight and finally develop and
+implement tactics against your opponents.
+
+
+Until next time.
+
+
+[1] https://cloud.google.com/beyondcorp  
+[2]
+https://norcydef.blogspot.com/2013/03/tg13-forsvarbar-informasjonsinfrastrukt.html  
+[3]
+https://www.lockheedmartin.com/content/dam/lockheed-martin/rms/documents/cyber/LM-White-Paper-Defendable-Architectures.pdf  
+[4] Tao of Network Security Monitoring, The: Beyond Intrusion
+Detection  
+[5] Extrusion Detection: Security Monitoring for Internal
+Intrusions  
+[6]
+https://blog.google/topics/safety-security/an-update-on-our-war-against-account/  
--- a/flake.lock
+++ b/flake.lock
@ -0,0 +1,77 @@
+{
+  "nodes": {
+    "cl-nix-lite": {
+      "locked": {
+        "lastModified": 1721009305,
+        "narHash": "sha256-GtVd8VmPZB+J64VCf26yLbFUFRT1mdpzC8ylAHMIJoo=",
+        "owner": "hraban",
+        "repo": "cl-nix-lite",
+        "rev": "dc2793ec716b294739dabd6d99cc61543e6cd149",
+        "type": "github"
+      },
+      "original": {
+        "owner": "hraban",
+        "repo": "cl-nix-lite",
+        "type": "github"
+      }
+    },
+    "flake-utils": {
+      "inputs": {
+        "systems": "systems"
+      },
+      "locked": {
+        "lastModified": 1710146030,
+        "narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=",
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a",
+        "type": "github"
+      },
+      "original": {
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "type": "github"
+      }
+    },
+    "nixpkgs": {
+      "locked": {
+        "lastModified": 1722791413,
+        "narHash": "sha256-rCTrlCWvHzMCNcKxPE3Z/mMK2gDZ+BvvpEVyRM4tKmU=",
+        "owner": "NixOS",
+        "repo": "nixpkgs",
+        "rev": "8b5b6723aca5a51edf075936439d9cd3947b7b2c",
+        "type": "github"
+      },
+      "original": {
+        "owner": "NixOS",
+        "ref": "nixos-24.05",
+        "repo": "nixpkgs",
+        "type": "github"
+      }
+    },
+    "root": {
+      "inputs": {
+        "cl-nix-lite": "cl-nix-lite",
+        "flake-utils": "flake-utils",
+        "nixpkgs": "nixpkgs"
+      }
+    },
+    "systems": {
+      "locked": {
+        "lastModified": 1681028828,
+        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
+        "owner": "nix-systems",
+        "repo": "default",
+        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nix-systems",
+        "repo": "default",
+        "type": "github"
+      }
+    }
+  },
+  "root": "root",
+  "version": 7
+}
--- a/flake.nix
+++ b/flake.nix
@ -13,17 +13,57 @@
        pkgs = nixpkgs.legacyPackages.${system}.extend cl-nix-lite.overlays.default;
      in 
      {
-
-          defaultPackage.x86_64-linux =
-            # Notice the reference to nixpkgs here.
-            with import nixpkgs { system = "x86_64-linux"; };
-            stdenv.mkDerivation {
-              name = "hello";
-              src = self;
-              buildPhase = "gcc -o hello ./hello.c";
-              installPhase = "mkdir -p $out/bin; install -t $out/bin hello";
+        packages = {
+          ecl = with pkgs.lispPackagesLiteFor pkgs.ecl; lispDerivation {
+            name = "thoughts";
+            lispSystem = "thoughts";
+            lispDependencies = [
+              asdf
+              arrow-macros
+            ];
+            src = pkgs.lib.cleanSource ./generator.lisp;
+            meta = {
+              license = pkgs.lib.licenses.agpl3Only;
            };

+            buildInputs = [
+              pkgs.ecl
+              pkgs.git
+              pkgs.gnumake
+              pkgs.asdf
+              pkgs.multimarkdown
+            ];
+
+            phases = [ "unpackPhase" "installPhase" "cleanupPhase" ];
+
+            unpackPhase = ''
+              mkdir -p $TMPDIR
+              cp ${./generator.lisp} $TMPDIR/generator.lisp
+              mkdir -p $TMPDIR/data
+              cp -r ${toString ./data}/* $TMPDIR/data/
+              mkdir -p $TMPDIR/templates
+              cp -r ${toString ./templates}/* $TMPDIR/templates/
+              mkdir -p $TMPDIR/static
+              cp -r ${toString ./static}/* $TMPDIR/static/
+            '';
+
+            installPhase = ''
+              mkdir -p $out/html
+              mkdir -p $out/gemini
+              mkdir -p $TMPDIR/output/gemini/articles
+              mkdir -p $TMPDIR/output/html
+              mkdir -p $TMPDIR/temp/data
+              cd $TMPDIR
+              ecl --load $TMPDIR/generator.lisp
+              cp -r $TMPDIR/output/html/* $out/html/
+              cp -r $TMPDIR/output/gemini/* $out/gemini/
+              cp -r $TMPDIR $out/tmpdir
+            '';
+
+            cleanupPhase = ''
+              rm -rf $TMPDIR/temp
+            '';
+          };
        };

        devShell = pkgs.mkShell {