Add image reparse and deletion functions (#31)
* Update readme for bulk_extractor v2.0.0
* Update docker image to Ubuntu 20.04
* Parse filesystem before string extraction
* Refactor postgres datastore code
* Add reparse option
* Add option to delete image data
* Update usage
* Update version
This commit is contained in:
parent
b61a835d4a
commit
7aadd41ee2
11 changed files with 541 additions and 237 deletions
|
@ -8,10 +8,8 @@ dfDewey is a digital forensics string extraction, indexing, and searching tool.
|
||||||
## Requirements
|
## Requirements
|
||||||
### bulk_extractor
|
### bulk_extractor
|
||||||
dfDewey currently requires bulk_extractor for string extraction.
|
dfDewey currently requires bulk_extractor for string extraction.
|
||||||
bulk_extractor can be downloaded and built from source here:
|
|
||||||
https://github.com/simsong/bulk_extractor
|
|
||||||
|
|
||||||
bulk_extractor can also be installed from the GIFT PPA.
|
bulk_extractor can be installed from the GIFT PPA.
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
sudo add-apt-repository ppa:gift/stable
|
sudo add-apt-repository ppa:gift/stable
|
||||||
|
@ -19,6 +17,11 @@ sudo apt update
|
||||||
sudo apt install -y bulk-extractor
|
sudo apt install -y bulk-extractor
|
||||||
```
|
```
|
||||||
|
|
||||||
|
bulk_extractor can also be downloaded and built from source here:
|
||||||
|
https://github.com/simsong/bulk_extractor
|
||||||
|
|
||||||
|
Note: bulk_extractor v1.6.0 is recommended (v2.0.0 is not yet supported).
|
||||||
|
|
||||||
### dfVFS
|
### dfVFS
|
||||||
[dfVFS](https://github.com/log2timeline/dfvfs) is required for image parsing. It
|
[dfVFS](https://github.com/log2timeline/dfvfs) is required for image parsing. It
|
||||||
can be installed from the GIFT PPA.
|
can be installed from the GIFT PPA.
|
||||||
|
|
|
@ -17,4 +17,4 @@
|
||||||
dfDewey is a digital forensics string extraction, indexing, and searching tool.
|
dfDewey is a digital forensics string extraction, indexing, and searching tool.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
__version__ = '20211220'
|
__version__ = '20220603'
|
||||||
|
|
|
@ -44,19 +44,7 @@ class PostgresqlDataStore():
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def bulk_insert(self, table_spec, rows):
|
def _execute(self, command):
|
||||||
"""Execute a bulk insert into a table.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
table_spec: String in the form 'table_name (col1, col2, ..., coln)'
|
|
||||||
rows: Array of value tuples to be inserted
|
|
||||||
"""
|
|
||||||
extras.execute_values(
|
|
||||||
self.cursor,
|
|
||||||
'INSERT INTO {0:s} VALUES %s ON CONFLICT DO NOTHING'.format(table_spec),
|
|
||||||
rows)
|
|
||||||
|
|
||||||
def execute(self, command):
|
|
||||||
"""Execute a command in the PostgreSQL database.
|
"""Execute a command in the PostgreSQL database.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
@ -64,7 +52,7 @@ class PostgresqlDataStore():
|
||||||
"""
|
"""
|
||||||
self.cursor.execute(command)
|
self.cursor.execute(command)
|
||||||
|
|
||||||
def query(self, query):
|
def _query(self, query):
|
||||||
"""Query the database.
|
"""Query the database.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
@ -77,7 +65,7 @@ class PostgresqlDataStore():
|
||||||
|
|
||||||
return self.cursor.fetchall()
|
return self.cursor.fetchall()
|
||||||
|
|
||||||
def query_single_row(self, query):
|
def _query_single_row(self, query):
|
||||||
"""Query the database for a single row.
|
"""Query the database for a single row.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
@ -90,6 +78,191 @@ class PostgresqlDataStore():
|
||||||
|
|
||||||
return self.cursor.fetchone()
|
return self.cursor.fetchone()
|
||||||
|
|
||||||
|
def bulk_insert(self, table_spec, rows):
    """Insert many rows into a table with one batched statement.

    The statement carries ``ON CONFLICT DO NOTHING``, so rows that collide
    with an existing key are skipped instead of raising.

    Args:
      table_spec: String in the form 'table_name (col1, col2, ..., coln)'
      rows: Array of value tuples to be inserted
    """
    insert_statement = (
        'INSERT INTO {0:s} VALUES %s ON CONFLICT DO NOTHING'.format(table_spec))
    extras.execute_values(self.cursor, insert_statement, rows)
|
||||||
|
|
||||||
|
def create_database(self, db_name):
    """Issue a CREATE DATABASE statement for the image.

    Args:
      db_name: Database name
    """
    create_statement = 'CREATE DATABASE {0:s}'.format(db_name)
    self._execute(create_statement)
|
||||||
|
|
||||||
|
def create_filesystem_database(self):
    """Create the `blocks` and `files` tables of a filesystem database."""
    table_statements = (
        # Maps a block to the inode(s) using it, per partition.
        ('CREATE TABLE blocks (block INTEGER, inum INTEGER, part TEXT, '
         'PRIMARY KEY (block, inum, part))'),
        # Maps an inode to its filename(s), per partition.
        ('CREATE TABLE files (inum INTEGER, filename TEXT, part TEXT, '
         'PRIMARY KEY (inum, filename, part))'),
    )
    for statement in table_statements:
        self._execute(statement)
|
||||||
|
|
||||||
|
def delete_filesystem_database(self, db_name):
    """Drop the filesystem database of an image.

    Args:
      db_name: The name of the database to drop
    """
    drop_statement = 'DROP DATABASE {0:s}'.format(db_name)
    self._execute(drop_statement)
|
||||||
|
|
||||||
|
def delete_image(self, image_id):
    """Remove an image's row from the `images` table.

    Args:
      image_id: Image identifier
    """
    delete_statement = "DELETE FROM images WHERE image_id = '{0:s}'"
    self._execute(delete_statement.format(image_id))
|
||||||
|
|
||||||
|
def get_case_images(self, case):
    """Look up every image linked to a case.

    Args:
      case: Case name

    Returns:
      A dictionary mapping image hash to image path for the case.
    """
    lookup = (
        "SELECT image_hash, image_path FROM image_case NATURAL JOIN images "
        "WHERE case_id = '{0:s}'")
    rows = self._query(lookup.format(case))
    return {image_hash: image_path for image_hash, image_path in rows}
|
||||||
|
|
||||||
|
def get_filenames_from_inode(self, inode, location):
    """Gets filename(s) from an inode number.

    Args:
      inode: Inode number of target file (formatted as an integer)
      location: Partition number

    Returns:
      List of filename(s) recorded for the inode in that partition. The list
      is empty when the query yields no rows (it is never None).
    """
    results = self._query((
        'SELECT filename FROM files '
        'WHERE inum = {0:d} AND part = \'{1:s}\'').format(inode, location))
    # Each row holds a single column; unwrap it.
    return [row[0] for row in results]
|
||||||
|
|
||||||
|
def get_image_cases(self, image_id):
    """Get a list of cases the image is linked to.

    Args:
      image_id: Image identifier

    Returns:
      List of case identifiers. The list is empty when the image is not
      linked to any case (it is never None).
    """
    rows = self._query(
        'SELECT case_id FROM image_case WHERE image_id = \'{0:s}\''.format(
            image_id))
    # Build a new list instead of overwriting the fetched rows in place, so
    # this works for any row sequence and leaves the query result untouched.
    return [row[0] for row in rows]
|
||||||
|
|
||||||
|
def get_image_hash(self, image_id):
    """Fetch the stored hash of an image.

    Args:
      image_id: Image identifier

    Returns:
      Hash for the image stored in PostgreSQL, or None when no row is found.
    """
    lookup = "SELECT image_hash FROM images WHERE image_id = '{0:s}'"
    row = self._query_single_row(lookup.format(image_id))
    return row[0] if row else None
|
||||||
|
|
||||||
|
def get_inodes(self, block, location):
    """Gets inode numbers for a block offset.

    Args:
      block (int): block offset within the image.
      location (str): Partition location / identifier.

    Returns:
      List of inode number(s) recorded for the block in that partition. The
      list is empty when the query yields no rows (it is never None).
    """
    rows = self._query(
        ('SELECT inum FROM blocks '
         'WHERE block = {0:d} AND part = \'{1:s}\'').format(block, location))
    # Build a new list instead of overwriting the fetched rows in place, so
    # this works for any row sequence and leaves the query result untouched.
    return [row[0] for row in rows]
|
||||||
|
|
||||||
|
def initialise_database(self):
    """Initialise the image database.

    Creates the `images` table first, then the `image_case` table that
    references it.
    """
    schema_statements = (
        ('CREATE TABLE images (image_id TEXT PRIMARY KEY, image_path TEXT, '
         'image_hash TEXT)'),
        ('CREATE TABLE image_case ('
         'case_id TEXT, image_id TEXT REFERENCES images(image_id), '
         'PRIMARY KEY (case_id, image_id))'),
    )
    for statement in schema_statements:
        self._execute(statement)
|
||||||
|
|
||||||
|
def insert_image(self, image_id, image_path, image_hash):
    """Insert a row for an image into the `images` table.

    Args:
      image_id: Image identifier
      image_path: Path to the image file
      image_hash: Hash of the image
    """
    insert_statement = (
        "INSERT INTO images (image_id, image_path, image_hash) "
        "VALUES ('{0:s}', '{1:s}', '{2:s}')")
    self._execute(insert_statement.format(image_id, image_path, image_hash))
|
||||||
|
|
||||||
|
def is_image_in_case(self, image_id, case):
    """Check if an image is attached to a case.

    Args:
      image_id: Image identifier
      case: Case name

    Returns:
      True if the image is attached to the case, otherwise False.
    """
    image_case = self._query_single_row((
        'SELECT 1 from image_case '
        'WHERE image_id = \'{0:s}\' AND case_id = \'{1:s}\'').format(
            image_id, case))
    # A matching link yields a row; no match yields None.
    return bool(image_case)
|
||||||
|
|
||||||
|
def link_image_to_case(self, image_id, case):
    """Insert a row linking an image to a case.

    Args:
      image_id: Image identifier
      case: Case name
    """
    link_statement = (
        "INSERT INTO image_case (case_id, image_id) "
        "VALUES ('{0:s}', '{1:s}')")
    self._execute(link_statement.format(case, image_id))
|
||||||
|
|
||||||
def switch_database(
|
def switch_database(
|
||||||
self, host='127.0.0.1', port=5432, db_name='dfdewey', autocommit=False):
|
self, host='127.0.0.1', port=5432, db_name='dfdewey', autocommit=False):
|
||||||
"""Connects to a different database.
|
"""Connects to a different database.
|
||||||
|
@ -128,6 +301,19 @@ class PostgresqlDataStore():
|
||||||
|
|
||||||
return self.cursor.fetchone() is not None
|
return self.cursor.fetchone() is not None
|
||||||
|
|
||||||
|
def unlink_image_from_case(self, image_id, case):
    """Removes an image from a case.

    Deletes the matching row from the `image_case` table; the image's own row
    in `images` is not touched here.

    Args:
      image_id: Image identifier
      case: Case name
    """
    # Single-line statement, built like the other methods in this class, so
    # the SQL sent to the server carries no embedded newline or source
    # indentation.
    self._execute((
        'DELETE FROM image_case '
        'WHERE case_id = \'{0:s}\' AND image_id = \'{1:s}\'').format(
            case, image_id))
|
||||||
|
|
||||||
def value_exists(self, table_name, column_name, value):
|
def value_exists(self, table_name, column_name, value):
|
||||||
"""Check if a value exists in a table.
|
"""Check if a value exists in a table.
|
||||||
|
|
||||||
|
|
|
@ -20,6 +20,7 @@ import mock
|
||||||
from psycopg2 import OperationalError
|
from psycopg2 import OperationalError
|
||||||
|
|
||||||
from dfdewey.datastore.postgresql import PostgresqlDataStore
|
from dfdewey.datastore.postgresql import PostgresqlDataStore
|
||||||
|
from dfdewey.utils.image_processor_test import TEST_CASE, TEST_IMAGE, TEST_IMAGE_HASH, TEST_IMAGE_ID
|
||||||
|
|
||||||
|
|
||||||
class PostgresqlTest(unittest.TestCase):
|
class PostgresqlTest(unittest.TestCase):
|
||||||
|
@ -47,15 +48,90 @@ class PostgresqlTest(unittest.TestCase):
|
||||||
'VALUES %s ON CONFLICT DO NOTHING')
|
'VALUES %s ON CONFLICT DO NOTHING')
|
||||||
mock_execute_values.assert_called_once_with(db.cursor, expected_sql, rows)
|
mock_execute_values.assert_called_once_with(db.cursor, expected_sql, rows)
|
||||||
|
|
||||||
|
def test_create_filesystem_database(self):
    """Both filesystem tables are created through the cursor."""
    blocks_sql = (
        'CREATE TABLE blocks (block INTEGER, inum INTEGER, part TEXT, '
        'PRIMARY KEY (block, inum, part))')
    files_sql = (
        'CREATE TABLE files (inum INTEGER, filename TEXT, part TEXT, '
        'PRIMARY KEY (inum, filename, part))')
    datastore = self._get_datastore()
    with mock.patch.object(datastore.cursor, 'execute') as execute_mock:
        datastore.create_filesystem_database()
    execute_mock.assert_has_calls([mock.call(blocks_sql), mock.call(files_sql)])
|
||||||
|
|
||||||
|
def test_delete_filesystem_database(self):
    """Dropping a filesystem database issues a single DROP DATABASE."""
    datastore = self._get_datastore()
    database = 'fs' + TEST_IMAGE_HASH
    expected_sql = 'DROP DATABASE {0:s}'.format(database)
    with mock.patch.object(datastore.cursor, 'execute') as execute_mock:
        datastore.delete_filesystem_database(database)
    execute_mock.assert_called_once_with(expected_sql)
|
||||||
|
|
||||||
|
def test_delete_image(self):
    """Deleting an image issues a single DELETE on the images table."""
    datastore = self._get_datastore()
    expected_sql = "DELETE FROM images WHERE image_id = '{0:s}'".format(
        TEST_IMAGE_ID)
    with mock.patch.object(datastore.cursor, 'execute') as execute_mock:
        datastore.delete_image(TEST_IMAGE_ID)
    execute_mock.assert_called_once_with(expected_sql)
|
||||||
|
|
||||||
def test_execute(self):
|
def test_execute(self):
|
||||||
"""Test execute method."""
|
"""Test execute method."""
|
||||||
db = self._get_datastore()
|
db = self._get_datastore()
|
||||||
command = (
|
command = (
|
||||||
'CREATE TABLE images (image_path TEXT, image_hash TEXT PRIMARY KEY)')
|
'CREATE TABLE images (image_path TEXT, image_hash TEXT PRIMARY KEY)')
|
||||||
with mock.patch.object(db.cursor, 'execute') as mock_execute:
|
with mock.patch.object(db.cursor, 'execute') as mock_execute:
|
||||||
db.execute(command)
|
db._execute(command)
|
||||||
mock_execute.assert_called_once_with(command)
|
mock_execute.assert_called_once_with(command)
|
||||||
|
|
||||||
|
def test_get_case_images(self):
    """Fetched (hash, path) rows come back as a hash -> path dictionary."""
    datastore = self._get_datastore()
    fetched_rows = [(TEST_IMAGE_HASH, TEST_IMAGE)]
    with mock.patch.object(
            datastore.cursor, 'fetchall', return_value=fetched_rows):
        case_images = datastore.get_case_images(TEST_CASE)
    self.assertEqual(case_images, {TEST_IMAGE_HASH: TEST_IMAGE})
|
||||||
|
|
||||||
|
def test_get_filenames_from_inode(self):
    """Every filename row fetched for the inode is returned, in order."""
    datastore = self._get_datastore()
    fetched_rows = [('test.txt',), ('test.txt:ads',)]
    with mock.patch.object(
            datastore.cursor, 'fetchall', return_value=fetched_rows):
        names = datastore.get_filenames_from_inode(42, '/p1')
    self.assertEqual(len(names), 2)
    self.assertEqual(names[0], 'test.txt')
    self.assertEqual(names[1], 'test.txt:ads')
|
||||||
|
|
||||||
|
def test_get_image_cases(self):
    """Case rows fetched for an image are unwrapped to plain case names."""
    datastore = self._get_datastore()
    fetched_rows = [('test',), ('test2',)]
    with mock.patch.object(
            datastore.cursor, 'fetchall', return_value=fetched_rows):
        linked_cases = datastore.get_image_cases(TEST_IMAGE_ID)
    self.assertEqual(linked_cases[0], 'test')
    self.assertEqual(linked_cases[1], 'test2')
|
||||||
|
|
||||||
|
def test_get_image_hash(self):
    """The hash column of the fetched row is returned."""
    datastore = self._get_datastore()
    fetched_row = (TEST_IMAGE_HASH,)
    with mock.patch.object(
            datastore.cursor, 'fetchone', return_value=fetched_row):
        stored_hash = datastore.get_image_hash(TEST_IMAGE_ID)
    self.assertEqual(stored_hash, TEST_IMAGE_HASH)
|
||||||
|
|
||||||
|
def test_get_inodes(self):
    """Inode rows fetched for a block are unwrapped to plain integers."""
    datastore = self._get_datastore()
    fetched_rows = [(10,), (19,)]
    with mock.patch.object(
            datastore.cursor, 'fetchall', return_value=fetched_rows):
        inode_numbers = datastore.get_inodes(1234, '/p1')
    self.assertEqual(inode_numbers, [10, 19])
|
||||||
|
|
||||||
@mock.patch('psycopg2.connect')
|
@mock.patch('psycopg2.connect')
|
||||||
def test_init(self, mock_connect):
|
def test_init(self, mock_connect):
|
||||||
"""Test init method."""
|
"""Test init method."""
|
||||||
|
@ -63,12 +139,57 @@ class PostgresqlTest(unittest.TestCase):
|
||||||
with self.assertRaises(RuntimeError):
|
with self.assertRaises(RuntimeError):
|
||||||
db = PostgresqlDataStore()
|
db = PostgresqlDataStore()
|
||||||
|
|
||||||
|
def test_initialise_database(self):
    """Both image tables are created through the cursor."""
    images_sql = (
        'CREATE TABLE images (image_id TEXT PRIMARY KEY, image_path TEXT, '
        'image_hash TEXT)')
    image_case_sql = (
        'CREATE TABLE image_case ('
        'case_id TEXT, image_id TEXT REFERENCES images(image_id), '
        'PRIMARY KEY (case_id, image_id))')
    datastore = self._get_datastore()
    with mock.patch.object(datastore.cursor, 'execute') as execute_mock:
        datastore.initialise_database()
    execute_mock.assert_has_calls(
        [mock.call(images_sql), mock.call(image_case_sql)])
|
||||||
|
|
||||||
|
def test_insert_image(self):
    """Inserting an image issues a single INSERT with all three values."""
    datastore = self._get_datastore()
    expected_sql = (
        "INSERT INTO images (image_id, image_path, image_hash) "
        "VALUES ('{0:s}', '{1:s}', '{2:s}')").format(
            TEST_IMAGE_ID, TEST_IMAGE, TEST_IMAGE_HASH)
    with mock.patch.object(datastore.cursor, 'execute') as execute_mock:
        datastore.insert_image(TEST_IMAGE_ID, TEST_IMAGE, TEST_IMAGE_HASH)
    execute_mock.assert_called_once_with(expected_sql)
|
||||||
|
|
||||||
|
def test_is_image_in_case(self):
    """A fetched row means linked; no row means not linked."""
    datastore = self._get_datastore()

    # A row is returned: the image is attached to the case.
    with mock.patch.object(datastore.cursor, 'fetchone', return_value=(1,)):
        linked = datastore.is_image_in_case(TEST_IMAGE_ID, TEST_CASE)
    self.assertTrue(linked)

    # No row is returned: the image is not attached to the case.
    with mock.patch.object(datastore.cursor, 'fetchone', return_value=None):
        linked = datastore.is_image_in_case(TEST_IMAGE_ID, TEST_CASE)
    self.assertFalse(linked)
|
||||||
|
|
||||||
|
def test_link_image_to_case(self):
    """Linking an image issues a single INSERT into image_case."""
    datastore = self._get_datastore()
    expected_sql = (
        "INSERT INTO image_case (case_id, image_id) "
        "VALUES ('{0:s}', '{1:s}')").format(TEST_CASE, TEST_IMAGE_ID)
    with mock.patch.object(datastore.cursor, 'execute') as execute_mock:
        datastore.link_image_to_case(TEST_IMAGE_ID, TEST_CASE)
    execute_mock.assert_called_once_with(expected_sql)
|
||||||
|
|
||||||
def test_query(self):
|
def test_query(self):
|
||||||
"""Test query method."""
|
"""Test query method."""
|
||||||
db = self._get_datastore()
|
db = self._get_datastore()
|
||||||
query = 'SELECT filename FROM files WHERE inum = 0'
|
query = 'SELECT filename FROM files WHERE inum = 0'
|
||||||
with mock.patch.object(db.cursor, 'fetchall', return_value=[('$MFT',)]):
|
with mock.patch.object(db.cursor, 'fetchall', return_value=[('$MFT',)]):
|
||||||
results = db.query(query)
|
results = db._query(query)
|
||||||
|
|
||||||
self.assertEqual(results, [('$MFT',)])
|
self.assertEqual(results, [('$MFT',)])
|
||||||
|
|
||||||
|
@ -79,7 +200,7 @@ class PostgresqlTest(unittest.TestCase):
|
||||||
'SELECT 1 from image_case WHERE image_hash = '
|
'SELECT 1 from image_case WHERE image_hash = '
|
||||||
'\'d41d8cd98f00b204e9800998ecf8427e\'')
|
'\'d41d8cd98f00b204e9800998ecf8427e\'')
|
||||||
with mock.patch.object(db.cursor, 'fetchone', return_value=(1,)):
|
with mock.patch.object(db.cursor, 'fetchone', return_value=(1,)):
|
||||||
results = db.query_single_row(query)
|
results = db._query_single_row(query)
|
||||||
|
|
||||||
self.assertEqual(results, (1,))
|
self.assertEqual(results, (1,))
|
||||||
|
|
||||||
|
|
|
@ -100,7 +100,8 @@ def main():
|
||||||
log.error('Image must be supplied for processing.')
|
log.error('Image must be supplied for processing.')
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
image_processor_options = ImageProcessorOptions(
|
image_processor_options = ImageProcessorOptions(
|
||||||
not args.no_base64, not args.no_gzip, not args.no_zip, args.reindex)
|
not args.no_base64, not args.no_gzip, not args.no_zip, args.reparse,
|
||||||
|
args.reindex, args.delete)
|
||||||
image_processor = ImageProcessor(
|
image_processor = ImageProcessor(
|
||||||
args.case, image_id, os.path.abspath(args.image),
|
args.case, image_id, os.path.abspath(args.image),
|
||||||
image_processor_options, args.config)
|
image_processor_options, args.config)
|
||||||
|
@ -134,9 +135,16 @@ def parse_args():
|
||||||
'--no_gzip', help='don\'t decompress gzip', action='store_true')
|
'--no_gzip', help='don\'t decompress gzip', action='store_true')
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'--no_zip', help='don\'t decompress zip', action='store_true')
|
'--no_zip', help='don\'t decompress zip', action='store_true')
|
||||||
|
parser.add_argument(
|
||||||
|
'--reparse',
|
||||||
|
help='reparse filesystem (will delete existing filesystem mapping)',
|
||||||
|
action='store_true')
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'--reindex', help='recreate index (will delete existing index)',
|
'--reindex', help='recreate index (will delete existing index)',
|
||||||
action='store_true')
|
action='store_true')
|
||||||
|
parser.add_argument(
|
||||||
|
'--delete', help='delete image (filesystem mapping and index)',
|
||||||
|
action='store_true')
|
||||||
|
|
||||||
# Search args
|
# Search args
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
|
|
|
@ -290,7 +290,7 @@ class ImageProcessor():
|
||||||
self.case = case
|
self.case = case
|
||||||
self.config = dfdewey_config.load_config(config_file=config_file)
|
self.config = dfdewey_config.load_config(config_file=config_file)
|
||||||
self.opensearch = None
|
self.opensearch = None
|
||||||
self.image_hash = None
|
self.image_hash = image_id
|
||||||
self.image_id = image_id
|
self.image_id = image_id
|
||||||
self.image_path = image_path
|
self.image_path = image_path
|
||||||
self.options = options
|
self.options = options
|
||||||
|
@ -313,7 +313,7 @@ class ImageProcessor():
|
||||||
|
|
||||||
image_exists = False
|
image_exists = False
|
||||||
if not tables_exist:
|
if not tables_exist:
|
||||||
self._initialise_database()
|
self.postgresql.initialise_database()
|
||||||
else:
|
else:
|
||||||
image_exists = self.postgresql.value_exists(
|
image_exists = self.postgresql.value_exists(
|
||||||
'images', 'image_id', self.image_id)
|
'images', 'image_id', self.image_id)
|
||||||
|
@ -322,39 +322,89 @@ class ImageProcessor():
|
||||||
# case.
|
# case.
|
||||||
image_case_exists = False
|
image_case_exists = False
|
||||||
if image_exists:
|
if image_exists:
|
||||||
image_case = self.postgresql.query_single_row((
|
image_case_exists = self.postgresql.is_image_in_case(
|
||||||
'SELECT 1 from image_case '
|
self.image_id, self.case)
|
||||||
'WHERE image_id = \'{0:s}\' AND case_id = \'{1:s}\'').format(
|
|
||||||
self.image_id, self.case))
|
|
||||||
if image_case:
|
|
||||||
image_case_exists = True
|
|
||||||
else:
|
else:
|
||||||
self.postgresql.execute((
|
self.postgresql.insert_image(
|
||||||
'INSERT INTO images (image_id, image_path, image_hash) '
|
self.image_id, self.image_path, self.image_hash)
|
||||||
'VALUES (\'{0:s}\', \'{1:s}\', \'{2:s}\')').format(
|
|
||||||
self.image_id, self.image_path, self.image_hash))
|
|
||||||
|
|
||||||
if not image_case_exists:
|
if not image_case_exists:
|
||||||
self.postgresql.execute((
|
self.postgresql.link_image_to_case(self.image_id, self.case)
|
||||||
'INSERT INTO image_case (case_id, image_id) '
|
|
||||||
'VALUES (\'{0:s}\', \'{1:s}\')').format(self.case, self.image_id))
|
|
||||||
|
|
||||||
return image_exists
|
return image_exists
|
||||||
|
|
||||||
def _create_filesystem_database(self):
|
def _connect_opensearch_datastore(self):
|
||||||
"""Create a filesystem database for the image."""
|
"""Connect to the Opensearch datastore."""
|
||||||
self.postgresql.execute((
|
if self.config:
|
||||||
'CREATE TABLE blocks (block INTEGER, inum INTEGER, part TEXT, '
|
self.opensearch = OpenSearchDataStore(
|
||||||
'PRIMARY KEY (block, inum, part))'))
|
host=self.config.OS_HOST, port=self.config.OS_PORT,
|
||||||
self.postgresql.execute((
|
url=self.config.OS_URL)
|
||||||
'CREATE TABLE files (inum INTEGER, filename TEXT, part TEXT, '
|
else:
|
||||||
'PRIMARY KEY (inum, filename, part))'))
|
self.opensearch = OpenSearchDataStore()
|
||||||
|
|
||||||
|
def _connect_postgresql_datastore(self):
    """Create the PostgreSQL datastore and store it on `self.postgresql`.

    Autocommit is always enabled. Host, port and database name are taken from
    the loaded config when there is one; otherwise the datastore's own
    defaults apply.
    """
    connection_kwargs = {'autocommit': True}
    if self.config:
        connection_kwargs.update(
            host=self.config.PG_HOST,
            port=self.config.PG_PORT,
            db_name=self.config.PG_DB_NAME)
    self.postgresql = PostgresqlDataStore(**connection_kwargs)
|
||||||
|
|
||||||
|
def _delete_image_data(self):
    """Delete image data.

    Unlinks the image from the current case. If no other case still links to
    the image, also deletes its index, its filesystem database and its row in
    the images table. Does nothing beyond logging an error when the image is
    not linked to the current case.
    """
    self._connect_postgresql_datastore()

    # Check if image is linked to case
    image_in_case = self.postgresql.is_image_in_case(self.image_id, self.case)
    if not image_in_case:
        log.error(
            'Image %s does not exist in case %s.', self.image_path, self.case)
        return

    # Unlink image from case
    log.info('Removing image %s from case %s', self.image_path, self.case)
    self.postgresql.unlink_image_from_case(self.image_id, self.case)

    # Check if image is linked to other cases; shared data must be kept.
    cases = self.postgresql.get_image_cases(self.image_id)
    if cases:
        log.warning(
            'Not deleting image %s data. Still linked to cases: %s',
            self.image_path, cases)
        return

    # Delete the image data
    index_name = ''.join(('es', self.image_hash))
    self._connect_opensearch_datastore()
    index_exists = self.opensearch.index_exists(index_name)
    if index_exists:
        log.info('Deleting index %s.', index_name)
        self.opensearch.delete_index(index_name)
    else:
        log.info('Index %s does not exist.', index_name)

    db_name = ''.join(('fs', self.image_hash))
    log.info('Deleting database %s.', db_name)
    self.postgresql.delete_filesystem_database(db_name)

    # Remove the image from the database
    self.postgresql.delete_image(self.image_id)
    log.info(
        'Image %s data has been removed from the datastores.', self.image_path)
|
||||||
|
|
||||||
def _extract_strings(self):
|
def _extract_strings(self):
|
||||||
"""String extraction.
|
"""String extraction.
|
||||||
|
|
||||||
Extract strings from the image using bulk_extractor.
|
Extract strings from the image using bulk_extractor.
|
||||||
"""
|
"""
|
||||||
|
self.output_path = tempfile.mkdtemp()
|
||||||
cmd = [
|
cmd = [
|
||||||
'bulk_extractor', '-o', self.output_path, '-x', 'all', '-e', 'wordlist'
|
'bulk_extractor', '-o', self.output_path, '-x', 'all', '-e', 'wordlist'
|
||||||
]
|
]
|
||||||
|
@ -371,11 +421,9 @@ class ImageProcessor():
|
||||||
|
|
||||||
log.info('Running bulk_extractor: [%s]', ' '.join(cmd))
|
log.info('Running bulk_extractor: [%s]', ' '.join(cmd))
|
||||||
try:
|
try:
|
||||||
output = subprocess.check_output(cmd)
|
subprocess.check_call(cmd)
|
||||||
except subprocess.CalledProcessError as e:
|
except subprocess.CalledProcessError as e:
|
||||||
raise RuntimeError('String extraction failed.') from e
|
raise RuntimeError('String extraction failed.') from e
|
||||||
md5_offset = output.index(b'MD5') + 19
|
|
||||||
self.image_hash = output[md5_offset:md5_offset + 32].decode('utf-8')
|
|
||||||
|
|
||||||
def _get_volume_details(self, path_spec):
|
def _get_volume_details(self, path_spec):
|
||||||
"""Logs volume details for the given path spec.
|
"""Logs volume details for the given path spec.
|
||||||
|
@ -435,12 +483,7 @@ class ImageProcessor():
|
||||||
|
|
||||||
def _index_strings(self):
|
def _index_strings(self):
|
||||||
"""Index the extracted strings."""
|
"""Index the extracted strings."""
|
||||||
if self.config:
|
self._connect_opensearch_datastore()
|
||||||
self.opensearch = OpenSearchDataStore(
|
|
||||||
host=self.config.OS_HOST, port=self.config.OS_PORT,
|
|
||||||
url=self.config.OS_URL)
|
|
||||||
else:
|
|
||||||
self.opensearch = OpenSearchDataStore()
|
|
||||||
index_name = ''.join(('es', self.image_hash))
|
index_name = ''.join(('es', self.image_hash))
|
||||||
index_exists = self.opensearch.index_exists(index_name)
|
index_exists = self.opensearch.index_exists(index_name)
|
||||||
if index_exists:
|
if index_exists:
|
||||||
|
@ -488,40 +531,30 @@ class ImageProcessor():
|
||||||
records = self.opensearch.import_event(index_name)
|
records = self.opensearch.import_event(index_name)
|
||||||
log.info('Indexed %d records...', records)
|
log.info('Indexed %d records...', records)
|
||||||
|
|
||||||
def _initialise_database(self):
|
|
||||||
"""Initialse the image database."""
|
|
||||||
self.postgresql.execute((
|
|
||||||
'CREATE TABLE images (image_id TEXT PRIMARY KEY, image_path TEXT, '
|
|
||||||
'image_hash TEXT)'))
|
|
||||||
|
|
||||||
self.postgresql.execute((
|
|
||||||
'CREATE TABLE image_case ('
|
|
||||||
'case_id TEXT, image_id TEXT REFERENCES images(image_id), '
|
|
||||||
'PRIMARY KEY (case_id, image_id))'))
|
|
||||||
|
|
||||||
def _parse_filesystems(self):
|
def _parse_filesystems(self):
|
||||||
"""Filesystem parsing.
|
"""Filesystem parsing.
|
||||||
|
|
||||||
Parse each filesystem to create a mapping from byte offsets to files.
|
Parse each filesystem to create a mapping from byte offsets to files.
|
||||||
"""
|
"""
|
||||||
if self.config:
|
self._connect_postgresql_datastore()
|
||||||
self.postgresql = PostgresqlDataStore(
|
already_parsed = self._already_parsed()
|
||||||
host=self.config.PG_HOST, port=self.config.PG_PORT,
|
db_name = ''.join(('fs', self.image_hash))
|
||||||
db_name=self.config.PG_DB_NAME, autocommit=True)
|
if already_parsed:
|
||||||
else:
|
|
||||||
self.postgresql = PostgresqlDataStore(autocommit=True)
|
|
||||||
if self._already_parsed():
|
|
||||||
log.info('Image already parsed: [%s]', self.image_path)
|
log.info('Image already parsed: [%s]', self.image_path)
|
||||||
else:
|
if self.options.reparse:
|
||||||
db_name = ''.join(('fs', self.image_hash))
|
log.info('Reparsing.')
|
||||||
self.postgresql.execute('CREATE DATABASE {0:s}'.format(db_name))
|
self.postgresql.delete_filesystem_database(db_name)
|
||||||
|
log.info('Database %s deleted.', db_name)
|
||||||
|
already_parsed = False
|
||||||
|
if not already_parsed:
|
||||||
|
self.postgresql.create_database(db_name)
|
||||||
if self.config:
|
if self.config:
|
||||||
self.postgresql.switch_database(
|
self.postgresql.switch_database(
|
||||||
host=self.config.PG_HOST, port=self.config.PG_PORT, db_name=db_name)
|
host=self.config.PG_HOST, port=self.config.PG_PORT, db_name=db_name)
|
||||||
else:
|
else:
|
||||||
self.postgresql.switch_database(db_name=db_name)
|
self.postgresql.switch_database(db_name=db_name)
|
||||||
|
|
||||||
self._create_filesystem_database()
|
self.postgresql.create_filesystem_database()
|
||||||
|
|
||||||
# Scan image for volumes
|
# Scan image for volumes
|
||||||
options = volume_scanner.VolumeScannerOptions()
|
options = volume_scanner.VolumeScannerOptions()
|
||||||
|
@ -588,18 +621,21 @@ class ImageProcessor():
|
||||||
|
|
||||||
def process_image(self):
|
def process_image(self):
|
||||||
"""Process the image."""
|
"""Process the image."""
|
||||||
self.output_path = tempfile.mkdtemp()
|
if self.options.delete:
|
||||||
log.info('* Processing start: %s', datetime.now())
|
log.info('* Deleting image data: %s', datetime.now())
|
||||||
self._extract_strings()
|
self._delete_image_data()
|
||||||
log.info('String extraction complete.')
|
else:
|
||||||
|
log.info('* Parsing image: %s', datetime.now())
|
||||||
|
self._parse_filesystems()
|
||||||
|
log.info('Parsing complete.')
|
||||||
|
|
||||||
log.info('* Parsing image: %s', datetime.now())
|
log.info('* Extracting strings: %s', datetime.now())
|
||||||
self._parse_filesystems()
|
self._extract_strings()
|
||||||
log.info('Parsing complete.')
|
log.info('String extraction complete.')
|
||||||
|
|
||||||
log.info('* Indexing strings: %s', datetime.now())
|
log.info('* Indexing strings: %s', datetime.now())
|
||||||
self._index_strings()
|
self._index_strings()
|
||||||
log.info('Indexing complete.')
|
log.info('Indexing complete.')
|
||||||
|
|
||||||
log.info('* Processing complete: %s', datetime.now())
|
log.info('* Processing complete: %s', datetime.now())
|
||||||
|
|
||||||
|
@ -613,10 +649,14 @@ class ImageProcessorOptions():
|
||||||
unzip (bool): decompress zip.
|
unzip (bool): decompress zip.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, base64=True, gunzip=True, unzip=True, reindex=False):
|
def __init__(
|
||||||
|
self, base64=True, gunzip=True, unzip=True, reparse=False, reindex=False,
|
||||||
|
delete=False):
|
||||||
"""Initialise image processor options."""
|
"""Initialise image processor options."""
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.base64 = base64
|
self.base64 = base64
|
||||||
self.gunzip = gunzip
|
self.gunzip = gunzip
|
||||||
self.unzip = unzip
|
self.unzip = unzip
|
||||||
|
self.reparse = reparse
|
||||||
self.reindex = reindex
|
self.reindex = reindex
|
||||||
|
self.delete = delete
|
||||||
|
|
|
@ -79,10 +79,8 @@ class ImageProcessorTest(unittest.TestCase):
|
||||||
image_processor.image_hash = TEST_IMAGE_HASH
|
image_processor.image_hash = TEST_IMAGE_HASH
|
||||||
return image_processor
|
return image_processor
|
||||||
|
|
||||||
@mock.patch(
|
|
||||||
'dfdewey.utils.image_processor.ImageProcessor._initialise_database')
|
|
||||||
@mock.patch('dfdewey.datastore.postgresql.PostgresqlDataStore')
|
@mock.patch('dfdewey.datastore.postgresql.PostgresqlDataStore')
|
||||||
def test_already_parsed(self, mock_postgresql, mock_initialise_database):
|
def test_already_parsed(self, mock_postgresql):
|
||||||
"""Test already parsed method."""
|
"""Test already parsed method."""
|
||||||
image_processor = self._get_image_processor()
|
image_processor = self._get_image_processor()
|
||||||
|
|
||||||
|
@ -91,78 +89,91 @@ class ImageProcessorTest(unittest.TestCase):
|
||||||
image_processor.postgresql = mock_postgresql
|
image_processor.postgresql = mock_postgresql
|
||||||
result = image_processor._already_parsed()
|
result = image_processor._already_parsed()
|
||||||
|
|
||||||
mock_initialise_database.assert_called_once()
|
mock_postgresql.initialise_database.assert_called_once()
|
||||||
calls = [
|
mock_postgresql.insert_image.assert_called_once_with(
|
||||||
mock.call((
|
TEST_IMAGE_ID, TEST_IMAGE, TEST_IMAGE_HASH)
|
||||||
'INSERT INTO images (image_id, image_path, image_hash) '
|
mock_postgresql.link_image_to_case.assert_called_once_with(
|
||||||
'VALUES (\'{0:s}\', \'{1:s}\', \'{2:s}\')').format(
|
TEST_IMAGE_ID, TEST_CASE)
|
||||||
TEST_IMAGE_ID, TEST_IMAGE, TEST_IMAGE_HASH)),
|
|
||||||
mock.call((
|
|
||||||
'INSERT INTO image_case (case_id, image_id) '
|
|
||||||
'VALUES (\'{0:s}\', \'{1:s}\')').format(TEST_CASE, TEST_IMAGE_ID))
|
|
||||||
]
|
|
||||||
mock_postgresql.execute.assert_has_calls(calls)
|
|
||||||
self.assertEqual(result, False)
|
self.assertEqual(result, False)
|
||||||
|
|
||||||
# Test database exists, image already in case
|
# Test database exists, image already in case
|
||||||
mock_postgresql.table_exists.return_value = True
|
mock_postgresql.table_exists.return_value = True
|
||||||
mock_postgresql.value_exists.return_value = True
|
mock_postgresql.value_exists.return_value = True
|
||||||
mock_postgresql.query_single_row.return_value = (1,)
|
mock_postgresql.is_image_in_case.return_value = True
|
||||||
mock_postgresql.execute.reset_mock()
|
mock_postgresql.link_image_to_case.reset_mock()
|
||||||
|
|
||||||
image_processor.postgresql = mock_postgresql
|
image_processor.postgresql = mock_postgresql
|
||||||
result = image_processor._already_parsed()
|
result = image_processor._already_parsed()
|
||||||
mock_postgresql.execute.assert_not_called()
|
mock_postgresql.link_image_to_case.assert_not_called()
|
||||||
self.assertEqual(result, True)
|
self.assertEqual(result, True)
|
||||||
|
|
||||||
# Test database exists, image exists, but not in case
|
# Test database exists, image exists, but not in case
|
||||||
mock_postgresql.query_single_row.return_value = None
|
mock_postgresql.is_image_in_case.return_value = False
|
||||||
image_processor.postgresql = mock_postgresql
|
image_processor.postgresql = mock_postgresql
|
||||||
result = image_processor._already_parsed()
|
result = image_processor._already_parsed()
|
||||||
mock_postgresql.execute.assert_called_once_with((
|
mock_postgresql.link_image_to_case.assert_called_once_with(
|
||||||
'INSERT INTO image_case (case_id, image_id) '
|
TEST_IMAGE_ID, TEST_CASE)
|
||||||
'VALUES (\'{0:s}\', \'{1:s}\')').format(TEST_CASE, TEST_IMAGE_ID))
|
|
||||||
self.assertEqual(result, True)
|
self.assertEqual(result, True)
|
||||||
|
|
||||||
|
@mock.patch(
|
||||||
|
'dfdewey.utils.image_processor.ImageProcessor._connect_opensearch_datastore'
|
||||||
|
)
|
||||||
|
@mock.patch(
|
||||||
|
'dfdewey.utils.image_processor.ImageProcessor._connect_postgresql_datastore'
|
||||||
|
)
|
||||||
|
@mock.patch('dfdewey.datastore.opensearch.OpenSearchDataStore')
|
||||||
@mock.patch('dfdewey.datastore.postgresql.PostgresqlDataStore')
|
@mock.patch('dfdewey.datastore.postgresql.PostgresqlDataStore')
|
||||||
def test_create_filesystem_database(self, mock_postgresql):
|
def test_delete_image_data(
|
||||||
"""Test create filesystem database method."""
|
self, mock_postgresql, mock_opensearch, mock_connect_postgres,
|
||||||
|
mock_connect_opensearch):
|
||||||
|
"""Test delete image data method."""
|
||||||
image_processor = self._get_image_processor()
|
image_processor = self._get_image_processor()
|
||||||
image_processor.postgresql = mock_postgresql
|
image_processor.postgresql = mock_postgresql
|
||||||
image_processor._create_filesystem_database()
|
image_processor.opensearch = mock_opensearch
|
||||||
|
# Test if image is not in case
|
||||||
|
mock_postgresql.is_image_in_case.return_value = False
|
||||||
|
image_processor._delete_image_data()
|
||||||
|
mock_connect_postgres.assert_called_once()
|
||||||
|
mock_postgresql.unlink_image_from_case.assert_not_called()
|
||||||
|
|
||||||
calls = [
|
# Test if image is linked to multiple cases
|
||||||
mock.call((
|
mock_postgresql.is_image_in_case.return_value = True
|
||||||
'CREATE TABLE blocks (block INTEGER, inum INTEGER, part TEXT, '
|
mock_postgresql.get_image_cases.return_value = ['test']
|
||||||
'PRIMARY KEY (block, inum, part))')),
|
image_processor._delete_image_data()
|
||||||
mock.call((
|
mock_postgresql.get_image_cases.assert_called_once()
|
||||||
'CREATE TABLE files (inum INTEGER, filename TEXT, part TEXT, '
|
mock_connect_opensearch.assert_not_called()
|
||||||
'PRIMARY KEY (inum, filename, part))'))
|
|
||||||
]
|
|
||||||
mock_postgresql.execute.assert_has_calls(calls)
|
|
||||||
|
|
||||||
@mock.patch('subprocess.check_output')
|
# Test if index exists
|
||||||
def test_extract_strings(self, mock_subprocess):
|
mock_postgresql.get_image_cases.return_value = None
|
||||||
|
mock_opensearch.index_exists.return_value = True
|
||||||
|
image_processor._delete_image_data()
|
||||||
|
mock_opensearch.delete_index.assert_called_once()
|
||||||
|
mock_postgresql.delete_filesystem_database.assert_called_once()
|
||||||
|
mock_postgresql.delete_image.assert_called_once()
|
||||||
|
|
||||||
|
# Test if index doesn't exist
|
||||||
|
mock_opensearch.delete_index.reset_mock()
|
||||||
|
mock_opensearch.index_exists.return_value = False
|
||||||
|
image_processor._delete_image_data()
|
||||||
|
mock_opensearch.delete_index.assert_not_called()
|
||||||
|
|
||||||
|
@mock.patch('tempfile.mkdtemp')
|
||||||
|
@mock.patch('subprocess.check_call')
|
||||||
|
def test_extract_strings(self, mock_subprocess, mock_mkdtemp):
|
||||||
"""Test extract strings method."""
|
"""Test extract strings method."""
|
||||||
image_processor = self._get_image_processor()
|
image_processor = self._get_image_processor()
|
||||||
image_processor.output_path = '/tmp/tmpxaemz75r'
|
mock_mkdtemp.return_value = '/tmp/tmpxaemz75r'
|
||||||
image_processor.image_hash = None
|
|
||||||
|
|
||||||
# Test with default options
|
# Test with default options
|
||||||
mock_subprocess.return_value = 'MD5 of Disk Image: {0:s}'.format(
|
|
||||||
TEST_IMAGE_HASH).encode('utf-8')
|
|
||||||
image_processor._extract_strings()
|
image_processor._extract_strings()
|
||||||
mock_subprocess.assert_called_once_with([
|
mock_subprocess.assert_called_once_with([
|
||||||
'bulk_extractor', '-o', '/tmp/tmpxaemz75r', '-x', 'all', '-e',
|
'bulk_extractor', '-o', '/tmp/tmpxaemz75r', '-x', 'all', '-e',
|
||||||
'wordlist', '-e', 'base64', '-e', 'gzip', '-e', 'zip', '-S',
|
'wordlist', '-e', 'base64', '-e', 'gzip', '-e', 'zip', '-S',
|
||||||
'strings=YES', '-S', 'word_max=1000000', TEST_IMAGE
|
'strings=YES', '-S', 'word_max=1000000', TEST_IMAGE
|
||||||
])
|
])
|
||||||
self.assertEqual(image_processor.image_hash, TEST_IMAGE_HASH)
|
|
||||||
|
|
||||||
# Test options
|
# Test options
|
||||||
mock_subprocess.reset_mock()
|
mock_subprocess.reset_mock()
|
||||||
mock_subprocess.return_value = 'MD5 of Disk Image: {0:s}'.format(
|
|
||||||
TEST_IMAGE_HASH).encode('utf-8')
|
|
||||||
image_processor.options.base64 = False
|
image_processor.options.base64 = False
|
||||||
image_processor.options.gunzip = False
|
image_processor.options.gunzip = False
|
||||||
image_processor.options.unzip = False
|
image_processor.options.unzip = False
|
||||||
|
@ -264,33 +275,17 @@ class ImageProcessorTest(unittest.TestCase):
|
||||||
self.assertEqual(mock_index_record.call_count, 3)
|
self.assertEqual(mock_index_record.call_count, 3)
|
||||||
mock_import_event.assert_called_once()
|
mock_import_event.assert_called_once()
|
||||||
|
|
||||||
@mock.patch('dfdewey.datastore.postgresql.PostgresqlDataStore')
|
|
||||||
def test_initialise_database(self, mock_postgresql):
|
|
||||||
"""Test initialise database method."""
|
|
||||||
image_processor = self._get_image_processor()
|
|
||||||
image_processor.postgresql = mock_postgresql
|
|
||||||
calls = [
|
|
||||||
mock.call(
|
|
||||||
'CREATE TABLE images (image_id TEXT PRIMARY KEY, image_path TEXT, image_hash TEXT)'
|
|
||||||
),
|
|
||||||
mock.call((
|
|
||||||
'CREATE TABLE image_case ('
|
|
||||||
'case_id TEXT, image_id TEXT REFERENCES images(image_id), '
|
|
||||||
'PRIMARY KEY (case_id, image_id))'))
|
|
||||||
]
|
|
||||||
image_processor._initialise_database()
|
|
||||||
mock_postgresql.execute.assert_has_calls(calls)
|
|
||||||
|
|
||||||
@mock.patch('psycopg2.connect')
|
@mock.patch('psycopg2.connect')
|
||||||
@mock.patch('dfdewey.utils.image_processor.ImageProcessor._already_parsed')
|
@mock.patch('dfdewey.utils.image_processor.ImageProcessor._already_parsed')
|
||||||
@mock.patch(
|
@mock.patch(
|
||||||
'dfdewey.datastore.postgresql.PostgresqlDataStore.switch_database')
|
'dfdewey.datastore.postgresql.PostgresqlDataStore.switch_database')
|
||||||
@mock.patch('dfdewey.datastore.postgresql.PostgresqlDataStore.execute')
|
@mock.patch('dfdewey.datastore.postgresql.PostgresqlDataStore._execute')
|
||||||
@mock.patch('dfdewey.datastore.postgresql.PostgresqlDataStore.bulk_insert')
|
@mock.patch('dfdewey.datastore.postgresql.PostgresqlDataStore.bulk_insert')
|
||||||
def test_parse_filesystems(
|
def test_parse_filesystems(
|
||||||
self, mock_bulk_insert, mock_execute, mock_switch_database,
|
self, mock_bulk_insert, mock_execute, mock_switch_database,
|
||||||
mock_already_parsed, _):
|
mock_already_parsed, _):
|
||||||
"""Test parse filesystems method."""
|
"""Test parse filesystems method."""
|
||||||
|
db_name = ''.join(('fs', TEST_IMAGE_HASH))
|
||||||
image_processor = self._get_image_processor()
|
image_processor = self._get_image_processor()
|
||||||
|
|
||||||
# Test image already parsed
|
# Test image already parsed
|
||||||
|
@ -298,6 +293,13 @@ class ImageProcessorTest(unittest.TestCase):
|
||||||
image_processor._parse_filesystems()
|
image_processor._parse_filesystems()
|
||||||
mock_execute.assert_not_called()
|
mock_execute.assert_not_called()
|
||||||
|
|
||||||
|
# Test reparse flag
|
||||||
|
image_processor.options.reparse = True
|
||||||
|
image_processor._parse_filesystems()
|
||||||
|
mock_execute.assert_any_call('DROP DATABASE {0:s}'.format(db_name))
|
||||||
|
mock_execute.reset_mock()
|
||||||
|
mock_switch_database.reset_mock()
|
||||||
|
|
||||||
# Test image not parsed
|
# Test image not parsed
|
||||||
current_path = os.path.abspath(os.path.dirname(__file__))
|
current_path = os.path.abspath(os.path.dirname(__file__))
|
||||||
image_processor.image_path = os.path.join(
|
image_processor.image_path = os.path.join(
|
||||||
|
@ -305,8 +307,7 @@ class ImageProcessorTest(unittest.TestCase):
|
||||||
mock_already_parsed.return_value = False
|
mock_already_parsed.return_value = False
|
||||||
image_processor._parse_filesystems()
|
image_processor._parse_filesystems()
|
||||||
self.assertEqual(mock_execute.call_count, 3)
|
self.assertEqual(mock_execute.call_count, 3)
|
||||||
mock_switch_database.assert_called_once_with(
|
mock_switch_database.assert_called_once_with(db_name=db_name)
|
||||||
db_name=''.join(('fs', TEST_IMAGE_HASH)))
|
|
||||||
self.assertIsInstance(image_processor.scanner, FileEntryScanner)
|
self.assertIsInstance(image_processor.scanner, FileEntryScanner)
|
||||||
self.assertEqual(len(image_processor.path_specs), 2)
|
self.assertEqual(len(image_processor.path_specs), 2)
|
||||||
ntfs_path_spec = image_processor.path_specs[0]
|
ntfs_path_spec = image_processor.path_specs[0]
|
||||||
|
@ -337,17 +338,20 @@ class ImageProcessorTest(unittest.TestCase):
|
||||||
current_path, '..', '..', 'test_data', 'test.dmg')
|
current_path, '..', '..', 'test_data', 'test.dmg')
|
||||||
image_processor._parse_filesystems()
|
image_processor._parse_filesystems()
|
||||||
|
|
||||||
|
@mock.patch('dfdewey.utils.image_processor.ImageProcessor._delete_image_data')
|
||||||
@mock.patch('dfdewey.utils.image_processor.ImageProcessor._parse_filesystems')
|
@mock.patch('dfdewey.utils.image_processor.ImageProcessor._parse_filesystems')
|
||||||
@mock.patch('dfdewey.utils.image_processor.ImageProcessor._index_strings')
|
@mock.patch('dfdewey.utils.image_processor.ImageProcessor._index_strings')
|
||||||
@mock.patch('dfdewey.utils.image_processor.ImageProcessor._extract_strings')
|
@mock.patch('dfdewey.utils.image_processor.ImageProcessor._extract_strings')
|
||||||
def test_process_image(
|
def test_process_image(
|
||||||
self, mock_extract_strings, mock_index_strings, mock_parse_filesystems):
|
self, mock_extract_strings, mock_index_strings, mock_parse_filesystems,
|
||||||
|
mock_delete_image_data):
|
||||||
"""Test process image method."""
|
"""Test process image method."""
|
||||||
image_processor = self._get_image_processor()
|
image_processor = self._get_image_processor()
|
||||||
image_processor.process_image()
|
image_processor.process_image()
|
||||||
mock_extract_strings.assert_called_once()
|
mock_extract_strings.assert_called_once()
|
||||||
mock_index_strings.assert_called_once()
|
mock_index_strings.assert_called_once()
|
||||||
mock_parse_filesystems.assert_called_once()
|
mock_parse_filesystems.assert_called_once()
|
||||||
|
mock_delete_image_data.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
@ -91,39 +91,11 @@ class IndexSearcher():
|
||||||
|
|
||||||
if image != 'all':
|
if image != 'all':
|
||||||
self.image = os.path.abspath(self.image)
|
self.image = os.path.abspath(self.image)
|
||||||
self._get_image_hash()
|
image_hash = self.postgresql.get_image_hash(self.image_id)
|
||||||
|
if image_hash:
|
||||||
|
self.images[image_hash] = self.image
|
||||||
else:
|
else:
|
||||||
self._get_case_images()
|
self.images = self.postgresql.get_case_images(self.case)
|
||||||
|
|
||||||
def _get_case_images(self):
|
|
||||||
"""Get all images for the case.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
A dictionary of the images in the case.
|
|
||||||
"""
|
|
||||||
images = self.postgresql.query((
|
|
||||||
'SELECT image_hash, image_path FROM image_case NATURAL JOIN images '
|
|
||||||
'WHERE case_id = \'{0:s}\'').format(self.case))
|
|
||||||
for image_hash, image_path in images:
|
|
||||||
self.images[image_hash] = image_path
|
|
||||||
|
|
||||||
def _get_filenames_from_inode(self, inode, location):
|
|
||||||
"""Gets filename(s) from an inode number.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
inode: Inode number of target file
|
|
||||||
location: Partition number
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Filename(s) of given inode or None
|
|
||||||
"""
|
|
||||||
results = self.postgresql.query((
|
|
||||||
'SELECT filename FROM files '
|
|
||||||
'WHERE inum = {0:d} AND part = \'{1:s}\'').format(inode, location))
|
|
||||||
filenames = []
|
|
||||||
for result in results:
|
|
||||||
filenames.append(result[0])
|
|
||||||
return filenames
|
|
||||||
|
|
||||||
def _get_filenames_from_offset(self, image_path, image_hash, offset):
|
def _get_filenames_from_offset(self, image_path, image_hash, offset):
|
||||||
"""Gets filename(s) given a byte offset within an image.
|
"""Gets filename(s) given a byte offset within an image.
|
||||||
|
@ -173,14 +145,13 @@ class IndexSearcher():
|
||||||
except TypeError as e:
|
except TypeError as e:
|
||||||
log.error('Error opening image: %s', e)
|
log.error('Error opening image: %s', e)
|
||||||
|
|
||||||
inodes = self._get_inodes(
|
inodes = self.postgresql.get_inodes(
|
||||||
int((offset - partition_offset) / block_size), hit_location)
|
int((offset - partition_offset) / block_size), hit_location)
|
||||||
|
|
||||||
if inodes:
|
if inodes:
|
||||||
for i in inodes:
|
for inode in inodes:
|
||||||
inode = i[0]
|
|
||||||
# Account for resident files
|
# Account for resident files
|
||||||
if (i[0] == 0 and
|
if (inode == 0 and
|
||||||
filesystem.info.ftype == pytsk3.TSK_FS_TYPE_NTFS_DETECT):
|
filesystem.info.ftype == pytsk3.TSK_FS_TYPE_NTFS_DETECT):
|
||||||
mft_record_size_offset = 0x40 + partition_offset
|
mft_record_size_offset = 0x40 + partition_offset
|
||||||
mft_record_size = int.from_bytes(
|
mft_record_size = int.from_bytes(
|
||||||
|
@ -192,39 +163,13 @@ class IndexSearcher():
|
||||||
inode = self._get_ntfs_resident_inode((offset - partition_offset),
|
inode = self._get_ntfs_resident_inode((offset - partition_offset),
|
||||||
filesystem, mft_record_size)
|
filesystem, mft_record_size)
|
||||||
|
|
||||||
inode_filenames = self._get_filenames_from_inode(inode, hit_location)
|
inode_filenames = self.postgresql.get_filenames_from_inode(
|
||||||
|
inode, hit_location)
|
||||||
filename = '\n'.join(inode_filenames)
|
filename = '\n'.join(inode_filenames)
|
||||||
filenames.append('{0:s} ({1:d})'.format(filename, inode))
|
filenames.append('{0:s} ({1:d})'.format(filename, inode))
|
||||||
|
|
||||||
return filenames
|
return filenames
|
||||||
|
|
||||||
def _get_image_hash(self):
|
|
||||||
"""Get an image hash from the datastore.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
MD5 hash for the image stored in PostgreSQL.
|
|
||||||
"""
|
|
||||||
image_hash = self.postgresql.query_single_row(
|
|
||||||
'SELECT image_hash FROM images WHERE image_id = \'{0:s}\''.format(
|
|
||||||
self.image_id))
|
|
||||||
if image_hash:
|
|
||||||
self.images[image_hash[0]] = self.image
|
|
||||||
|
|
||||||
def _get_inodes(self, block, location):
|
|
||||||
"""Gets inode numbers for a block offset.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
block (int): block offset within the image.
|
|
||||||
location (str): Partition location / identifier.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Inode number(s) of the given block or None.
|
|
||||||
"""
|
|
||||||
inodes = self.postgresql.query(
|
|
||||||
('SELECT inum FROM blocks '
|
|
||||||
'WHERE block = {0:d} AND part = \'{1:s}\'').format(block, location))
|
|
||||||
return inodes
|
|
||||||
|
|
||||||
def _get_ntfs_resident_inode(self, offset, filesystem, mft_record_size):
|
def _get_ntfs_resident_inode(self, offset, filesystem, mft_record_size):
|
||||||
"""Gets the inode number associated with NTFS $MFT resident data.
|
"""Gets the inode number associated with NTFS $MFT resident data.
|
||||||
|
|
||||||
|
|
|
@ -39,14 +39,14 @@ class IndexSearcherTest(unittest.TestCase):
|
||||||
Test index searcher.
|
Test index searcher.
|
||||||
"""
|
"""
|
||||||
with mock.patch('psycopg2.connect'), mock.patch(
|
with mock.patch('psycopg2.connect'), mock.patch(
|
||||||
'dfdewey.datastore.postgresql.PostgresqlDataStore.query_single_row'
|
'dfdewey.datastore.postgresql.PostgresqlDataStore._query_single_row'
|
||||||
) as mock_query_single_row:
|
) as mock_query_single_row:
|
||||||
mock_query_single_row.return_value = (TEST_IMAGE_HASH,)
|
mock_query_single_row.return_value = (TEST_IMAGE_HASH,)
|
||||||
index_searcher = IndexSearcher(TEST_CASE, TEST_IMAGE_ID, TEST_IMAGE)
|
index_searcher = IndexSearcher(TEST_CASE, TEST_IMAGE_ID, TEST_IMAGE)
|
||||||
index_searcher.config = None
|
index_searcher.config = None
|
||||||
return index_searcher
|
return index_searcher
|
||||||
|
|
||||||
@mock.patch('dfdewey.datastore.postgresql.PostgresqlDataStore.query')
|
@mock.patch('dfdewey.datastore.postgresql.PostgresqlDataStore._query')
|
||||||
def test_get_case_images(self, mock_query):
|
def test_get_case_images(self, mock_query):
|
||||||
"""Test get case images method."""
|
"""Test get case images method."""
|
||||||
mock_query.return_value = [(
|
mock_query.return_value = [(
|
||||||
|
@ -61,19 +61,10 @@ class IndexSearcherTest(unittest.TestCase):
|
||||||
self.assertEqual(index_searcher.images['hash1'], 'image1.dd')
|
self.assertEqual(index_searcher.images['hash1'], 'image1.dd')
|
||||||
self.assertEqual(index_searcher.images['hash2'], 'image2.dd')
|
self.assertEqual(index_searcher.images['hash2'], 'image2.dd')
|
||||||
|
|
||||||
@mock.patch('dfdewey.datastore.postgresql.PostgresqlDataStore.query')
|
@mock.patch('dfdewey.datastore.postgresql.PostgresqlDataStore.get_inodes')
|
||||||
def test_get_filenames_from_inode(self, mock_query):
|
|
||||||
"""Test get filenames from inode method."""
|
|
||||||
index_searcher = self._get_index_searcher()
|
|
||||||
mock_query.return_value = [('test.txt',), ('test.txt:ads',)]
|
|
||||||
filenames = index_searcher._get_filenames_from_inode(42, '/p1')
|
|
||||||
self.assertEqual(len(filenames), 2)
|
|
||||||
self.assertEqual(filenames[0], 'test.txt')
|
|
||||||
self.assertEqual(filenames[1], 'test.txt:ads')
|
|
||||||
|
|
||||||
@mock.patch('dfdewey.utils.index_searcher.IndexSearcher._get_inodes')
|
|
||||||
@mock.patch(
|
@mock.patch(
|
||||||
'dfdewey.utils.index_searcher.IndexSearcher._get_filenames_from_inode')
|
'dfdewey.datastore.postgresql.PostgresqlDataStore.get_filenames_from_inode'
|
||||||
|
)
|
||||||
@mock.patch(
|
@mock.patch(
|
||||||
'dfdewey.datastore.postgresql.PostgresqlDataStore.switch_database')
|
'dfdewey.datastore.postgresql.PostgresqlDataStore.switch_database')
|
||||||
def test_get_filenames_from_offset(
|
def test_get_filenames_from_offset(
|
||||||
|
@ -94,7 +85,7 @@ class IndexSearcherTest(unittest.TestCase):
|
||||||
|
|
||||||
# Test offset within a file
|
# Test offset within a file
|
||||||
mock_get_inodes.reset_mock()
|
mock_get_inodes.reset_mock()
|
||||||
mock_get_inodes.return_value = [(0,)]
|
mock_get_inodes.return_value = [0]
|
||||||
mock_get_filenames_from_inode.return_value = ['adams.txt']
|
mock_get_filenames_from_inode.return_value = ['adams.txt']
|
||||||
filenames = index_searcher._get_filenames_from_offset(
|
filenames = index_searcher._get_filenames_from_offset(
|
||||||
image_path, TEST_IMAGE_HASH, 1133936)
|
image_path, TEST_IMAGE_HASH, 1133936)
|
||||||
|
@ -104,7 +95,7 @@ class IndexSearcherTest(unittest.TestCase):
|
||||||
|
|
||||||
# Test volume image
|
# Test volume image
|
||||||
mock_get_inodes.reset_mock()
|
mock_get_inodes.reset_mock()
|
||||||
mock_get_inodes.return_value = [(2,)]
|
mock_get_inodes.return_value = [2]
|
||||||
mock_get_filenames_from_inode.reset_mock()
|
mock_get_filenames_from_inode.reset_mock()
|
||||||
mock_get_filenames_from_inode.return_value = []
|
mock_get_filenames_from_inode.return_value = []
|
||||||
image_path = os.path.join(
|
image_path = os.path.join(
|
||||||
|
|
|
@ -12,8 +12,8 @@
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
# Use the official Docker Hub Ubuntu 18.04 base image
|
# Use the official Docker Hub Ubuntu 20.04 base image
|
||||||
FROM ubuntu:18.04
|
FROM ubuntu:20.04
|
||||||
|
|
||||||
# Update the base image
|
# Update the base image
|
||||||
ENV DEBIAN_FRONTEND=noninteractive
|
ENV DEBIAN_FRONTEND=noninteractive
|
||||||
|
|
|
@ -1,10 +1,7 @@
|
||||||
# Using dfDewey
|
# Using dfDewey
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
usage: dfdewey [-h] [-c CONFIG] [--no_base64] [--no_gzip] [--no_zip]
|
usage: dfdewey [-h] [-c CONFIG] [--no_base64] [--no_gzip] [--no_zip] [--reparse] [--reindex] [--delete] [--highlight] [-s SEARCH] [--search_list SEARCH_LIST] case [image]
|
||||||
[--reindex] [--highlight] [-s SEARCH]
|
|
||||||
[--search_list SEARCH_LIST]
|
|
||||||
case [image]
|
|
||||||
|
|
||||||
positional arguments:
|
positional arguments:
|
||||||
case case ID
|
case case ID
|
||||||
|
@ -17,7 +14,9 @@ optional arguments:
|
||||||
--no_base64 don't decode base64
|
--no_base64 don't decode base64
|
||||||
--no_gzip don't decompress gzip
|
--no_gzip don't decompress gzip
|
||||||
--no_zip don't decompress zip
|
--no_zip don't decompress zip
|
||||||
|
--reparse reparse filesystem (will delete existing filesystem mapping)
|
||||||
--reindex recreate index (will delete existing index)
|
--reindex recreate index (will delete existing index)
|
||||||
|
--delete delete image (filesystem mapping and index)
|
||||||
--highlight highlight search term in results
|
--highlight highlight search term in results
|
||||||
-s SEARCH, --search SEARCH
|
-s SEARCH, --search SEARCH
|
||||||
search query
|
search query
|
||||||
|
@ -77,6 +76,13 @@ dfDewey will have bulk_extractor decode base64 data, and decompress gzip / zip
|
||||||
data by default. These can be disabled by adding the flags `--no_base64`,
|
data by default. These can be disabled by adding the flags `--no_base64`,
|
||||||
`--no_gzip`, and `--no_zip`.
|
`--no_gzip`, and `--no_zip`.
|
||||||
|
|
||||||
|
If an image has already been processed, you can opt to reparse and reindex the
|
||||||
|
image (this will first delete the existing data) by adding the flags
|
||||||
|
`--reparse` and `--reindex`.
|
||||||
|
|
||||||
|
You can also delete the data for a given image from the datastores by adding
|
||||||
|
the `--delete` flag.
|
||||||
|
|
||||||
## Searching
|
## Searching
|
||||||
|
|
||||||
To search the index for a single image, you need to supply a `CASE`, `IMAGE`,
|
To search the index for a single image, you need to supply a `CASE`, `IMAGE`,
|
||||||
|
|
Loading…
Reference in a new issue