Add image reparse and deletion functions (#31)

* Update readme for bulk_extractor v2.0.0

* Update docker image to Ubuntu 20.04

* Parse filesystem before string extraction

* Refactor postgres datastore code

* Add reparse option

* Add option to delete image data

* Update usage

* Update version
This commit is contained in:
Jason 2022-06-03 15:35:43 +10:00 committed by GitHub
parent b61a835d4a
commit 7aadd41ee2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 541 additions and 237 deletions

View file

@ -8,10 +8,8 @@ dfDewey is a digital forensics string extraction, indexing, and searching tool.
## Requirements
### bulk_extractor
dfDewey currently requires bulk_extractor for string extraction.
bulk_extractor can be downloaded and built from source here:
https://github.com/simsong/bulk_extractor
bulk_extractor can also be installed from the GIFT PPA.
bulk_extractor can be installed from the GIFT PPA.
```shell
sudo add-apt-repository ppa:gift/stable
@ -19,6 +17,11 @@ sudo apt update
sudo apt install -y bulk-extractor
```
bulk_extractor can also be downloaded and built from source here:
https://github.com/simsong/bulk_extractor
Note: bulk_extractor v1.6.0 is recommended (v2.0.0 is not yet supported).
### dfVFS
[dfVFS](https://github.com/log2timeline/dfvfs) is required for image parsing. It
can be installed from the GIFT PPA.

View file

@ -17,4 +17,4 @@
dfDewey is a digital forensics string extraction, indexing, and searching tool.
"""
__version__ = '20211220'
__version__ = '20220603'

View file

@ -44,19 +44,7 @@ class PostgresqlDataStore():
except AttributeError:
pass
def bulk_insert(self, table_spec, rows):
"""Execute a bulk insert into a table.
Args:
table_spec: String in the form 'table_name (col1, col2, ..., coln)'
rows: Array of value tuples to be inserted
"""
extras.execute_values(
self.cursor,
'INSERT INTO {0:s} VALUES %s ON CONFLICT DO NOTHING'.format(table_spec),
rows)
def execute(self, command):
def _execute(self, command):
"""Execute a command in the PostgreSQL database.
Args:
@ -64,7 +52,7 @@ class PostgresqlDataStore():
"""
self.cursor.execute(command)
def query(self, query):
def _query(self, query):
"""Query the database.
Args:
@ -77,7 +65,7 @@ class PostgresqlDataStore():
return self.cursor.fetchall()
def query_single_row(self, query):
def _query_single_row(self, query):
"""Query the database for a single row.
Args:
@ -90,6 +78,191 @@ class PostgresqlDataStore():
return self.cursor.fetchone()
def bulk_insert(self, table_spec, rows):
    """Insert many rows into a table with a single statement.

    The statement carries ``ON CONFLICT DO NOTHING``, so rows that clash with
    an existing row are skipped rather than raising.

    Args:
      table_spec: String in the form 'table_name (col1, col2, ..., coln)'
      rows: Array of value tuples to be inserted
    """
    statement = 'INSERT INTO {0:s} VALUES %s ON CONFLICT DO NOTHING'.format(
        table_spec)
    extras.execute_values(self.cursor, statement, rows)
def create_database(self, db_name):
    """Create a new database for an image.

    Args:
      db_name: Name of the database to create
    """
    command = 'CREATE DATABASE {0:s}'.format(db_name)
    self._execute(command)
def create_filesystem_database(self):
    """Create the filesystem mapping tables for the image.

    Two tables are created in the currently connected database: ``blocks``
    (block, inum, part) and ``files`` (inum, filename, part).
    """
    table_definitions = (
        'CREATE TABLE blocks (block INTEGER, inum INTEGER, part TEXT, '
        'PRIMARY KEY (block, inum, part))',
        'CREATE TABLE files (inum INTEGER, filename TEXT, part TEXT, '
        'PRIMARY KEY (inum, filename, part))',
    )
    for definition in table_definitions:
        self._execute(definition)
def delete_filesystem_database(self, db_name):
    """Drop the filesystem database belonging to an image.

    Args:
      db_name: The name of the database to drop
    """
    command = 'DROP DATABASE {0:s}'.format(db_name)
    self._execute(command)
def delete_image(self, image_id):
    """Remove an image's row from the ``images`` table.

    Args:
      image_id: Image identifier
    """
    command = 'DELETE FROM images WHERE image_id = \'{0:s}\''.format(image_id)
    self._execute(command)
def get_case_images(self, case):
    """Look up every image linked to a case.

    Args:
      case: Case name

    Returns:
      A dictionary mapping each image hash to its image path.
    """
    query = (
        'SELECT image_hash, image_path FROM image_case NATURAL JOIN images '
        'WHERE case_id = \'{0:s}\'').format(case)
    return {
        image_hash: image_path
        for image_hash, image_path in self._query(query)
    }
def get_filenames_from_inode(self, inode, location):
    """Get the filename(s) recorded for an inode number.

    Args:
      inode: Inode number of target file (must be an int; it is formatted
          with ``{0:d}``)
      location: Partition location / identifier

    Returns:
      A list of filenames for the inode; empty when there is no match.
    """
    rows = self._query((
        'SELECT filename FROM files '
        'WHERE inum = {0:d} AND part = \'{1:s}\'').format(inode, location))
    # Each row is a 1-tuple holding the filename column.
    return [row[0] for row in rows]
def get_image_cases(self, image_id):
    """Get the cases an image is linked to.

    Args:
      image_id: Image identifier

    Returns:
      A list of case identifiers; empty when the image is not linked to any
      case.
    """
    rows = self._query(
        'SELECT case_id FROM image_case WHERE image_id = \'{0:s}\''.format(
            image_id))
    # Each row is a 1-tuple holding the case_id column.
    return [row[0] for row in rows]
def get_image_hash(self, image_id):
    """Fetch the stored hash for an image.

    Args:
      image_id: Image identifier

    Returns:
      Hash for the image stored in PostgreSQL or None.
    """
    query = 'SELECT image_hash FROM images WHERE image_id = \'{0:s}\''.format(
        image_id)
    row = self._query_single_row(query)
    return row[0] if row else None
def get_inodes(self, block, location):
    """Get the inode numbers recorded for a block offset.

    Args:
      block (int): block offset within the image.
      location (str): Partition location / identifier.

    Returns:
      A list of inode numbers for the block; empty when there is no match.
    """
    rows = self._query(
        ('SELECT inum FROM blocks '
         'WHERE block = {0:d} AND part = \'{1:s}\'').format(block, location))
    # Each row is a 1-tuple holding the inum column.
    return [row[0] for row in rows]
def initialise_database(self):
    """Initialise the image database.

    Creates the ``images`` table first, then the ``image_case`` table whose
    ``image_id`` column references it.
    """
    table_definitions = (
        'CREATE TABLE images (image_id TEXT PRIMARY KEY, image_path TEXT, '
        'image_hash TEXT)',
        'CREATE TABLE image_case ('
        'case_id TEXT, image_id TEXT REFERENCES images(image_id), '
        'PRIMARY KEY (case_id, image_id))',
    )
    for definition in table_definitions:
        self._execute(definition)
def insert_image(self, image_id, image_path, image_hash):
    """Record a new image in the ``images`` table.

    Args:
      image_id: Image identifier
      image_path: Path to the image file
      image_hash: Hash of the image
    """
    template = (
        'INSERT INTO images (image_id, image_path, image_hash) '
        'VALUES (\'{0:s}\', \'{1:s}\', \'{2:s}\')')
    self._execute(template.format(image_id, image_path, image_hash))
def is_image_in_case(self, image_id, case):
    """Tell whether an image is attached to a case.

    Args:
      image_id: Image identifier
      case: Case name

    Returns:
      True if the image is attached to the case, otherwise False.
    """
    query = (
        'SELECT 1 from image_case '
        'WHERE image_id = \'{0:s}\' AND case_id = \'{1:s}\'').format(
            image_id, case)
    return bool(self._query_single_row(query))
def link_image_to_case(self, image_id, case):
    """Attach an image to a case by adding an ``image_case`` row.

    Args:
      image_id: Image identifier
      case: Case name
    """
    template = (
        'INSERT INTO image_case (case_id, image_id) '
        'VALUES (\'{0:s}\', \'{1:s}\')')
    # The column order is (case_id, image_id), the reverse of the parameters.
    self._execute(template.format(case, image_id))
def switch_database(
self, host='127.0.0.1', port=5432, db_name='dfdewey', autocommit=False):
"""Connects to a different database.
@ -128,6 +301,19 @@ class PostgresqlDataStore():
return self.cursor.fetchone() is not None
def unlink_image_from_case(self, image_id, case):
    """Detach an image from a case by deleting its ``image_case`` row.

    The command is built as a single-line string, like the other queries in
    this class, so the text sent to the server does not carry the source
    file's newline and indentation.

    Args:
      image_id: Image identifier
      case: Case name
    """
    # The filter order is (case_id, image_id), the reverse of the parameters.
    self._execute((
        'DELETE FROM image_case '
        'WHERE case_id = \'{0:s}\' AND image_id = \'{1:s}\'').format(
            case, image_id))
def value_exists(self, table_name, column_name, value):
"""Check if a value exists in a table.

View file

@ -20,6 +20,7 @@ import mock
from psycopg2 import OperationalError
from dfdewey.datastore.postgresql import PostgresqlDataStore
from dfdewey.utils.image_processor_test import TEST_CASE, TEST_IMAGE, TEST_IMAGE_HASH, TEST_IMAGE_ID
class PostgresqlTest(unittest.TestCase):
@ -47,15 +48,90 @@ class PostgresqlTest(unittest.TestCase):
'VALUES %s ON CONFLICT DO NOTHING')
mock_execute_values.assert_called_once_with(db.cursor, expected_sql, rows)
def test_create_filesystem_database(self):
"""Test create filesystem database method."""
db = self._get_datastore()
with mock.patch.object(db.cursor, 'execute') as mock_execute:
db.create_filesystem_database()
calls = [
mock.call((
'CREATE TABLE blocks (block INTEGER, inum INTEGER, part TEXT, '
'PRIMARY KEY (block, inum, part))')),
mock.call((
'CREATE TABLE files (inum INTEGER, filename TEXT, part TEXT, '
'PRIMARY KEY (inum, filename, part))'))
]
mock_execute.assert_has_calls(calls)
def test_delete_filesystem_database(self):
"""Test delete filesystem database method."""
db = self._get_datastore()
db_name = ''.join(('fs', TEST_IMAGE_HASH))
with mock.patch.object(db.cursor, 'execute') as mock_execute:
db.delete_filesystem_database(db_name)
mock_execute.assert_called_once_with(
'DROP DATABASE {0:s}'.format(db_name))
def test_delete_image(self):
"""Test delete image method."""
db = self._get_datastore()
with mock.patch.object(db.cursor, 'execute') as mock_execute:
db.delete_image(TEST_IMAGE_ID)
mock_execute.assert_called_once_with(
'DELETE FROM images WHERE image_id = \'{0:s}\''.format(TEST_IMAGE_ID))
def test_execute(self):
"""Test execute method."""
db = self._get_datastore()
command = (
'CREATE TABLE images (image_path TEXT, image_hash TEXT PRIMARY KEY)')
with mock.patch.object(db.cursor, 'execute') as mock_execute:
db.execute(command)
db._execute(command)
mock_execute.assert_called_once_with(command)
def test_get_case_images(self):
"""Test get case images method."""
db = self._get_datastore()
with mock.patch.object(db.cursor, 'fetchall',
return_value=[(TEST_IMAGE_HASH, TEST_IMAGE)]):
images = db.get_case_images(TEST_CASE)
self.assertEqual(images, {TEST_IMAGE_HASH: TEST_IMAGE})
def test_get_filenames_from_inode(self):
"""Test get filenames from inode method."""
db = self._get_datastore()
with mock.patch.object(db.cursor, 'fetchall',
return_value=[('test.txt',), ('test.txt:ads',)]):
filenames = db.get_filenames_from_inode(42, '/p1')
self.assertEqual(len(filenames), 2)
self.assertEqual(filenames[0], 'test.txt')
self.assertEqual(filenames[1], 'test.txt:ads')
def test_get_image_cases(self):
"""Test get image cases method."""
db = self._get_datastore()
with mock.patch.object(db.cursor, 'fetchall', return_value=[('test',),
('test2',)]):
cases = db.get_image_cases(TEST_IMAGE_ID)
self.assertEqual(cases[0], 'test')
self.assertEqual(cases[1], 'test2')
def test_get_image_hash(self):
"""Test get image hash method."""
db = self._get_datastore()
with mock.patch.object(db.cursor, 'fetchone',
return_value=(TEST_IMAGE_HASH,)):
image_hash = db.get_image_hash(TEST_IMAGE_ID)
self.assertEqual(image_hash, TEST_IMAGE_HASH)
def test_get_inodes(self):
"""Test get inodes method."""
db = self._get_datastore()
with mock.patch.object(db.cursor, 'fetchall', return_value=[(10,), (19,)]):
inodes = db.get_inodes(1234, '/p1')
self.assertEqual(inodes, [10, 19])
@mock.patch('psycopg2.connect')
def test_init(self, mock_connect):
"""Test init method."""
@ -63,12 +139,57 @@ class PostgresqlTest(unittest.TestCase):
with self.assertRaises(RuntimeError):
db = PostgresqlDataStore()
def test_initialise_database(self):
"""Test initialise database method."""
db = self._get_datastore()
calls = [
mock.call(
'CREATE TABLE images (image_id TEXT PRIMARY KEY, image_path TEXT, image_hash TEXT)'
),
mock.call((
'CREATE TABLE image_case ('
'case_id TEXT, image_id TEXT REFERENCES images(image_id), '
'PRIMARY KEY (case_id, image_id))'))
]
with mock.patch.object(db.cursor, 'execute') as mock_execute:
db.initialise_database()
mock_execute.assert_has_calls(calls)
def test_insert_image(self):
"""Test insert image method."""
db = self._get_datastore()
with mock.patch.object(db.cursor, 'execute') as mock_execute:
db.insert_image(TEST_IMAGE_ID, TEST_IMAGE, TEST_IMAGE_HASH)
mock_execute.assert_called_once_with((
'INSERT INTO images (image_id, image_path, image_hash) '
'VALUES (\'{0:s}\', \'{1:s}\', \'{2:s}\')').format(
TEST_IMAGE_ID, TEST_IMAGE, TEST_IMAGE_HASH))
def test_is_image_in_case(self):
"""Test is image in case method."""
db = self._get_datastore()
with mock.patch.object(db.cursor, 'fetchone', return_value=(1,)):
result = db.is_image_in_case(TEST_IMAGE_ID, TEST_CASE)
self.assertTrue(result)
with mock.patch.object(db.cursor, 'fetchone', return_value=None):
result = db.is_image_in_case(TEST_IMAGE_ID, TEST_CASE)
self.assertFalse(result)
def test_link_image_to_case(self):
"""Test link image to case method."""
db = self._get_datastore()
with mock.patch.object(db.cursor, 'execute') as mock_execute:
db.link_image_to_case(TEST_IMAGE_ID, TEST_CASE)
mock_execute.assert_called_once_with((
'INSERT INTO image_case (case_id, image_id) '
'VALUES (\'{0:s}\', \'{1:s}\')').format(TEST_CASE, TEST_IMAGE_ID))
def test_query(self):
"""Test query method."""
db = self._get_datastore()
query = 'SELECT filename FROM files WHERE inum = 0'
with mock.patch.object(db.cursor, 'fetchall', return_value=[('$MFT',)]):
results = db.query(query)
results = db._query(query)
self.assertEqual(results, [('$MFT',)])
@ -79,7 +200,7 @@ class PostgresqlTest(unittest.TestCase):
'SELECT 1 from image_case WHERE image_hash = '
'\'d41d8cd98f00b204e9800998ecf8427e\'')
with mock.patch.object(db.cursor, 'fetchone', return_value=(1,)):
results = db.query_single_row(query)
results = db._query_single_row(query)
self.assertEqual(results, (1,))

View file

@ -100,7 +100,8 @@ def main():
log.error('Image must be supplied for processing.')
sys.exit(1)
image_processor_options = ImageProcessorOptions(
not args.no_base64, not args.no_gzip, not args.no_zip, args.reindex)
not args.no_base64, not args.no_gzip, not args.no_zip, args.reparse,
args.reindex, args.delete)
image_processor = ImageProcessor(
args.case, image_id, os.path.abspath(args.image),
image_processor_options, args.config)
@ -134,9 +135,16 @@ def parse_args():
'--no_gzip', help='don\'t decompress gzip', action='store_true')
parser.add_argument(
'--no_zip', help='don\'t decompress zip', action='store_true')
parser.add_argument(
'--reparse',
help='reparse filesystem (will delete existing filesystem mapping)',
action='store_true')
parser.add_argument(
'--reindex', help='recreate index (will delete existing index)',
action='store_true')
parser.add_argument(
'--delete', help='delete image (filesystem mapping and index)',
action='store_true')
# Search args
parser.add_argument(

View file

@ -290,7 +290,7 @@ class ImageProcessor():
self.case = case
self.config = dfdewey_config.load_config(config_file=config_file)
self.opensearch = None
self.image_hash = None
self.image_hash = image_id
self.image_id = image_id
self.image_path = image_path
self.options = options
@ -313,7 +313,7 @@ class ImageProcessor():
image_exists = False
if not tables_exist:
self._initialise_database()
self.postgresql.initialise_database()
else:
image_exists = self.postgresql.value_exists(
'images', 'image_id', self.image_id)
@ -322,39 +322,89 @@ class ImageProcessor():
# case.
image_case_exists = False
if image_exists:
image_case = self.postgresql.query_single_row((
'SELECT 1 from image_case '
'WHERE image_id = \'{0:s}\' AND case_id = \'{1:s}\'').format(
self.image_id, self.case))
if image_case:
image_case_exists = True
image_case_exists = self.postgresql.is_image_in_case(
self.image_id, self.case)
else:
self.postgresql.execute((
'INSERT INTO images (image_id, image_path, image_hash) '
'VALUES (\'{0:s}\', \'{1:s}\', \'{2:s}\')').format(
self.image_id, self.image_path, self.image_hash))
self.postgresql.insert_image(
self.image_id, self.image_path, self.image_hash)
if not image_case_exists:
self.postgresql.execute((
'INSERT INTO image_case (case_id, image_id) '
'VALUES (\'{0:s}\', \'{1:s}\')').format(self.case, self.image_id))
self.postgresql.link_image_to_case(self.image_id, self.case)
return image_exists
def _create_filesystem_database(self):
"""Create a filesystem database for the image."""
self.postgresql.execute((
'CREATE TABLE blocks (block INTEGER, inum INTEGER, part TEXT, '
'PRIMARY KEY (block, inum, part))'))
self.postgresql.execute((
'CREATE TABLE files (inum INTEGER, filename TEXT, part TEXT, '
'PRIMARY KEY (inum, filename, part))'))
def _connect_opensearch_datastore(self):
    """Connect to the OpenSearch datastore.

    Host, port and URL come from the loaded config when there is one;
    otherwise the datastore's own defaults are used.
    """
    if not self.config:
        self.opensearch = OpenSearchDataStore()
        return
    self.opensearch = OpenSearchDataStore(
        host=self.config.OS_HOST, port=self.config.OS_PORT,
        url=self.config.OS_URL)
def _connect_postgresql_datastore(self):
    """Connect to the PostgreSQL datastore with autocommit enabled.

    Host, port and database name come from the loaded config when there is
    one; otherwise the datastore's own defaults are used.
    """
    if not self.config:
        self.postgresql = PostgresqlDataStore(autocommit=True)
        return
    self.postgresql = PostgresqlDataStore(
        host=self.config.PG_HOST, port=self.config.PG_PORT,
        db_name=self.config.PG_DB_NAME, autocommit=True)
def _delete_image_data(self):
    """Remove the image from the case and purge its data if now unused.

    The image is first unlinked from the current case. Only when no other
    case still references it are the index, the filesystem database and the
    image record deleted.
    """
    self._connect_postgresql_datastore()

    # Nothing to do unless the image is attached to this case.
    if not self.postgresql.is_image_in_case(self.image_id, self.case):
        log.error(
            'Image {0:s} does not exist in case {1:s}.'.format(
                self.image_path, self.case))
        return

    log.info(
        'Removing image {0:s} from case {1:s}'.format(
            self.image_path, self.case))
    self.postgresql.unlink_image_from_case(self.image_id, self.case)

    # Keep the data while any other case still references the image.
    remaining_cases = self.postgresql.get_image_cases(self.image_id)
    if remaining_cases:
        log.warning(
            'Not deleting image {0:s} data. Still linked to cases: {1!s}'.format(
                self.image_path, remaining_cases))
        return

    # Drop the index, if there is one.
    index_name = ''.join(('es', self.image_hash))
    self._connect_opensearch_datastore()
    if self.opensearch.index_exists(index_name):
        log.info('Deleting index {0:s}.'.format(index_name))
        self.opensearch.delete_index(index_name)
    else:
        log.info('Index {0:s} does not exist.'.format(index_name))

    # Drop the filesystem database, then the image record itself.
    db_name = ''.join(('fs', self.image_hash))
    log.info('Deleting database {0:s}.'.format(db_name))
    self.postgresql.delete_filesystem_database(db_name)
    self.postgresql.delete_image(self.image_id)
    log.info(
        'Image {0:s} data has been removed from the datastores.'.format(
            self.image_path))
def _extract_strings(self):
"""String extraction.
Extract strings from the image using bulk_extractor.
"""
self.output_path = tempfile.mkdtemp()
cmd = [
'bulk_extractor', '-o', self.output_path, '-x', 'all', '-e', 'wordlist'
]
@ -371,11 +421,9 @@ class ImageProcessor():
log.info('Running bulk_extractor: [%s]', ' '.join(cmd))
try:
output = subprocess.check_output(cmd)
subprocess.check_call(cmd)
except subprocess.CalledProcessError as e:
raise RuntimeError('String extraction failed.') from e
md5_offset = output.index(b'MD5') + 19
self.image_hash = output[md5_offset:md5_offset + 32].decode('utf-8')
def _get_volume_details(self, path_spec):
"""Logs volume details for the given path spec.
@ -435,12 +483,7 @@ class ImageProcessor():
def _index_strings(self):
"""Index the extracted strings."""
if self.config:
self.opensearch = OpenSearchDataStore(
host=self.config.OS_HOST, port=self.config.OS_PORT,
url=self.config.OS_URL)
else:
self.opensearch = OpenSearchDataStore()
self._connect_opensearch_datastore()
index_name = ''.join(('es', self.image_hash))
index_exists = self.opensearch.index_exists(index_name)
if index_exists:
@ -488,40 +531,30 @@ class ImageProcessor():
records = self.opensearch.import_event(index_name)
log.info('Indexed %d records...', records)
def _initialise_database(self):
"""Initialse the image database."""
self.postgresql.execute((
'CREATE TABLE images (image_id TEXT PRIMARY KEY, image_path TEXT, '
'image_hash TEXT)'))
self.postgresql.execute((
'CREATE TABLE image_case ('
'case_id TEXT, image_id TEXT REFERENCES images(image_id), '
'PRIMARY KEY (case_id, image_id))'))
def _parse_filesystems(self):
"""Filesystem parsing.
Parse each filesystem to create a mapping from byte offsets to files.
"""
if self.config:
self.postgresql = PostgresqlDataStore(
host=self.config.PG_HOST, port=self.config.PG_PORT,
db_name=self.config.PG_DB_NAME, autocommit=True)
else:
self.postgresql = PostgresqlDataStore(autocommit=True)
if self._already_parsed():
self._connect_postgresql_datastore()
already_parsed = self._already_parsed()
db_name = ''.join(('fs', self.image_hash))
if already_parsed:
log.info('Image already parsed: [%s]', self.image_path)
else:
db_name = ''.join(('fs', self.image_hash))
self.postgresql.execute('CREATE DATABASE {0:s}'.format(db_name))
if self.options.reparse:
log.info('Reparsing.')
self.postgresql.delete_filesystem_database(db_name)
log.info('Database %s deleted.', db_name)
already_parsed = False
if not already_parsed:
self.postgresql.create_database(db_name)
if self.config:
self.postgresql.switch_database(
host=self.config.PG_HOST, port=self.config.PG_PORT, db_name=db_name)
else:
self.postgresql.switch_database(db_name=db_name)
self._create_filesystem_database()
self.postgresql.create_filesystem_database()
# Scan image for volumes
options = volume_scanner.VolumeScannerOptions()
@ -588,18 +621,21 @@ class ImageProcessor():
def process_image(self):
"""Process the image."""
self.output_path = tempfile.mkdtemp()
log.info('* Processing start: %s', datetime.now())
self._extract_strings()
log.info('String extraction complete.')
if self.options.delete:
log.info('* Deleting image data: %s', datetime.now())
self._delete_image_data()
else:
log.info('* Parsing image: %s', datetime.now())
self._parse_filesystems()
log.info('Parsing complete.')
log.info('* Parsing image: %s', datetime.now())
self._parse_filesystems()
log.info('Parsing complete.')
log.info('* Extracting strings: %s', datetime.now())
self._extract_strings()
log.info('String extraction complete.')
log.info('* Indexing strings: %s', datetime.now())
self._index_strings()
log.info('Indexing complete.')
log.info('* Indexing strings: %s', datetime.now())
self._index_strings()
log.info('Indexing complete.')
log.info('* Processing complete: %s', datetime.now())
@ -613,10 +649,14 @@ class ImageProcessorOptions():
unzip (bool): decompress zip.
"""
def __init__(self, base64=True, gunzip=True, unzip=True, reindex=False):
def __init__(
self, base64=True, gunzip=True, unzip=True, reparse=False, reindex=False,
delete=False):
"""Initialise image processor options."""
super().__init__()
self.base64 = base64
self.gunzip = gunzip
self.unzip = unzip
self.reparse = reparse
self.reindex = reindex
self.delete = delete

View file

@ -79,10 +79,8 @@ class ImageProcessorTest(unittest.TestCase):
image_processor.image_hash = TEST_IMAGE_HASH
return image_processor
@mock.patch(
'dfdewey.utils.image_processor.ImageProcessor._initialise_database')
@mock.patch('dfdewey.datastore.postgresql.PostgresqlDataStore')
def test_already_parsed(self, mock_postgresql, mock_initialise_database):
def test_already_parsed(self, mock_postgresql):
"""Test already parsed method."""
image_processor = self._get_image_processor()
@ -91,78 +89,91 @@ class ImageProcessorTest(unittest.TestCase):
image_processor.postgresql = mock_postgresql
result = image_processor._already_parsed()
mock_initialise_database.assert_called_once()
calls = [
mock.call((
'INSERT INTO images (image_id, image_path, image_hash) '
'VALUES (\'{0:s}\', \'{1:s}\', \'{2:s}\')').format(
TEST_IMAGE_ID, TEST_IMAGE, TEST_IMAGE_HASH)),
mock.call((
'INSERT INTO image_case (case_id, image_id) '
'VALUES (\'{0:s}\', \'{1:s}\')').format(TEST_CASE, TEST_IMAGE_ID))
]
mock_postgresql.execute.assert_has_calls(calls)
mock_postgresql.initialise_database.assert_called_once()
mock_postgresql.insert_image.assert_called_once_with(
TEST_IMAGE_ID, TEST_IMAGE, TEST_IMAGE_HASH)
mock_postgresql.link_image_to_case.assert_called_once_with(
TEST_IMAGE_ID, TEST_CASE)
self.assertEqual(result, False)
# Test database exists, image already in case
mock_postgresql.table_exists.return_value = True
mock_postgresql.value_exists.return_value = True
mock_postgresql.query_single_row.return_value = (1,)
mock_postgresql.execute.reset_mock()
mock_postgresql.is_image_in_case.return_value = True
mock_postgresql.link_image_to_case.reset_mock()
image_processor.postgresql = mock_postgresql
result = image_processor._already_parsed()
mock_postgresql.execute.assert_not_called()
mock_postgresql.link_image_to_case.assert_not_called()
self.assertEqual(result, True)
# Test database exists, image exists, but not in case
mock_postgresql.query_single_row.return_value = None
mock_postgresql.is_image_in_case.return_value = False
image_processor.postgresql = mock_postgresql
result = image_processor._already_parsed()
mock_postgresql.execute.assert_called_once_with((
'INSERT INTO image_case (case_id, image_id) '
'VALUES (\'{0:s}\', \'{1:s}\')').format(TEST_CASE, TEST_IMAGE_ID))
mock_postgresql.link_image_to_case.assert_called_once_with(
TEST_IMAGE_ID, TEST_CASE)
self.assertEqual(result, True)
@mock.patch(
'dfdewey.utils.image_processor.ImageProcessor._connect_opensearch_datastore'
)
@mock.patch(
'dfdewey.utils.image_processor.ImageProcessor._connect_postgresql_datastore'
)
@mock.patch('dfdewey.datastore.opensearch.OpenSearchDataStore')
@mock.patch('dfdewey.datastore.postgresql.PostgresqlDataStore')
def test_create_filesystem_database(self, mock_postgresql):
"""Test create filesystem database method."""
def test_delete_image_data(
self, mock_postgresql, mock_opensearch, mock_connect_postgres,
mock_connect_opensearch):
"""Test delete image data method."""
image_processor = self._get_image_processor()
image_processor.postgresql = mock_postgresql
image_processor._create_filesystem_database()
image_processor.opensearch = mock_opensearch
# Test if image is not in case
mock_postgresql.is_image_in_case.return_value = False
image_processor._delete_image_data()
mock_connect_postgres.assert_called_once()
mock_postgresql.unlink_image_from_case.assert_not_called()
calls = [
mock.call((
'CREATE TABLE blocks (block INTEGER, inum INTEGER, part TEXT, '
'PRIMARY KEY (block, inum, part))')),
mock.call((
'CREATE TABLE files (inum INTEGER, filename TEXT, part TEXT, '
'PRIMARY KEY (inum, filename, part))'))
]
mock_postgresql.execute.assert_has_calls(calls)
# Test if image is linked to multiple cases
mock_postgresql.is_image_in_case.return_value = True
mock_postgresql.get_image_cases.return_value = ['test']
image_processor._delete_image_data()
mock_postgresql.get_image_cases.assert_called_once()
mock_connect_opensearch.assert_not_called()
@mock.patch('subprocess.check_output')
def test_extract_strings(self, mock_subprocess):
# Test if index exists
mock_postgresql.get_image_cases.return_value = None
mock_opensearch.index_exists.return_value = True
image_processor._delete_image_data()
mock_opensearch.delete_index.assert_called_once()
mock_postgresql.delete_filesystem_database.assert_called_once()
mock_postgresql.delete_image.assert_called_once()
# Test if index doesn't exist
mock_opensearch.delete_index.reset_mock()
mock_opensearch.index_exists.return_value = False
image_processor._delete_image_data()
mock_opensearch.delete_index.assert_not_called()
@mock.patch('tempfile.mkdtemp')
@mock.patch('subprocess.check_call')
def test_extract_strings(self, mock_subprocess, mock_mkdtemp):
"""Test extract strings method."""
image_processor = self._get_image_processor()
image_processor.output_path = '/tmp/tmpxaemz75r'
image_processor.image_hash = None
mock_mkdtemp.return_value = '/tmp/tmpxaemz75r'
# Test with default options
mock_subprocess.return_value = 'MD5 of Disk Image: {0:s}'.format(
TEST_IMAGE_HASH).encode('utf-8')
image_processor._extract_strings()
mock_subprocess.assert_called_once_with([
'bulk_extractor', '-o', '/tmp/tmpxaemz75r', '-x', 'all', '-e',
'wordlist', '-e', 'base64', '-e', 'gzip', '-e', 'zip', '-S',
'strings=YES', '-S', 'word_max=1000000', TEST_IMAGE
])
self.assertEqual(image_processor.image_hash, TEST_IMAGE_HASH)
# Test options
mock_subprocess.reset_mock()
mock_subprocess.return_value = 'MD5 of Disk Image: {0:s}'.format(
TEST_IMAGE_HASH).encode('utf-8')
image_processor.options.base64 = False
image_processor.options.gunzip = False
image_processor.options.unzip = False
@ -264,33 +275,17 @@ class ImageProcessorTest(unittest.TestCase):
self.assertEqual(mock_index_record.call_count, 3)
mock_import_event.assert_called_once()
@mock.patch('dfdewey.datastore.postgresql.PostgresqlDataStore')
def test_initialise_database(self, mock_postgresql):
"""Test initialise database method."""
image_processor = self._get_image_processor()
image_processor.postgresql = mock_postgresql
calls = [
mock.call(
'CREATE TABLE images (image_id TEXT PRIMARY KEY, image_path TEXT, image_hash TEXT)'
),
mock.call((
'CREATE TABLE image_case ('
'case_id TEXT, image_id TEXT REFERENCES images(image_id), '
'PRIMARY KEY (case_id, image_id))'))
]
image_processor._initialise_database()
mock_postgresql.execute.assert_has_calls(calls)
@mock.patch('psycopg2.connect')
@mock.patch('dfdewey.utils.image_processor.ImageProcessor._already_parsed')
@mock.patch(
'dfdewey.datastore.postgresql.PostgresqlDataStore.switch_database')
@mock.patch('dfdewey.datastore.postgresql.PostgresqlDataStore.execute')
@mock.patch('dfdewey.datastore.postgresql.PostgresqlDataStore._execute')
@mock.patch('dfdewey.datastore.postgresql.PostgresqlDataStore.bulk_insert')
def test_parse_filesystems(
self, mock_bulk_insert, mock_execute, mock_switch_database,
mock_already_parsed, _):
"""Test parse filesystems method."""
db_name = ''.join(('fs', TEST_IMAGE_HASH))
image_processor = self._get_image_processor()
# Test image already parsed
@ -298,6 +293,13 @@ class ImageProcessorTest(unittest.TestCase):
image_processor._parse_filesystems()
mock_execute.assert_not_called()
# Test reparse flag
image_processor.options.reparse = True
image_processor._parse_filesystems()
mock_execute.assert_any_call('DROP DATABASE {0:s}'.format(db_name))
mock_execute.reset_mock()
mock_switch_database.reset_mock()
# Test image not parsed
current_path = os.path.abspath(os.path.dirname(__file__))
image_processor.image_path = os.path.join(
@ -305,8 +307,7 @@ class ImageProcessorTest(unittest.TestCase):
mock_already_parsed.return_value = False
image_processor._parse_filesystems()
self.assertEqual(mock_execute.call_count, 3)
mock_switch_database.assert_called_once_with(
db_name=''.join(('fs', TEST_IMAGE_HASH)))
mock_switch_database.assert_called_once_with(db_name=db_name)
self.assertIsInstance(image_processor.scanner, FileEntryScanner)
self.assertEqual(len(image_processor.path_specs), 2)
ntfs_path_spec = image_processor.path_specs[0]
@ -337,17 +338,20 @@ class ImageProcessorTest(unittest.TestCase):
current_path, '..', '..', 'test_data', 'test.dmg')
image_processor._parse_filesystems()
@mock.patch('dfdewey.utils.image_processor.ImageProcessor._delete_image_data')
@mock.patch('dfdewey.utils.image_processor.ImageProcessor._parse_filesystems')
@mock.patch('dfdewey.utils.image_processor.ImageProcessor._index_strings')
@mock.patch('dfdewey.utils.image_processor.ImageProcessor._extract_strings')
def test_process_image(
self, mock_extract_strings, mock_index_strings, mock_parse_filesystems):
self, mock_extract_strings, mock_index_strings, mock_parse_filesystems,
mock_delete_image_data):
"""Test process image method."""
image_processor = self._get_image_processor()
image_processor.process_image()
mock_extract_strings.assert_called_once()
mock_index_strings.assert_called_once()
mock_parse_filesystems.assert_called_once()
mock_delete_image_data.assert_not_called()
if __name__ == '__main__':

View file

@ -91,39 +91,11 @@ class IndexSearcher():
if image != 'all':
self.image = os.path.abspath(self.image)
self._get_image_hash()
image_hash = self.postgresql.get_image_hash(self.image_id)
if image_hash:
self.images[image_hash] = self.image
else:
self._get_case_images()
def _get_case_images(self):
"""Get all images for the case.
Returns:
A dictionary of the images in the case.
"""
images = self.postgresql.query((
'SELECT image_hash, image_path FROM image_case NATURAL JOIN images '
'WHERE case_id = \'{0:s}\'').format(self.case))
for image_hash, image_path in images:
self.images[image_hash] = image_path
def _get_filenames_from_inode(self, inode, location):
  """Gets filename(s) from an inode number.

  Args:
    inode (int): Inode number of target file.
    location (str): Partition location / identifier.

  Returns:
    list: Filename(s) recorded for the inode. The list is empty when the
        query yields no rows; None is never returned.
  """
  results = self.postgresql.query((
      'SELECT filename FROM files '
      'WHERE inum = {0:d} AND part = \'{1:s}\'').format(inode, location))
  # Each result row is a one-column tuple; keep only the filename value.
  return [result[0] for result in results]
self.images = self.postgresql.get_case_images(self.case)
def _get_filenames_from_offset(self, image_path, image_hash, offset):
"""Gets filename(s) given a byte offset within an image.
@ -173,14 +145,13 @@ class IndexSearcher():
except TypeError as e:
log.error('Error opening image: %s', e)
inodes = self._get_inodes(
inodes = self.postgresql.get_inodes(
int((offset - partition_offset) / block_size), hit_location)
if inodes:
for i in inodes:
inode = i[0]
for inode in inodes:
# Account for resident files
if (i[0] == 0 and
if (inode == 0 and
filesystem.info.ftype == pytsk3.TSK_FS_TYPE_NTFS_DETECT):
mft_record_size_offset = 0x40 + partition_offset
mft_record_size = int.from_bytes(
@ -192,39 +163,13 @@ class IndexSearcher():
inode = self._get_ntfs_resident_inode((offset - partition_offset),
filesystem, mft_record_size)
inode_filenames = self._get_filenames_from_inode(inode, hit_location)
inode_filenames = self.postgresql.get_filenames_from_inode(
inode, hit_location)
filename = '\n'.join(inode_filenames)
filenames.append('{0:s} ({1:d})'.format(filename, inode))
return filenames
def _get_image_hash(self):
  """Look up the hash for self.image_id and register the image under it.

  Queries the datastore for the image_hash stored against self.image_id.
  When a row is found, self.image is recorded in self.images keyed by that
  hash; when no row is found, self.images is left untouched.

  Nothing is returned; callers read the result from self.images.
  """
  image_hash = self.postgresql.query_single_row(
      'SELECT image_hash FROM images WHERE image_id = \'{0:s}\''.format(
          self.image_id))
  # The row is a one-column tuple, so the hash itself is element 0.
  if image_hash:
    self.images[image_hash[0]] = self.image
def _get_inodes(self, block, location):
  """Gets inode numbers for a block offset.

  Args:
    block (int): block offset within the image.
    location (str): Partition location / identifier.

  Returns:
    The datastore query result, passed through unchanged: one row per
    matching inode, with the inode number as the row's only column.
  """
  return self.postgresql.query(
      ('SELECT inum FROM blocks '
       'WHERE block = {0:d} AND part = \'{1:s}\'').format(block, location))
def _get_ntfs_resident_inode(self, offset, filesystem, mft_record_size):
"""Gets the inode number associated with NTFS $MFT resident data.

View file

@ -39,14 +39,14 @@ class IndexSearcherTest(unittest.TestCase):
Test index searcher.
"""
with mock.patch('psycopg2.connect'), mock.patch(
'dfdewey.datastore.postgresql.PostgresqlDataStore.query_single_row'
'dfdewey.datastore.postgresql.PostgresqlDataStore._query_single_row'
) as mock_query_single_row:
mock_query_single_row.return_value = (TEST_IMAGE_HASH,)
index_searcher = IndexSearcher(TEST_CASE, TEST_IMAGE_ID, TEST_IMAGE)
index_searcher.config = None
return index_searcher
@mock.patch('dfdewey.datastore.postgresql.PostgresqlDataStore.query')
@mock.patch('dfdewey.datastore.postgresql.PostgresqlDataStore._query')
def test_get_case_images(self, mock_query):
"""Test get case images method."""
mock_query.return_value = [(
@ -61,19 +61,10 @@ class IndexSearcherTest(unittest.TestCase):
self.assertEqual(index_searcher.images['hash1'], 'image1.dd')
self.assertEqual(index_searcher.images['hash2'], 'image2.dd')
@mock.patch('dfdewey.datastore.postgresql.PostgresqlDataStore.query')
def test_get_filenames_from_inode(self, mock_query):
  """Test get filenames from inode method."""
  index_searcher = self._get_index_searcher()
  # The datastore returns one-column rows; the searcher is expected to
  # unwrap them into plain filename strings, preserving row order.
  mock_query.return_value = [('test.txt',), ('test.txt:ads',)]
  filenames = index_searcher._get_filenames_from_inode(42, '/p1')
  self.assertEqual(len(filenames), 2)
  self.assertEqual(filenames[0], 'test.txt')
  self.assertEqual(filenames[1], 'test.txt:ads')
@mock.patch('dfdewey.utils.index_searcher.IndexSearcher._get_inodes')
@mock.patch('dfdewey.datastore.postgresql.PostgresqlDataStore.get_inodes')
@mock.patch(
'dfdewey.utils.index_searcher.IndexSearcher._get_filenames_from_inode')
'dfdewey.datastore.postgresql.PostgresqlDataStore.get_filenames_from_inode'
)
@mock.patch(
'dfdewey.datastore.postgresql.PostgresqlDataStore.switch_database')
def test_get_filenames_from_offset(
@ -94,7 +85,7 @@ class IndexSearcherTest(unittest.TestCase):
# Test offset within a file
mock_get_inodes.reset_mock()
mock_get_inodes.return_value = [(0,)]
mock_get_inodes.return_value = [0]
mock_get_filenames_from_inode.return_value = ['adams.txt']
filenames = index_searcher._get_filenames_from_offset(
image_path, TEST_IMAGE_HASH, 1133936)
@ -104,7 +95,7 @@ class IndexSearcherTest(unittest.TestCase):
# Test volume image
mock_get_inodes.reset_mock()
mock_get_inodes.return_value = [(2,)]
mock_get_inodes.return_value = [2]
mock_get_filenames_from_inode.reset_mock()
mock_get_filenames_from_inode.return_value = []
image_path = os.path.join(

View file

@ -12,8 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Use the official Docker Hub Ubuntu 18.04 base image
FROM ubuntu:18.04
# Use the official Docker Hub Ubuntu 20.04 base image
FROM ubuntu:20.04
# Update the base image
ENV DEBIAN_FRONTEND=noninteractive

View file

@ -1,10 +1,7 @@
# Using dfDewey
```shell
usage: dfdewey [-h] [-c CONFIG] [--no_base64] [--no_gzip] [--no_zip]
[--reindex] [--highlight] [-s SEARCH]
[--search_list SEARCH_LIST]
case [image]
usage: dfdewey [-h] [-c CONFIG] [--no_base64] [--no_gzip] [--no_zip] [--reparse] [--reindex] [--delete] [--highlight] [-s SEARCH] [--search_list SEARCH_LIST] case [image]
positional arguments:
case case ID
@ -17,7 +14,9 @@ optional arguments:
--no_base64 don't decode base64
--no_gzip don't decompress gzip
--no_zip don't decompress zip
--reparse reparse filesystem (will delete existing filesystem mapping)
--reindex recreate index (will delete existing index)
--delete delete image (filesystem mapping and index)
--highlight highlight search term in results
-s SEARCH, --search SEARCH
search query
@ -77,6 +76,13 @@ dfDewey will have bulk_extractor decode base64 data, and decompress gzip / zip
data by default. These can be disabled by adding the flags `--no_base64`,
`--no_gzip`, and `--no_zip`.
If an image has already been processed, you can reparse its filesystem and / or
recreate its index by adding the `--reparse` and / or `--reindex` flags. Each
flag first deletes the corresponding existing data (the filesystem mapping or
the index, respectively).
You can also delete all data for a given image (its filesystem mapping and
index) from the datastores by adding the `--delete` flag.
## Searching
To search the index for a single image, you need to supply a `CASE`, `IMAGE`,