Merge pull request #14 from google/flush-db

Flush DB after filesystem parsing
Jason 2021-04-07 15:52:25 +10:00 committed by GitHub
commit 184e1933d6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 53 additions and 8 deletions


@@ -59,7 +59,7 @@ def main():
     log.error('Image must be supplied for processing.')
     sys.exit(1)
   image_processor_options = ImageProcessorOptions(
-      not args.no_base64, not args.no_gzip, not args.no_zip)
+      not args.no_base64, not args.no_gzip, not args.no_zip, args.reindex)
   image_processor = ImageProcessor(
       args.case, os.path.abspath(args.image), image_processor_options)
   image_processor.process_image()
@@ -90,6 +90,9 @@ def parse_args():
       '--no_gzip', help='don\'t decompress gzip', action='store_true')
   parser.add_argument(
       '--no_zip', help='don\'t decompress zip', action='store_true')
+  parser.add_argument(
+      '--reindex', help='recreate index (will delete existing index)',
+      action='store_true')
 
   # Search args
   parser.add_argument('-s', '--search', help='search query')
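
Taken together, the two hunks above thread the new flag from the command line into the processor: argparse collects `--reindex` as a boolean, and `main()` forwards it to `ImageProcessorOptions` alongside the inverted `--no_*` flags. A minimal, self-contained sketch of that flow (the sample argv and image name are illustrative, not from the diff):

```python
# Sketch of the --reindex flow, assuming the argument set shown above.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('case', help='case ID')
parser.add_argument('image', nargs='?', help='image file')  # optional, per usage
parser.add_argument('--no_base64', action='store_true')
parser.add_argument('--no_gzip', action='store_true')
parser.add_argument('--no_zip', action='store_true')
parser.add_argument(
    '--reindex', help='recreate index (will delete existing index)',
    action='store_true')

# Hypothetical invocation; 'disk.dd' is a placeholder image name.
args = parser.parse_args(['--reindex', 'testcase', 'disk.dd'])

# The --no_* flags disable default-on decoding, so they are inverted;
# reindex is passed through unchanged, as in the ImageProcessorOptions call.
print(not args.no_base64, not args.no_gzip, not args.no_zip, args.reindex)
# True True True True
```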


@@ -410,9 +410,15 @@ class ImageProcessor():
     """Index the extracted strings."""
     self.elasticsearch = ElasticsearchDataStore()
     index_name = ''.join(('es', self.image_hash))
-    if self.elasticsearch.index_exists(index_name):
+    index_exists = self.elasticsearch.index_exists(index_name)
+    if index_exists:
       log.info('Image already indexed: [%s]', self.image_path)
-    else:
+      if self.options.reindex:
+        log.info('Reindexing.')
+        self.elasticsearch.delete_index(index_name)
+        log.info('Index %s deleted.', index_name)
+        index_exists = False
+    if not index_exists:
       index_name = self.elasticsearch.create_index(index_name=index_name)
       log.info('Index %s created.', index_name)
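
The new branch relies on three datastore helpers: `index_exists`, `delete_index` and `create_index`. Their implementations are not part of this diff; the sketch below is a hypothetical stand-in for how they might wrap the elasticsearch-py client, not dfdewey's actual `ElasticsearchDataStore`:

```python
# Hypothetical datastore helpers, assuming the elasticsearch-py 7.x API.
from elasticsearch import Elasticsearch


class ElasticsearchDataStore:

  def __init__(self, host='127.0.0.1', port=9200):
    self.client = Elasticsearch([{'host': host, 'port': port}])

  def index_exists(self, index_name):
    # An existing 'es<hash>' index means the image was already indexed.
    return self.client.indices.exists(index=index_name)

  def delete_index(self, index_name):
    # --reindex drops the old index so it can be rebuilt from scratch.
    self.client.indices.delete(index=index_name)

  def create_index(self, index_name):
    self.client.indices.create(index=index_name)
    return index_name
```
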
@@ -476,6 +482,8 @@ class ImageProcessor():
     self._create_filesystem_database()
 
     # Scan image for volumes
+    dfvfs_definitions.PREFERRED_GPT_BACK_END = (
+        dfvfs_definitions.TYPE_INDICATOR_GPT)
     mediator = UnattendedVolumeScannerMediator()
     try:
       self.scanner = FileEntryScanner(mediator=mediator)
@@ -498,6 +506,7 @@ class ImageProcessor():
       else:
         log.warning(
             'Volume type %s is not supported.', path_spec.type_indicator)
+    self.postgresql.db.commit()
 
   def _parse_inodes(self, location, start_offset):
     """Parse filesystem inodes.
@@ -556,12 +565,13 @@ class ImageProcessorOptions():
     unzip (bool): decompress zip.
   """
 
-  def __init__(self, base64=True, gunzip=True, unzip=True):
+  def __init__(self, base64=True, gunzip=True, unzip=True, reindex=False):
     """Initialise image processor options."""
     super().__init__()
     self.base64 = base64
     self.gunzip = gunzip
     self.unzip = unzip
+    self.reindex = reindex
 
 
 class UnattendedVolumeScannerMediator(volume_scanner.VolumeScannerMediator):
@@ -585,6 +595,25 @@ class UnattendedVolumeScannerMediator(volume_scanner.VolumeScannerMediator):
         for volume_index in range(1, volume_system.number_of_volumes + 1)
     ]
 
+  def GetLVMVolumeIdentifiers(self, volume_system, volume_identifiers):
+    """Retrieves LVM volume identifiers.
+
+    This method can be used to prompt the user to provide LVM volume
+    identifiers.
+
+    Args:
+      volume_system (LVMVolumeSystem): volume system.
+      volume_identifiers (list[str]): volume identifiers including prefix.
+
+    Returns:
+      list[str]: selected volume identifiers including prefix or None.
+    """
+    prefix = 'lvm'
+    return [
+        '{0:s}{1:d}'.format(prefix, volume_index)
+        for volume_index in range(1, volume_system.number_of_volumes + 1)
+    ]
+
   def GetPartitionIdentifiers(self, volume_system, volume_identifiers):
     """Retrieves partition identifiers.


@@ -225,7 +225,7 @@ class ImageProcessorTest(unittest.TestCase):
     mock_elasticsearch.import_event.assert_called_once_with(
         index_name, event=json_record)
 
-  @mock.patch('elasticsearch.client.IndicesClient.create')
+  @mock.patch('elasticsearch.client.IndicesClient')
   @mock.patch('dfdewey.utils.image_processor.ImageProcessor._index_record')
   @mock.patch('dfdewey.datastore.elastic.ElasticsearchDataStore.index_exists')
   @mock.patch('dfdewey.datastore.elastic.ElasticsearchDataStore.import_event')
@@ -244,6 +244,18 @@ class ImageProcessorTest(unittest.TestCase):
     image_processor._index_strings()
     mock_index_record.assert_not_called()
 
+    # Test reindex flag
+    image_processor.options.reindex = True
+    image_processor._index_strings()
+    mock_create_index.assert_called_once_with(
+        index_name=''.join(('es', TEST_IMAGE_HASH)))
+    self.assertEqual(mock_index_record.call_count, 3)
+    mock_import_event.assert_called_once()
+    image_processor.options.reindex = False
+    mock_create_index.reset_mock()
+    mock_index_record.reset_mock()
+    mock_import_event.reset_mock()
+
     # Test new index
     mock_index_exists.return_value = False
     mock_index_record.return_value = 10000000
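
The reindex scenario runs while `index_exists` still reports `True`, asserts that `create_index` is nevertheless called once with the expected `es<hash>` name, then clears the flag and resets all three mocks so the new-index assertions that follow start from clean call counts.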


@@ -15,12 +15,13 @@ libfsxfs-python >= 20201114
 libfvde-python >= 20160719
 libfwnt-python >= 20160418
 libluksde-python >= 20200101
-libqcow-python >= 20131204
+libqcow-python >= 20201213
 libsigscan-python >= 20191221
 libsmdev-python >= 20140529
 libsmraw-python >= 20140612
 libvhdi-python >= 20201014
 libvmdk-python >= 20140421
+libvsgpt-python >= 20210207
 libvshadow-python >= 20160109
 libvslvm-python >= 20160109
 pytsk3 >= 20160721
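
Both requirements changes support the image processor hunks above: libvsgpt is the library behind dfVFS's `TYPE_INDICATOR_GPT` back end selected via `PREFERRED_GPT_BACK_END`, and the libqcow minimum is bumped to a newer release.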


@@ -1,7 +1,7 @@
 # Using dfDewey
 
 ```shell
-usage: dfdcli.py [-h] [--no_base64] [--no_gzip] [--no_zip] [-s SEARCH] [--search_list SEARCH_LIST] case [image]
+usage: dfdcli.py [-h] [--no_base64] [--no_gzip] [--no_zip] [--reindex] [-s SEARCH] [--search_list SEARCH_LIST] case [image]
 
 positional arguments:
   case                  case ID
@@ -12,11 +12,11 @@ optional arguments:
   --no_base64           don't decode base64
   --no_gzip             don't decompress gzip
   --no_zip              don't decompress zip
+  --reindex             recreate index (will delete existing index)
   -s SEARCH, --search SEARCH
                         search query
   --search_list SEARCH_LIST
                         file with search queries
 ```
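
With the flag in place, an already-processed image can be re-indexed instead of skipped, e.g. `dfdcli.py --reindex testcase /path/to/image.dd` (the image path is illustrative); the existing `es<hash>` index for that image is deleted and rebuilt.
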
## Docker