Merge pull request #14 from google/flush-db

Flush DB after filesystem parsing
Authored by Jason on 2021-04-07 15:52:25 +10:00, committed by GitHub
commit 184e1933d6
5 changed files with 53 additions and 8 deletions

dfdcli.py

@@ -59,7 +59,7 @@ def main():
     log.error('Image must be supplied for processing.')
     sys.exit(1)
   image_processor_options = ImageProcessorOptions(
-      not args.no_base64, not args.no_gzip, not args.no_zip)
+      not args.no_base64, not args.no_gzip, not args.no_zip, args.reindex)
   image_processor = ImageProcessor(
       args.case, os.path.abspath(args.image), image_processor_options)
   image_processor.process_image()
@@ -90,6 +90,9 @@ def parse_args():
       '--no_gzip', help='don\'t decompress gzip', action='store_true')
   parser.add_argument(
       '--no_zip', help='don\'t decompress zip', action='store_true')
+  parser.add_argument(
+      '--reindex', help='recreate index (will delete existing index)',
+      action='store_true')

   # Search args
   parser.add_argument('-s', '--search', help='search query')
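For context on the wiring above: each `--no_*` switch is an argparse `store_true` opt-out, so `main()` inverts it before handing a positive value to `ImageProcessorOptions`, while the new `--reindex` flag passes through unchanged. A minimal, self-contained sketch of that pattern (standard-library argparse only; the plain dict stands in for `ImageProcessorOptions`):

```python
import argparse

parser = argparse.ArgumentParser()
# store_true flags default to False when absent.
parser.add_argument('--no_base64', action='store_true')
parser.add_argument('--no_gzip', action='store_true')
parser.add_argument('--no_zip', action='store_true')
parser.add_argument(
    '--reindex', help='recreate index (will delete existing index)',
    action='store_true')

args = parser.parse_args(['--no_gzip', '--reindex'])
options = {
    'base64': not args.no_base64,  # True: base64 decoding stays on
    'gunzip': not args.no_gzip,    # False: gzip decompression disabled
    'unzip': not args.no_zip,      # True: zip decompression stays on
    'reindex': args.reindex,       # True: delete and recreate the index
}
print(options)
```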

dfdewey/utils/image_processor.py

@@ -410,9 +410,15 @@ class ImageProcessor():
     """Index the extracted strings."""
     self.elasticsearch = ElasticsearchDataStore()
     index_name = ''.join(('es', self.image_hash))
-    if self.elasticsearch.index_exists(index_name):
+    index_exists = self.elasticsearch.index_exists(index_name)
+    if index_exists:
       log.info('Image already indexed: [%s]', self.image_path)
-    else:
+      if self.options.reindex:
+        log.info('Reindexing.')
+        self.elasticsearch.delete_index(index_name)
+        log.info('Index %s deleted.', index_name)
+        index_exists = False
+    if not index_exists:
       index_name = self.elasticsearch.create_index(index_name=index_name)
       log.info('Index %s created.', index_name)
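The reworked branch keeps the existing early-exit behaviour but lets `--reindex` force a delete-and-recreate. `delete_index` and `create_index` here are dfDewey's own `ElasticsearchDataStore` wrappers; as a rough sketch of the same flow using the elasticsearch-py client directly (host URL and the hash-like index name are placeholders, not the project's configuration):

```python
from elasticsearch import Elasticsearch

def ensure_index(es, index_name, reindex=False):
  # Mirrors the logic above. Note that deletion is destructive: every
  # previously indexed string for this image is dropped.
  index_exists = es.indices.exists(index=index_name)
  if index_exists and reindex:
    es.indices.delete(index=index_name)
    index_exists = False
  if not index_exists:
    es.indices.create(index=index_name)
  return index_name

es = Elasticsearch(['http://localhost:9200'])
ensure_index(es, 'es' + 32 * '0', reindex=True)  # 'es' + image hash
```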
@@ -476,6 +482,8 @@ class ImageProcessor():
     self._create_filesystem_database()

     # Scan image for volumes
+    dfvfs_definitions.PREFERRED_GPT_BACK_END = (
+        dfvfs_definitions.TYPE_INDICATOR_GPT)
     mediator = UnattendedVolumeScannerMediator()
     try:
       self.scanner = FileEntryScanner(mediator=mediator)
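dfVFS can enumerate GPT partitions through more than one back end; the two added lines pin the scanner to the GPT type indicator, which is why `libvsgpt-python` appears in the requirements change further down. A sketch of the pin in isolation, assuming the usual dfVFS import alias used elsewhere in this module:

```python
from dfvfs.lib import definitions as dfvfs_definitions

# Must be set before the volume scanner is constructed so that GPT
# volumes are serviced by the libvsgpt-backed implementation.
dfvfs_definitions.PREFERRED_GPT_BACK_END = (
    dfvfs_definitions.TYPE_INDICATOR_GPT)
```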
@@ -498,6 +506,7 @@ class ImageProcessor():
       else:
         log.warning(
             'Volume type %s is not supported.', path_spec.type_indicator)
+    self.postgresql.db.commit()

   def _parse_inodes(self, location, start_offset):
     """Parse filesystem inodes.
@@ -556,12 +565,13 @@ class ImageProcessorOptions():
     unzip (bool): decompress zip.
   """

-  def __init__(self, base64=True, gunzip=True, unzip=True):
+  def __init__(self, base64=True, gunzip=True, unzip=True, reindex=False):
     """Initialise image processor options."""
     super().__init__()
     self.base64 = base64
     self.gunzip = gunzip
     self.unzip = unzip
+    self.reindex = reindex


 class UnattendedVolumeScannerMediator(volume_scanner.VolumeScannerMediator):
@@ -585,6 +595,25 @@ class UnattendedVolumeScannerMediator(volume_scanner.VolumeScannerMediator):
         for volume_index in range(1, volume_system.number_of_volumes + 1)
     ]

+  def GetLVMVolumeIdentifiers(self, volume_system, volume_identifiers):
+    """Retrieves LVM volume identifiers.
+
+    This method can be used to prompt the user to provide LVM volume
+    identifiers.
+
+    Args:
+      volume_system (LVMVolumeSystem): volume system.
+      volume_identifiers (list[str]): volume identifiers including prefix.
+
+    Returns:
+      list[str]: selected volume identifiers including prefix or None.
+    """
+    prefix = 'lvm'
+    return [
+        '{0:s}{1:d}'.format(prefix, volume_index)
+        for volume_index in range(1, volume_system.number_of_volumes + 1)
+    ]
+
   def GetPartitionIdentifiers(self, volume_system, volume_identifiers):
     """Retrieves partition identifiers.

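The single `self.postgresql.db.commit()` added at +506 is the change the PR title describes: rows written while parsing filesystems sit in an open PostgreSQL transaction and are neither durable nor visible to other connections until the transaction is flushed. A minimal illustration of the underlying behaviour with psycopg2 (connection parameters, table, and columns here are illustrative, not necessarily dfDewey's actual schema):

```python
import psycopg2

# psycopg2 opens a transaction implicitly on the first statement; until
# commit() is called on the connection, the INSERT is invisible to other
# connections and is lost if the process dies.
conn = psycopg2.connect(dbname='dfdewey', user='dfdewey', host='localhost')
cur = conn.cursor()
cur.execute(
    'INSERT INTO files (inum, filename, part) VALUES (%s, %s, %s)',
    (42, 'example.txt', 'p1'))
conn.commit()  # the counterpart of self.postgresql.db.commit() above
conn.close()
```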
dfdewey/utils/image_processor_test.py

@@ -225,7 +225,7 @@ class ImageProcessorTest(unittest.TestCase):
     mock_elasticsearch.import_event.assert_called_once_with(
         index_name, event=json_record)

-  @mock.patch('elasticsearch.client.IndicesClient.create')
+  @mock.patch('elasticsearch.client.IndicesClient')
   @mock.patch('dfdewey.utils.image_processor.ImageProcessor._index_record')
   @mock.patch('dfdewey.datastore.elastic.ElasticsearchDataStore.index_exists')
   @mock.patch('dfdewey.datastore.elastic.ElasticsearchDataStore.import_event')
@@ -244,6 +244,18 @@ class ImageProcessorTest(unittest.TestCase):
     image_processor._index_strings()
     mock_index_record.assert_not_called()

+    # Test reindex flag
+    image_processor.options.reindex = True
+    image_processor._index_strings()
+    mock_create_index.assert_called_once_with(
+        index_name=''.join(('es', TEST_IMAGE_HASH)))
+    self.assertEqual(mock_index_record.call_count, 3)
+    mock_import_event.assert_called_once()
+    image_processor.options.reindex = False
+    mock_create_index.reset_mock()
+    mock_index_record.reset_mock()
+    mock_import_event.reset_mock()
+
     # Test new index
     mock_index_exists.return_value = False
     mock_index_record.return_value = 10000000
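Because the reindex sub-case runs inside the same test method as the pre-existing assertions, the new code resets each mock before the "new index" sub-case so that `assert_called_once`-style checks start from zero. A small self-contained illustration of that pattern, using only `unittest.mock`:

```python
from unittest import mock

datastore = mock.Mock()

# Sub-case 1: exercise the code path, then assert.
datastore.create_index(index_name='es_test')
datastore.create_index.assert_called_once()

# reset_mock() clears recorded calls (but keeps configured return
# values), so the next sub-case's call-count assertions start clean.
datastore.create_index.reset_mock()
datastore.create_index.assert_not_called()
```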

requirements.txt

@@ -15,12 +15,13 @@ libfsxfs-python >= 20201114
 libfvde-python >= 20160719
 libfwnt-python >= 20160418
 libluksde-python >= 20200101
-libqcow-python >= 20131204
+libqcow-python >= 20201213
 libsigscan-python >= 20191221
 libsmdev-python >= 20140529
 libsmraw-python >= 20140612
 libvhdi-python >= 20201014
 libvmdk-python >= 20140421
+libvsgpt-python >= 20210207
 libvshadow-python >= 20160109
 libvslvm-python >= 20160109
 pytsk3 >= 20160721

docs/usage.md

@@ -1,7 +1,7 @@
 # Using dfDewey

 ```shell
-usage: dfdcli.py [-h] [--no_base64] [--no_gzip] [--no_zip] [-s SEARCH] [--search_list SEARCH_LIST] case [image]
+usage: dfdcli.py [-h] [--no_base64] [--no_gzip] [--no_zip] [--reindex] [-s SEARCH] [--search_list SEARCH_LIST] case [image]

 positional arguments:
   case                  case ID
@@ -12,11 +12,11 @@ optional arguments:
   --no_base64           don't decode base64
   --no_gzip             don't decompress gzip
   --no_zip              don't decompress zip
+  --reindex             recreate index (will delete existing index)
   -s SEARCH, --search SEARCH
                         search query
   --search_list SEARCH_LIST
                         file with search queries
 ```

 ## Docker
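With the flag in place, an image whose strings were already indexed can be wiped from Elasticsearch and indexed again in a single run. A hypothetical invocation (the case ID and image path are placeholders):

```shell
# Re-process a previously indexed image, deleting and recreating
# its Elasticsearch index from scratch:
dfdcli.py --reindex testcase /path/to/image.dd
```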