From ccc9edfc6f74988d7ea8289ced6ecf6f4585313f Mon Sep 17 00:00:00 2001 From: Jason Solomon Date: Fri, 20 Nov 2020 14:25:36 +1100 Subject: [PATCH] Refactoring CLI, processing and searching --- dfdewey/utils/image_processor.py | 60 +++++++++++++ dfdewey/utils/index_searcher.py | 141 ++++++++++++++++++++++++++++++- 2 files changed, 198 insertions(+), 3 deletions(-) diff --git a/dfdewey/utils/image_processor.py b/dfdewey/utils/image_processor.py index 48718eb..b66305e 100644 --- a/dfdewey/utils/image_processor.py +++ b/dfdewey/utils/image_processor.py @@ -24,6 +24,7 @@ from dfvfs.helpers import volume_scanner from dfvfs.lib import definitions as dfvfs_definitions from dfvfs.lib import errors as dfvfs_errors from dfvfs.resolver import resolver +from dfvfs.volume import tsk_volume_system import pytsk3 from dfdewey.datastore.elastic import ElasticsearchDataStore @@ -71,6 +72,7 @@ class FileEntryScanner(volume_scanner.VolumeScanner): self._datastore = None self._list_only_files = False self._rows = [] + self._volumes = {} def _get_display_path(self, path_spec, path_segments, data_stream_name): """Retrieves a path to display. @@ -116,6 +118,24 @@ class FileEntryScanner(volume_scanner.VolumeScanner): inode = getattr(path_spec, 'inode', None) return inode + def _get_tsk_partition_path_spec(self, path_spec): + """Gets the path spec for the TSK partition. + + Args: + path_spec (dfvfs.PathSpec): path spec of the volume. + + Returns: + TSK partition path_spec or None. + """ + partition_path_spec = None + while path_spec.HasParent(): + type_indicator = path_spec.type_indicator + if type_indicator == dfvfs_definitions.TYPE_INDICATOR_TSK_PARTITION: + partition_path_spec = path_spec + break + path_spec = path_spec.parent + return partition_path_spec + def _get_volume_location(self, path_spec): """Gets volume location / identifier for the given path spec. @@ -174,6 +194,46 @@ class FileEntryScanner(volume_scanner.VolumeScanner): self._list_file_entry( file_system, sub_file_entry, path_segments, location) + def get_volume_extents(self, image_path): + """Gets the extents of all volumes. + + Args: + image_path (str): path of the source image. + + Returns: + Volume location / identifier, offset, and size for all volumes. + """ + if not self._volumes or self._source_path != image_path: + base_path_specs = self.GetBasePathSpecs(image_path) + + for path_spec in base_path_specs: + partition_path_spec = self._get_tsk_partition_path_spec(path_spec) + if not partition_path_spec: + location = getattr(path_spec, 'location', None) + self._volumes[location] = {'start': 0, 'end': None} + else: + location = getattr(partition_path_spec, 'location', None) + partition_offset = None + partition_size = None + + volume_system = tsk_volume_system.TSKVolumeSystem() + try: + volume_system.Open(partition_path_spec) + volume_identifier = location.replace('/', '') + volume = volume_system.GetVolumeByIdentifier(volume_identifier) + + partition_offset = volume.extents[0].offset + partition_size = volume.extents[0].size + except dfvfs_errors.VolumeSystemError as e: + log.error('Could not process partition: %s', e) + + self._volumes[location] = { + 'start': partition_offset, + 'end': partition_offset + partition_size + } + + return self._volumes + def parse_file_entries(self, base_path_specs, datastore): """Parses file entries in the base path specification. diff --git a/dfdewey/utils/index_searcher.py b/dfdewey/utils/index_searcher.py index 10cccfb..4b9ab1a 100644 --- a/dfdewey/utils/index_searcher.py +++ b/dfdewey/utils/index_searcher.py @@ -17,10 +17,13 @@ import logging import os +from dfvfs.lib import errors as dfvfs_errors +import pytsk3 from tabulate import tabulate from dfdewey.datastore.elastic import ElasticsearchDataStore from dfdewey.datastore.postgresql import PostgresqlDataStore +from dfdewey.utils.image_processor import FileEntryScanner, UnattendedVolumeScannerMediator log = logging.getLogger('dfdewey.index_searcher') @@ -47,7 +50,7 @@ class _SearchHit(): """ search_hit_dict = {} search_hit_dict['Offset'] = self.offset - search_hit_dict['Filename'] = self.filename + search_hit_dict['Filename (inode)'] = self.filename search_hit_dict['String'] = self.data return search_hit_dict @@ -64,6 +67,7 @@ class IndexSearcher(): self.image = image self.images = {} self.postgresql = PostgresqlDataStore() + self.scanner = None if image != 'all': self.image = os.path.abspath(self.image) @@ -83,6 +87,93 @@ class IndexSearcher(): for image_hash, image_path in images: self.images[image_hash] = image_path + def _get_filenames_from_inode(self, inode, location): + """Gets filename(s) from an inode number. + + Args: + inode: Inode number of target file + location: Partition number + + Returns: + Filename of given inode or None + """ + results = self.postgresql.query(( + 'SELECT filename FROM files ' + 'WHERE inum = {0:d} AND part = \'{1:s}\'').format(inode, location)) + filenames = [] + for result in results: + filenames.append(result[0]) + return filenames + + def _get_filename_from_offset(self, image_path, image_hash, offset): + """Gets filename given a byte offset within an image. + + Args: + image_path: source image path. + image_hash: source image hash. + offset: byte offset within the image. + + Returns: + Filename allocated to the given offset, or None. + """ + filenames = [] + + database_name = ''.join(('fs', image_hash)) + self.postgresql.switch_database(db_name=database_name) + + volume_extents = {} + try: + if not self.scanner: + mediator = UnattendedVolumeScannerMediator() + self.scanner = FileEntryScanner(mediator=mediator) + volume_extents = self.scanner.get_volume_extents(image_path) + except dfvfs_errors.ScannerError as e: + log.error('Error scanning for partitions: %s', e) + + hit_location = None + partition_offset = None + for location, extent in volume_extents.items(): + if not extent['end']: + # Image is of a single volume + hit_location = location + partition_offset = extent['start'] + elif extent['start'] <= offset < extent['end']: + hit_location = location + partition_offset = extent['start'] + + if partition_offset is not None: + try: + img = pytsk3.Img_Info(image_path) + filesystem = pytsk3.FS_Info(img, offset=partition_offset) + block_size = filesystem.info.block_size + except TypeError as e: + log.error('Error opening image: %s', e) + + inodes = self._get_inodes( + int((offset - partition_offset) / block_size), hit_location) + + if inodes: + for i in inodes: + inode = i[0] + # Account for resident files + if (i[0] == 0 and + filesystem.info.ftype == pytsk3.TSK_FS_TYPE_NTFS_DETECT): + mft_record_size_offset = 0x40 + partition_offset + mft_record_size = int.from_bytes( + img.read(mft_record_size_offset, 1), 'little', signed=True) + if mft_record_size < 0: + mft_record_size = 2**(mft_record_size * -1) + else: + mft_record_size = mft_record_size * block_size + inode = self._get_ntfs_resident_inode((offset - partition_offset), + filesystem, mft_record_size) + + inode_filenames = self._get_filenames_from_inode(inode, hit_location) + filename = ' | '.join(inode_filenames) + filenames.append('{0:s} ({1:d})'.format(filename, inode)) + + return filenames + def _get_image_hash(self): """Get an image hash from the datastore. @@ -92,7 +183,49 @@ class IndexSearcher(): image_hash = self.postgresql.query_single_row( 'SELECT image_hash FROM images WHERE image_path = \'{0:s}\''.format( self.image)) - self.images[image_hash[0]] = self.image + if image_hash: + self.images[image_hash[0]] = self.image + + def _get_inodes(self, block, location): + """Gets inode numbers for a block offset. + + Args: + block (int): block offset within the image. + location (str): Partition location / identifier. + + Returns: + Inode number(s) of the given block or None. + """ + inodes = self.postgresql.query( + ('SELECT inum FROM blocks ' + 'WHERE block = {0:d} AND part = \'{1:s}\'').format(block, location)) + return inodes + + def _get_ntfs_resident_inode(self, offset, filesystem, mft_record_size): + """Gets the inode number associated with NTFS $MFT resident data. + + Args: + offset: data offset within volume. + filesystem: pytsk3 FS_INFO object. + mft_record_size: size of each $MFT entry. + + Returns: + inode number of resident data + """ + block_size = filesystem.info.block_size + offset_block = int(offset / block_size) + + inode = filesystem.open_meta(0) + mft_entry = 0 + for attr in inode: + for run in attr: + for block in range(run.len): + if run.addr + block == offset_block: + mft_entry += int( + (offset - (offset_block * block_size)) / mft_record_size) + return mft_entry + mft_entry += int(block_size / mft_record_size) + return 0 def search(self, query): """Run a single query. @@ -115,7 +248,9 @@ class IndexSearcher(): if result['_source']['file_offset']: offset = '-'.join((offset, result['_source']['file_offset'])) hit.offset = offset - # TODO (dfjxs): Filenames + filenames = self._get_filename_from_offset( + image_path, image_hash, result['_source']['offset']) + hit.filename = '\n'.join(filenames) hit.data = result['_source']['data'].strip() hits.append(hit.copy_to_dict()) output = tabulate(hits, headers='keys', tablefmt='simple')