Refactoring CLI, processing and searching
commit ccc9edfc6f
parent 58d306b6e0
2 changed files with 198 additions and 3 deletions
dfdewey/utils/image_processor.py

@@ -24,6 +24,7 @@ from dfvfs.helpers import volume_scanner
from dfvfs.lib import definitions as dfvfs_definitions
from dfvfs.lib import errors as dfvfs_errors
from dfvfs.resolver import resolver
from dfvfs.volume import tsk_volume_system
import pytsk3

from dfdewey.datastore.elastic import ElasticsearchDataStore

@@ -71,6 +72,7 @@ class FileEntryScanner(volume_scanner.VolumeScanner):
    self._datastore = None
    self._list_only_files = False
    self._rows = []
    self._volumes = {}

  def _get_display_path(self, path_spec, path_segments, data_stream_name):
    """Retrieves a path to display.
@@ -116,6 +118,24 @@ class FileEntryScanner(volume_scanner.VolumeScanner):
    inode = getattr(path_spec, 'inode', None)
    return inode

  def _get_tsk_partition_path_spec(self, path_spec):
    """Gets the path spec for the TSK partition.

    Args:
      path_spec (dfvfs.PathSpec): path spec of the volume.

    Returns:
      TSK partition path_spec or None.
    """
    partition_path_spec = None
    while path_spec.HasParent():
      type_indicator = path_spec.type_indicator
      if type_indicator == dfvfs_definitions.TYPE_INDICATOR_TSK_PARTITION:
        partition_path_spec = path_spec
        break
      path_spec = path_spec.parent
    return partition_path_spec

  def _get_volume_location(self, path_spec):
    """Gets volume location / identifier for the given path spec.
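For reference, the helper above walks a dfVFS path spec chain from the innermost layer outward. A minimal sketch of building and walking such a chain, assuming a raw image with one TSK partition (the paths and layering here are illustrative, not taken from this commit):

from dfvfs.lib import definitions as dfvfs_definitions
from dfvfs.path import factory as path_spec_factory

# Illustrative chain: OS file -> raw image -> TSK partition -> TSK filesystem.
os_spec = path_spec_factory.Factory.NewPathSpec(
    dfvfs_definitions.TYPE_INDICATOR_OS, location='/tmp/image.raw')
raw_spec = path_spec_factory.Factory.NewPathSpec(
    dfvfs_definitions.TYPE_INDICATOR_RAW, parent=os_spec)
partition_spec = path_spec_factory.Factory.NewPathSpec(
    dfvfs_definitions.TYPE_INDICATOR_TSK_PARTITION, location='/p1',
    parent=raw_spec)
fs_spec = path_spec_factory.Factory.NewPathSpec(
    dfvfs_definitions.TYPE_INDICATOR_TSK, location='/', parent=partition_spec)

# Walk outward from the innermost spec until the partition layer is found,
# mirroring _get_tsk_partition_path_spec().
current = fs_spec
while current.HasParent():
  if current.type_indicator == dfvfs_definitions.TYPE_INDICATOR_TSK_PARTITION:
    print(getattr(current, 'location', None))  # '/p1'
    break
  current = current.parent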
@@ -174,6 +194,46 @@ class FileEntryScanner(volume_scanner.VolumeScanner):
      self._list_file_entry(
          file_system, sub_file_entry, path_segments, location)

  def get_volume_extents(self, image_path):
    """Gets the extents of all volumes.

    Args:
      image_path (str): path of the source image.

    Returns:
      Volume location / identifier, offset, and size for all volumes.
    """
    if not self._volumes or self._source_path != image_path:
      base_path_specs = self.GetBasePathSpecs(image_path)

      for path_spec in base_path_specs:
        partition_path_spec = self._get_tsk_partition_path_spec(path_spec)
        if not partition_path_spec:
          location = getattr(path_spec, 'location', None)
          self._volumes[location] = {'start': 0, 'end': None}
        else:
          location = getattr(partition_path_spec, 'location', None)
          partition_offset = None
          partition_size = None

          volume_system = tsk_volume_system.TSKVolumeSystem()
          try:
            volume_system.Open(partition_path_spec)
            volume_identifier = location.replace('/', '')
            volume = volume_system.GetVolumeByIdentifier(volume_identifier)

            partition_offset = volume.extents[0].offset
            partition_size = volume.extents[0].size
          except dfvfs_errors.VolumeSystemError as e:
            log.error('Could not process partition: %s', e)

          self._volumes[location] = {
              'start': partition_offset,
              'end': partition_offset + partition_size
          }

    return self._volumes

  def parse_file_entries(self, base_path_specs, datastore):
    """Parses file entries in the base path specification.

dfdewey/utils/index_searcher.py

@@ -17,10 +17,13 @@
import logging
import os

from dfvfs.lib import errors as dfvfs_errors
import pytsk3
from tabulate import tabulate

from dfdewey.datastore.elastic import ElasticsearchDataStore
from dfdewey.datastore.postgresql import PostgresqlDataStore
from dfdewey.utils.image_processor import FileEntryScanner, UnattendedVolumeScannerMediator

log = logging.getLogger('dfdewey.index_searcher')
@@ -47,7 +50,7 @@ class _SearchHit():
    """
    search_hit_dict = {}
    search_hit_dict['Offset'] = self.offset
-   search_hit_dict['Filename'] = self.filename
+   search_hit_dict['Filename (inode)'] = self.filename
    search_hit_dict['String'] = self.data

    return search_hit_dict
@@ -64,6 +67,7 @@ class IndexSearcher():
    self.image = image
    self.images = {}
    self.postgresql = PostgresqlDataStore()
    self.scanner = None

    if image != 'all':
      self.image = os.path.abspath(self.image)
@@ -83,6 +87,93 @@ class IndexSearcher():
    for image_hash, image_path in images:
      self.images[image_hash] = image_path

  def _get_filenames_from_inode(self, inode, location):
    """Gets filename(s) from an inode number.

    Args:
      inode: Inode number of target file
      location: Partition number

    Returns:
      Filename of given inode or None
    """
    results = self.postgresql.query((
        'SELECT filename FROM files '
        'WHERE inum = {0:d} AND part = \'{1:s}\'').format(inode, location))
    filenames = []
    for result in results:
      filenames.append(result[0])
    return filenames

  def _get_filename_from_offset(self, image_path, image_hash, offset):
    """Gets filename given a byte offset within an image.

    Args:
      image_path: source image path.
      image_hash: source image hash.
      offset: byte offset within the image.

    Returns:
      Filename allocated to the given offset, or None.
    """
    filenames = []

    database_name = ''.join(('fs', image_hash))
    self.postgresql.switch_database(db_name=database_name)

    volume_extents = {}
    try:
      if not self.scanner:
        mediator = UnattendedVolumeScannerMediator()
        self.scanner = FileEntryScanner(mediator=mediator)
      volume_extents = self.scanner.get_volume_extents(image_path)
    except dfvfs_errors.ScannerError as e:
      log.error('Error scanning for partitions: %s', e)

    hit_location = None
    partition_offset = None
    for location, extent in volume_extents.items():
      if not extent['end']:
        # Image is of a single volume
        hit_location = location
        partition_offset = extent['start']
      elif extent['start'] <= offset < extent['end']:
        hit_location = location
        partition_offset = extent['start']

    if partition_offset is not None:
      try:
        img = pytsk3.Img_Info(image_path)
        filesystem = pytsk3.FS_Info(img, offset=partition_offset)
        block_size = filesystem.info.block_size
      except TypeError as e:
        log.error('Error opening image: %s', e)

      inodes = self._get_inodes(
          int((offset - partition_offset) / block_size), hit_location)

      if inodes:
        for i in inodes:
          inode = i[0]
          # Account for resident files
          if (i[0] == 0 and
              filesystem.info.ftype == pytsk3.TSK_FS_TYPE_NTFS_DETECT):
            mft_record_size_offset = 0x40 + partition_offset
            mft_record_size = int.from_bytes(
                img.read(mft_record_size_offset, 1), 'little', signed=True)
            if mft_record_size < 0:
              mft_record_size = 2**(mft_record_size * -1)
            else:
              mft_record_size = mft_record_size * block_size
            inode = self._get_ntfs_resident_inode(
                (offset - partition_offset), filesystem, mft_record_size)

          inode_filenames = self._get_filenames_from_inode(inode, hit_location)
          filename = ' | '.join(inode_filenames)
          filenames.append('{0:s} ({1:d})'.format(filename, inode))

    return filenames

  def _get_image_hash(self):
    """Get an image hash from the datastore.
@@ -92,7 +183,49 @@ class IndexSearcher():
    image_hash = self.postgresql.query_single_row(
        'SELECT image_hash FROM images WHERE image_path = \'{0:s}\''.format(
            self.image))
-   self.images[image_hash[0]] = self.image
+   if image_hash:
+     self.images[image_hash[0]] = self.image

  def _get_inodes(self, block, location):
    """Gets inode numbers for a block offset.

    Args:
      block (int): block offset within the image.
      location (str): Partition location / identifier.

    Returns:
      Inode number(s) of the given block or None.
    """
    inodes = self.postgresql.query(
        ('SELECT inum FROM blocks '
         'WHERE block = {0:d} AND part = \'{1:s}\'').format(block, location))
    return inodes

  def _get_ntfs_resident_inode(self, offset, filesystem, mft_record_size):
    """Gets the inode number associated with NTFS $MFT resident data.

    Args:
      offset: data offset within volume.
      filesystem: pytsk3 FS_INFO object.
      mft_record_size: size of each $MFT entry.

    Returns:
      inode number of resident data
    """
    block_size = filesystem.info.block_size
    offset_block = int(offset / block_size)

    inode = filesystem.open_meta(0)
    mft_entry = 0
    for attr in inode:
      for run in attr:
        for block in range(run.len):
          if run.addr + block == offset_block:
            mft_entry += int(
                (offset - (offset_block * block_size)) / mft_record_size)
            return mft_entry
          mft_entry += int(block_size / mft_record_size)
    return 0

  def search(self, query):
    """Run a single query.
@@ -115,7 +248,9 @@ class IndexSearcher():
        if result['_source']['file_offset']:
          offset = '-'.join((offset, result['_source']['file_offset']))
        hit.offset = offset
-       # TODO (dfjxs): Filenames
+       filenames = self._get_filename_from_offset(
+           image_path, image_hash, result['_source']['offset'])
+       hit.filename = '\n'.join(filenames)
        hit.data = result['_source']['data'].strip()
        hits.append(hit.copy_to_dict())
      output = tabulate(hits, headers='keys', tablefmt='simple')
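For reference, tabulate() with headers='keys' renders a list of dicts, as produced by copy_to_dict() above, into an aligned text table. A minimal sketch with illustrative row values:

from tabulate import tabulate

hits = [
    {'Offset': '1048576', 'Filename (inode)': 'test.txt (42)', 'String': 'foo'},
    {'Offset': '2097152-16', 'Filename (inode)': 'image.dd (7)', 'String': 'bar'},
]
print(tabulate(hits, headers='keys', tablefmt='simple'))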