Refactoring CLI, processing and searching
This commit is contained in:
parent
58d306b6e0
commit
ccc9edfc6f
2 changed files with 198 additions and 3 deletions
|
@ -24,6 +24,7 @@ from dfvfs.helpers import volume_scanner
|
||||||
from dfvfs.lib import definitions as dfvfs_definitions
|
from dfvfs.lib import definitions as dfvfs_definitions
|
||||||
from dfvfs.lib import errors as dfvfs_errors
|
from dfvfs.lib import errors as dfvfs_errors
|
||||||
from dfvfs.resolver import resolver
|
from dfvfs.resolver import resolver
|
||||||
|
from dfvfs.volume import tsk_volume_system
|
||||||
import pytsk3
|
import pytsk3
|
||||||
|
|
||||||
from dfdewey.datastore.elastic import ElasticsearchDataStore
|
from dfdewey.datastore.elastic import ElasticsearchDataStore
|
||||||
|
@ -71,6 +72,7 @@ class FileEntryScanner(volume_scanner.VolumeScanner):
|
||||||
self._datastore = None
|
self._datastore = None
|
||||||
self._list_only_files = False
|
self._list_only_files = False
|
||||||
self._rows = []
|
self._rows = []
|
||||||
|
self._volumes = {}
|
||||||
|
|
||||||
def _get_display_path(self, path_spec, path_segments, data_stream_name):
|
def _get_display_path(self, path_spec, path_segments, data_stream_name):
|
||||||
"""Retrieves a path to display.
|
"""Retrieves a path to display.
|
||||||
|
@ -116,6 +118,24 @@ class FileEntryScanner(volume_scanner.VolumeScanner):
|
||||||
inode = getattr(path_spec, 'inode', None)
|
inode = getattr(path_spec, 'inode', None)
|
||||||
return inode
|
return inode
|
||||||
|
|
||||||
|
def _get_tsk_partition_path_spec(self, path_spec):
  """Walks a path spec chain looking for the TSK partition path spec.

  Args:
    path_spec (dfvfs.PathSpec): path spec of the volume.

  Returns:
    TSK partition path_spec or None.
  """
  current = path_spec
  # Walk up the parent chain until a TSK partition layer is found.
  while current.HasParent():
    if (current.type_indicator ==
        dfvfs_definitions.TYPE_INDICATOR_TSK_PARTITION):
      return current
    current = current.parent
  return None
|
||||||
|
|
||||||
def _get_volume_location(self, path_spec):
|
def _get_volume_location(self, path_spec):
|
||||||
"""Gets volume location / identifier for the given path spec.
|
"""Gets volume location / identifier for the given path spec.
|
||||||
|
|
||||||
|
@ -174,6 +194,46 @@ class FileEntryScanner(volume_scanner.VolumeScanner):
|
||||||
self._list_file_entry(
|
self._list_file_entry(
|
||||||
file_system, sub_file_entry, path_segments, location)
|
file_system, sub_file_entry, path_segments, location)
|
||||||
|
|
||||||
|
def get_volume_extents(self, image_path):
  """Gets the extents of all volumes.

  Results are cached in self._volumes and only recomputed when a
  different image path is supplied.

  Args:
    image_path (str): path of the source image.

  Returns:
    Volume location / identifier, offset, and size for all volumes,
    as a dict of {location: {'start': int, 'end': int or None}}.
  """
  if not self._volumes or self._source_path != image_path:
    base_path_specs = self.GetBasePathSpecs(image_path)

    for path_spec in base_path_specs:
      partition_path_spec = self._get_tsk_partition_path_spec(path_spec)
      if not partition_path_spec:
        # No TSK partition layer: image of a single volume, which starts
        # at offset zero and has no known end.
        location = getattr(path_spec, 'location', None)
        self._volumes[location] = {'start': 0, 'end': None}
      else:
        location = getattr(partition_path_spec, 'location', None)
        partition_offset = None
        partition_size = None

        volume_system = tsk_volume_system.TSKVolumeSystem()
        try:
          volume_system.Open(partition_path_spec)
          volume_identifier = location.replace('/', '')
          volume = volume_system.GetVolumeByIdentifier(volume_identifier)

          partition_offset = volume.extents[0].offset
          partition_size = volume.extents[0].size
        except dfvfs_errors.VolumeSystemError as e:
          log.error('Could not process partition: %s', e)

        # Bug fix: if the volume system could not be opened, the offset
        # and size are still None and the addition below would raise
        # TypeError. Skip recording this volume instead.
        if partition_offset is None or partition_size is None:
          continue

        self._volumes[location] = {
            'start': partition_offset,
            'end': partition_offset + partition_size
        }

  return self._volumes
|
||||||
|
|
||||||
def parse_file_entries(self, base_path_specs, datastore):
|
def parse_file_entries(self, base_path_specs, datastore):
|
||||||
"""Parses file entries in the base path specification.
|
"""Parses file entries in the base path specification.
|
||||||
|
|
||||||
|
|
|
@ -17,10 +17,13 @@
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
from dfvfs.lib import errors as dfvfs_errors
|
||||||
|
import pytsk3
|
||||||
from tabulate import tabulate
|
from tabulate import tabulate
|
||||||
|
|
||||||
from dfdewey.datastore.elastic import ElasticsearchDataStore
|
from dfdewey.datastore.elastic import ElasticsearchDataStore
|
||||||
from dfdewey.datastore.postgresql import PostgresqlDataStore
|
from dfdewey.datastore.postgresql import PostgresqlDataStore
|
||||||
|
from dfdewey.utils.image_processor import FileEntryScanner, UnattendedVolumeScannerMediator
|
||||||
|
|
||||||
log = logging.getLogger('dfdewey.index_searcher')
|
log = logging.getLogger('dfdewey.index_searcher')
|
||||||
|
|
||||||
|
@ -47,7 +50,7 @@ class _SearchHit():
|
||||||
"""
|
"""
|
||||||
search_hit_dict = {}
|
search_hit_dict = {}
|
||||||
search_hit_dict['Offset'] = self.offset
|
search_hit_dict['Offset'] = self.offset
|
||||||
search_hit_dict['Filename'] = self.filename
|
search_hit_dict['Filename (inode)'] = self.filename
|
||||||
search_hit_dict['String'] = self.data
|
search_hit_dict['String'] = self.data
|
||||||
|
|
||||||
return search_hit_dict
|
return search_hit_dict
|
||||||
|
@ -64,6 +67,7 @@ class IndexSearcher():
|
||||||
self.image = image
|
self.image = image
|
||||||
self.images = {}
|
self.images = {}
|
||||||
self.postgresql = PostgresqlDataStore()
|
self.postgresql = PostgresqlDataStore()
|
||||||
|
self.scanner = None
|
||||||
|
|
||||||
if image != 'all':
|
if image != 'all':
|
||||||
self.image = os.path.abspath(self.image)
|
self.image = os.path.abspath(self.image)
|
||||||
|
@ -83,6 +87,93 @@ class IndexSearcher():
|
||||||
for image_hash, image_path in images:
|
for image_hash, image_path in images:
|
||||||
self.images[image_hash] = image_path
|
self.images[image_hash] = image_path
|
||||||
|
|
||||||
|
def _get_filenames_from_inode(self, inode, location):
  """Gets filename(s) from an inode number.

  Args:
    inode: Inode number of target file
    location: Partition number

  Returns:
    List of filenames recorded for the given inode (may be empty).
  """
  sql = (
      'SELECT filename FROM files '
      'WHERE inum = {0:d} AND part = \'{1:s}\'').format(inode, location)
  # One inode can be linked to several filenames (hard links).
  return [row[0] for row in self.postgresql.query(sql)]
|
||||||
|
|
||||||
|
def _get_filename_from_offset(self, image_path, image_hash, offset):
  """Gets filename given a byte offset within an image.

  Args:
    image_path: source image path.
    image_hash: source image hash.
    offset: byte offset within the image.

  Returns:
    List of filenames allocated to the given offset (empty if none could
    be resolved).
  """
  filenames = []

  database_name = ''.join(('fs', image_hash))
  self.postgresql.switch_database(db_name=database_name)

  volume_extents = {}
  try:
    if not self.scanner:
      mediator = UnattendedVolumeScannerMediator()
      self.scanner = FileEntryScanner(mediator=mediator)
    volume_extents = self.scanner.get_volume_extents(image_path)
  except dfvfs_errors.ScannerError as e:
    log.error('Error scanning for partitions: %s', e)

  # Find which volume the hit offset falls in.
  hit_location = None
  partition_offset = None
  for location, extent in volume_extents.items():
    if not extent['end']:
      # Image is of a single volume
      hit_location = location
      partition_offset = extent['start']
    elif extent['start'] <= offset < extent['end']:
      hit_location = location
      partition_offset = extent['start']

  if partition_offset is not None:
    try:
      img = pytsk3.Img_Info(image_path)
      filesystem = pytsk3.FS_Info(img, offset=partition_offset)
      block_size = filesystem.info.block_size
    except TypeError as e:
      log.error('Error opening image: %s', e)
      # Bug fix: the original fell through here and then used the unbound
      # names block_size / filesystem / img, raising NameError. Bail out
      # with what we have instead.
      return filenames

    inodes = self._get_inodes(
        int((offset - partition_offset) / block_size), hit_location)

    if inodes:
      for i in inodes:
        inode = i[0]
        # Account for resident files
        if (i[0] == 0 and
            filesystem.info.ftype == pytsk3.TSK_FS_TYPE_NTFS_DETECT):
          # NTFS boot sector byte 0x40 holds clusters-per-MFT-record; a
          # negative value v means the record size is 2**(-v) bytes.
          mft_record_size_offset = 0x40 + partition_offset
          mft_record_size = int.from_bytes(
              img.read(mft_record_size_offset, 1), 'little', signed=True)
          if mft_record_size < 0:
            mft_record_size = 2**(mft_record_size * -1)
          else:
            mft_record_size = mft_record_size * block_size
          inode = self._get_ntfs_resident_inode((offset - partition_offset),
                                                filesystem, mft_record_size)

        inode_filenames = self._get_filenames_from_inode(inode, hit_location)
        filename = ' | '.join(inode_filenames)
        filenames.append('{0:s} ({1:d})'.format(filename, inode))

  return filenames
|
||||||
|
|
||||||
def _get_image_hash(self):
|
def _get_image_hash(self):
|
||||||
"""Get an image hash from the datastore.
|
"""Get an image hash from the datastore.
|
||||||
|
|
||||||
|
@ -92,7 +183,49 @@ class IndexSearcher():
|
||||||
image_hash = self.postgresql.query_single_row(
|
image_hash = self.postgresql.query_single_row(
|
||||||
'SELECT image_hash FROM images WHERE image_path = \'{0:s}\''.format(
|
'SELECT image_hash FROM images WHERE image_path = \'{0:s}\''.format(
|
||||||
self.image))
|
self.image))
|
||||||
self.images[image_hash[0]] = self.image
|
if image_hash:
|
||||||
|
self.images[image_hash[0]] = self.image
|
||||||
|
|
||||||
|
def _get_inodes(self, block, location):
  """Gets inode numbers for a block offset.

  Args:
    block (int): block offset within the image.
    location (str): Partition location / identifier.

  Returns:
    Inode number(s) of the given block or None.
  """
  query = (
      'SELECT inum FROM blocks '
      'WHERE block = {0:d} AND part = \'{1:s}\'').format(block, location)
  return self.postgresql.query(query)
|
||||||
|
|
||||||
|
def _get_ntfs_resident_inode(self, offset, filesystem, mft_record_size):
  """Gets the inode number associated with NTFS $MFT resident data.

  Args:
    offset: data offset within volume.
    filesystem: pytsk3 FS_INFO object.
    mft_record_size: size of each $MFT entry.

  Returns:
    inode number of resident data
  """
  block_size = filesystem.info.block_size
  target_block = int(offset / block_size)

  # Walk the $MFT (inode 0) data runs, counting MFT entries until we
  # reach the filesystem block containing the target offset.
  mft = filesystem.open_meta(0)
  entry_count = 0
  entries_per_block = int(block_size / mft_record_size)
  for attribute in mft:
    for run in attribute:
      for block_index in range(run.len):
        if run.addr + block_index == target_block:
          # Add the entry index within the matching block.
          entry_count += int(
              (offset - (target_block * block_size)) / mft_record_size)
          return entry_count
        entry_count += entries_per_block
  return 0
|
||||||
|
|
||||||
def search(self, query):
|
def search(self, query):
|
||||||
"""Run a single query.
|
"""Run a single query.
|
||||||
|
@ -115,7 +248,9 @@ class IndexSearcher():
|
||||||
if result['_source']['file_offset']:
|
if result['_source']['file_offset']:
|
||||||
offset = '-'.join((offset, result['_source']['file_offset']))
|
offset = '-'.join((offset, result['_source']['file_offset']))
|
||||||
hit.offset = offset
|
hit.offset = offset
|
||||||
# TODO (dfjxs): Filenames
|
filenames = self._get_filename_from_offset(
|
||||||
|
image_path, image_hash, result['_source']['offset'])
|
||||||
|
hit.filename = '\n'.join(filenames)
|
||||||
hit.data = result['_source']['data'].strip()
|
hit.data = result['_source']['data'].strip()
|
||||||
hits.append(hit.copy_to_dict())
|
hits.append(hit.copy_to_dict())
|
||||||
output = tabulate(hits, headers='keys', tablefmt='simple')
|
output = tabulate(hits, headers='keys', tablefmt='simple')
|
||||||
|
|
Loading…
Reference in a new issue