Merge pull request #5 from google/refactor

Refactor CLI, processing, and searching
Jason 2020-11-20 15:07:20 +11:00 committed by GitHub
commit 7ac54eccc2
11 changed files with 1029 additions and 654 deletions


@@ -16,16 +16,11 @@
import codecs
import collections
import logging
from elasticsearch import Elasticsearch
from elasticsearch import exceptions
import six
# Setup logging
es_logger = logging.getLogger('dfdewey.elasticsearch')
es_logger.setLevel(logging.WARNING)
class ElasticsearchDataStore():
"""Implements the datastore."""
@@ -138,6 +133,17 @@ class ElasticsearchDataStore():
return self.import_counter['events']
def index_exists(self, index_name):
"""Check if an index already exists.
Args:
index_name: Name of the index
Returns:
True if the index exists, False if not.
"""
return self.client.indices.exists(index_name)
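The new `index_exists` helper wraps the elasticsearch-py indices API and is what later lets the image processor skip re-indexing. A minimal sketch of that pattern, assuming a reachable Elasticsearch instance and a placeholder image hash:

```python
from dfdewey.datastore.elastic import ElasticsearchDataStore

es = ElasticsearchDataStore()
image_hash = 'd41d8cd98f00b204e9800998ecf8427e'  # placeholder MD5
index_name = ''.join(('es', image_hash))
if es.index_exists(index_name):
    print('Image already indexed')
else:
    index_name = es.create_index(index_name=index_name)
```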
def search(self, index_id, query_string, size=DEFAULT_SIZE):
"""Search ElasticSearch.


@@ -14,15 +14,9 @@
# limitations under the License.
"""PostgreSQL datastore."""
import logging
import psycopg2
from psycopg2 import extras
# Setup logging
postgresql_logger = logging.getLogger('dfdewey.postgresql')
postgresql_logger.setLevel(logging.WARNING)
class PostgresqlDataStore():
"""Implements the datastore."""
@@ -31,9 +25,12 @@ class PostgresqlDataStore():
self, host='127.0.0.1', port=5432, db_name='dfdewey', autocommit=False):
"""Create a PostgreSQL client."""
super().__init__()
self.db = psycopg2.connect(
database=db_name, user='dfdewey', password='password', host=host,
port=port)
try:
self.db = psycopg2.connect(
database=db_name, user='dfdewey', password='password', host=host,
port=port)
except psycopg2.OperationalError as e:
raise RuntimeError('Unable to connect to PostgreSQL.') from e
if autocommit:
self.db.set_isolation_level(
psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
@@ -41,9 +38,11 @@ class PostgresqlDataStore():
def __del__(self):
"""Finalise a PostgreSQL client."""
if self.db:
try:
self.db.commit()
self.db.close()
except AttributeError:
pass
def bulk_insert(self, table_spec, rows):
"""Execute a bulk insert into a table.
@@ -53,7 +52,9 @@ class PostgresqlDataStore():
rows: Array of value tuples to be inserted
"""
extras.execute_values(
self.cursor, 'INSERT INTO {0:s} VALUES %s'.format(table_spec), rows)
self.cursor,
'INSERT INTO {0:s} VALUES %s ON CONFLICT DO NOTHING'.format(table_spec),
rows)
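Adding `ON CONFLICT DO NOTHING` makes repeated bulk inserts idempotent: rows that would violate the primary keys introduced on the `blocks` and `files` tables later in this change are skipped instead of aborting the whole batch. A minimal sketch of the resulting statement, using illustrative connection parameters and an illustrative two-column `blocks` table with a primary key on `(block, inum)`:

```python
import psycopg2
from psycopg2 import extras

# Connection parameters mirror the defaults used in the diff above.
db = psycopg2.connect(
    database='dfdewey', user='dfdewey', password='password', host='127.0.0.1',
    port=5432)
cursor = db.cursor()
rows = [(1, 1), (2, 2), (1, 1)]  # the duplicate (1, 1) is silently skipped
extras.execute_values(
    cursor, 'INSERT INTO blocks (block, inum) VALUES %s ON CONFLICT DO NOTHING',
    rows)
db.commit()
```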
def execute(self, command):
"""Execute a command in the PostgreSQL database.


@@ -40,7 +40,7 @@ class PostgresqlTest(unittest.TestCase):
rows = [(1, 1), (2, 2), (3, 3)]
db.bulk_insert('blocks (block, inum)', rows)
expected_sql = 'INSERT INTO blocks (block, inum) VALUES %s'
expected_sql = 'INSERT INTO blocks (block, inum) VALUES %s ON CONFLICT DO NOTHING'
mock_execute_values.assert_called_once_with(db.cursor, expected_sql, rows)
def test_execute(self):


@@ -16,19 +16,20 @@
"""DFDewey Command-Line Interface."""
import argparse
import datetime
import logging
import os
import subprocess
import tempfile
import sys
from dfdewey.datastore.elastic import ElasticsearchDataStore
from dfdewey.datastore.postgresql import PostgresqlDataStore
from dfdewey.utils import image
from dfdewey.utils.image_processor import ImageProcessorOptions, ImageProcessor
from dfdewey.utils.index_searcher import IndexSearcher
STRING_INDEXING_LOG_INTERVAL = 10000000
# Setup logging
log = logging.getLogger('dfdewey')
class _StringRecord(object):
class _StringRecord():
"""Elasticsearch string record.
Attributes:
@@ -46,6 +47,30 @@ class _StringRecord(object):
self.data = ''
def main():
"""Main DFDewey function."""
args = parse_args()
setup_logging()
if not args.search and not args.search_list:
# Processing an image since no search terms specified
if args.image == 'all':
log.error('Image must be supplied for processing.')
sys.exit(1)
image_processor_options = ImageProcessorOptions(
not args.no_base64, not args.no_gzip, not args.no_zip)
image_processor = ImageProcessor(
args.case, os.path.abspath(args.image), image_processor_options)
image_processor.process_image()
else:
index_searcher = IndexSearcher(args.case, args.image)
if args.search:
index_searcher.search(args.search)
elif args.search_list:
index_searcher.list_search(args.search_list)
def parse_args():
"""Argument parsing function.
@@ -54,8 +79,9 @@
"""
parser = argparse.ArgumentParser()
parser.add_argument('-c', '--case', required=True, help='case ID')
parser.add_argument('-i', '--image', help='image file')
parser.add_argument('case', help='case ID')
parser.add_argument(
'image', nargs='?', default='all', help='image file (default: \'all\')')
# Indexing args
parser.add_argument(
@@ -73,213 +99,17 @@
return args
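With `case` and `image` now positional (and `image` defaulting to `'all'`), the old `-c`/`-i` flags disappear. A small sketch of how the new signature parses, using an illustrative subset of the parser (the `-s` search flag appears in the usage text later in this change):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('case', help='case ID')
parser.add_argument(
    'image', nargs='?', default='all', help='image file (default: \'all\')')
parser.add_argument('-s', '--search', help='search query')

print(parser.parse_args(['testcase', '-s', 'foo']).image)  # 'all': search every image
print(parser.parse_args(['testcase', '/path/to/image.dd']).image)  # process this image
```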
def process_image(image_file, case, base64, gunzip, unzip):
"""Image processing function.
def setup_logging():
"""Configure the logger."""
log.propagate = False
log.setLevel(logging.INFO)
Run string extraction, indexing, and filesystem parsing for image file.
Args:
image_file: The image file to be processed
case: Case ID
base64: Flag to decode Base64
gunzip: Flag to decompress gzip
unzip: Flag to decompress zip
"""
image_path = os.path.abspath(image_file)
output_path = tempfile.mkdtemp()
cmd = ['bulk_extractor', '-o', output_path, '-x', 'all', '-e', 'wordlist']
if base64:
cmd.extend(['-e', 'base64'])
if gunzip:
cmd.extend(['-e', 'gzip'])
if unzip:
cmd.extend(['-e', 'zip'])
cmd.extend(['-S', 'strings=YES', '-S', 'word_max=1000000'])
cmd.extend([image_path])
print('Processing start: {0!s}'.format(datetime.datetime.now()))
print('\n*** Running bulk extractor:\n{0:s}'.format(' '.join(cmd)))
output = subprocess.check_output(cmd)
md5_offset = output.index(b'MD5') + 19
image_hash = output[md5_offset:md5_offset + 32].decode('utf-8')
print('String extraction completed: {0!s}'.format(datetime.datetime.now()))
print('\n*** Parsing image')
image_already_processed = image.initialise_block_db(
image_path, image_hash, case)
print('Parsing completed: {0!s}'.format(datetime.datetime.now()))
if not image_already_processed:
print('\n*** Indexing image')
index_strings(output_path, image_hash)
print('Indexing completed: {0!s}'.format(datetime.datetime.now()))
else:
print('\n*** Image already indexed')
print('Processing complete!')
def index_strings(output_path, image_hash):
"""ElasticSearch indexing function.
Args:
output_path: The output directory from bulk_extractor
image_hash: MD5 of the parsed image
"""
print('\n*** Indexing data...')
es = ElasticsearchDataStore()
index_name = ''.join(('es', image_hash))
index_name = es.create_index(index_name=index_name)
print('Index {0:s} created.'.format(index_name))
string_list = os.path.join(output_path, 'wordlist.txt')
with open(string_list, 'r') as strings:
for line in strings:
if not line.startswith('#'):
string_record = _StringRecord()
string_record.image = image_hash
line = line.split('\t')
offset = line[0]
data = '\t'.join(line[1:])
if offset.find('-') > 0:
offset = offset.split('-')
image_offset = offset[0]
file_offset = '-'.join(offset[1:])
string_record.offset = int(image_offset)
string_record.file_offset = file_offset
else:
string_record.offset = int(offset)
string_record.data = data
records = index_record(es, index_name, string_record)
if records % STRING_INDEXING_LOG_INTERVAL == 0:
print('Indexed {0:d} records...'.format(records))
records = es.import_event(index_name)
print('\n*** Indexed {0:d} strings.'.format(records))
def index_record(es, index_name, string_record):
"""Index a single record.
Args:
es: Elasticsearch datastore
index_name: ID of the elasticsearch index
string_record: String record to be indexed
Returns:
Number of records processed
"""
json_record = {
'image': string_record.image,
'offset': string_record.offset,
'file_offset': string_record.file_offset,
'data': string_record.data
}
return es.import_event(index_name, event=json_record)
def search(query, case, image_path=None, query_list=None):
"""Search function.
Searches either the index for a single image, or indexes for all images
in a given case if no image_path is specified.
Args:
query: The query to run against the index
case: The case to query (if no specific image is provided)
image_path: Optional path of the source image
query_list: Path to a text file containing multiple search terms
"""
case_db = PostgresqlDataStore()
images = {}
if image_path:
image_path = os.path.abspath(image_path)
image_hash = case_db.query_single_row(
'SELECT image_hash FROM images WHERE image_path = \'{0:s}\''.format(
image_path))
images[image_hash[0]] = image_path
else:
print(
'No image specified, searching all images in case \'{0:s}\''.format(
case))
image_hashes = case_db.query(
'SELECT image_hash FROM image_case WHERE case_id = \'{0:s}\''.format(
case))
for image_hash in image_hashes:
image_hash = image_hash[0]
image_path = case_db.query_single_row(
'SELECT image_path FROM images WHERE image_hash = \'{0:s}\''.format(
image_hash))
images[image_hash] = image_path[0]
for image_hash, image_path in images.items():
print('\n\nSearching {0:s} ({1:s})'.format(images[image_hash], image_hash))
index = ''.join(('es', image_hash))
if query_list:
with open(query_list, 'r') as search_terms:
print('\n*** Searching for terms in \'{0:s}\'...'.format(query_list))
for term in search_terms:
term = ''.join(('"', term.strip(), '"'))
results = search_index(index, term)
if results['hits']['total']['value'] > 0:
print(
'{0:s} - {1:d} hits'.format(
term, results['hits']['total']['value']))
else:
print('\n*** Searching for \'{0:s}\'...'.format(query))
results = search_index(index, query)
print('Returned {0:d} results:'.format(results['hits']['total']['value']))
for hit in results['hits']['hits']:
filename = image.get_filename_from_offset(
image_path, hit['_source']['image'], int(hit['_source']['offset']))
if hit['_source']['file_offset']:
print(
'Offset: {0:d}\tFile: {1:s}\tFile offset:{2:s}\t'
'String: {3:s}'.format(
hit['_source']['offset'], filename,
hit['_source']['file_offset'],
hit['_source']['data'].strip()))
else:
print(
'Offset: {0:d}\tFile: {1:s}\tString: {2:s}'.format(
hit['_source']['offset'], filename,
hit['_source']['data'].strip()))
def search_index(index_id, search_query):
"""ElasticSearch search function.
Args:
index_id: The ID of the index to be searched
search_query: The query to run against the index
Returns:
Search results returned
"""
es = ElasticsearchDataStore()
return es.search(index_id, search_query)
def main():
"""Main DFDewey function."""
args = parse_args()
if not args.search and not args.search_list:
process_image(
args.image, args.case, not args.no_base64, not args.no_gzip,
not args.no_zip)
elif args.search:
search(args.search, args.case, args.image)
elif args.search_list:
search(None, args.case, args.image, args.search_list)
# Log to stdout
console_handler = logging.StreamHandler(sys.stdout)
console_handler.setLevel(logging.INFO)
formatter = logging.Formatter('[%(levelname)s] %(message)s')
console_handler.setFormatter(formatter)
log.addHandler(console_handler)
if __name__ == '__main__':


@@ -1,414 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Image File Access Functions."""
import pytsk3
from dfdewey.datastore.postgresql import PostgresqlDataStore
def initialise_block_db(image_path, image_hash, case):
"""Creates a new image database.
Args:
image_path: Path to image file
image_hash: MD5 of the image
case: Case ID
Returns:
Boolean value to indicate whether the image has already been processed
"""
img = pytsk3.Img_Info(image_path)
block_db = PostgresqlDataStore(autocommit=True)
image_exists = check_tracking_database(block_db, image_path, image_hash, case)
if not image_exists:
db_name = ''.join(('fs', image_hash))
block_db.execute('CREATE DATABASE {0:s}'.format(db_name))
block_db.switch_database(db_name=db_name)
populate_block_db(img, block_db, batch_size=1500)
return image_exists
def check_tracking_database(tracking_db, image_path, image_hash, case):
"""Checks if an image exists in the tracking database.
Checks if an image exists in the tracking database and adds it if not.
If the image exists, but is not associated with the given case ID, will add
the association.
Args:
tracking_db: PostgreSQL database
image_path: Path to image file
image_hash: MD5 of the image
case: Case ID
Returns:
Boolean value to indicate the existence of the image
"""
tables_exist = tracking_db.table_exists('images')
image_exists = False
if not tables_exist:
tracking_db.execute(
'CREATE TABLE images (image_path TEXT, image_hash TEXT PRIMARY KEY)')
tracking_db.execute(
"""
CREATE TABLE image_case (
case_id TEXT, image_hash TEXT REFERENCES images(image_hash),
PRIMARY KEY (case_id, image_hash))""")
else:
image_exists = tracking_db.value_exists('images', 'image_hash', image_hash)
image_case_exists = False
if image_exists:
image_case = tracking_db.query_single_row(
"""
SELECT 1 from image_case
WHERE image_hash = '{0:s}' AND case_id = '{1:s}'""".format(
image_hash, case))
if image_case:
image_case_exists = True
if not image_exists:
tracking_db.execute(
"""
INSERT INTO images (image_path, image_hash)
VALUES ('{0:s}', '{1:s}')""".format(image_path, image_hash))
if not image_case_exists:
tracking_db.execute(
"""
INSERT INTO image_case (case_id, image_hash)
VALUES ('{0:s}', '{1:s}')""".format(case, image_hash))
return image_exists
def populate_block_db(img, block_db, batch_size=1500):
"""Creates a new image block database.
Args:
img: pytsk image info object
block_db: PostgreSQL database
batch_size: Number of rows to insert at a time
"""
print('Image database does not already exist. Parsing image filesystem(s)...')
block_db.execute(
'CREATE TABLE blocks (block INTEGER, inum INTEGER, part INTEGER)')
block_db.execute(
'CREATE TABLE files (inum INTEGER, filename TEXT, part INTEGER)')
has_partition_table = False
try:
volume = pytsk3.Volume_Info(img)
if volume:
print('Image has a partition table...')
has_partition_table = True
rows = []
for part in volume:
print(
'Parsing partition {0:d}: {1:s}'.format(
part.addr, part.desc.decode('utf-8')))
if part.flags != pytsk3.TSK_VS_PART_FLAG_ALLOC:
continue
filesystem = pytsk3.FS_Info(
img, offset=part.start * volume.info.block_size)
for inode in range(filesystem.info.first_inum,
filesystem.info.last_inum + 1):
file = filesystem.open_meta(inode)
if file.info.meta.nlink > 0:
for attr in file:
for run in attr:
for block in range(run.len):
rows.append((
run.addr + block,
inode,
part.addr,
))
if len(rows) >= batch_size:
block_db.bulk_insert('blocks (block, inum, part)', rows)
rows = []
if rows:
block_db.bulk_insert('blocks (block, inum, part)', rows)
# File names
directory = filesystem.open_dir(path='/')
list_directory(block_db, directory, part=part.addr, batch_size=batch_size)
except IOError:
pass
if not has_partition_table:
filesystem = pytsk3.FS_Info(img)
rows = []
for inode in range(filesystem.info.first_inum,
filesystem.info.last_inum + 1):
try:
file = filesystem.open_meta(inode)
if file.info.meta.nlink > 0:
for attr in file:
for run in attr:
for block in range(run.len):
rows.append((
run.addr + block,
inode,
))
if len(rows) >= batch_size:
block_db.bulk_insert('blocks (block, inum)', rows)
rows = []
if rows:
block_db.bulk_insert('blocks (block, inum)', rows)
except OSError:
continue
# File names
directory = filesystem.open_dir(path='/')
list_directory(block_db, directory, batch_size=batch_size)
block_db.execute('CREATE INDEX blocks_index ON blocks (block, part);')
block_db.execute('CREATE INDEX files_index ON files (inum, part);')
def list_directory(
block_db, directory, part=None, stack=None, rows=None, batch_size=1500):
"""Recursive function to create a filesystem listing.
Args:
block_db: PostgreSQL database
directory: pytsk directory object
part: Partition number
stack: Inode stack to control recursive filesystem parsing
rows: Array for batch database inserts
batch_size: Number of rows to insert at a time
Returns:
Current rows array for recursion
"""
if not stack:
stack = []
if not rows:
rows = []
stack.append(directory.info.fs_file.meta.addr)
for directory_entry in directory:
# TODO(js): Refactor
if (not hasattr(directory_entry, 'info') or
not hasattr(directory_entry.info, 'name') or
not hasattr(directory_entry.info.name, 'name') or
directory_entry.info.meta is None or
directory_entry.info.name.name in [b'.', b'..'] or
directory_entry.info.name.flags == pytsk3.TSK_FS_NAME_FLAG_UNALLOC):
continue
try:
name = directory_entry.info.name.name.decode('utf-8')
except UnicodeDecodeError:
print('Unable to decode: {}'.format(directory_entry.info.name.name))
continue
if part:
rows.append((
directory_entry.info.meta.addr,
name.replace('\'', '\'\''),
part,
))
if len(rows) >= batch_size:
block_db.bulk_insert('files (inum, filename, part)', rows)
rows = []
else:
rows.append((
directory_entry.info.meta.addr,
name.replace('\'', '\'\''),
))
if len(rows) >= batch_size:
block_db.bulk_insert('files (inum, filename)', rows)
rows = []
try:
sub_directory = directory_entry.as_directory()
inode = directory_entry.info.meta.addr
if inode not in stack:
rows = list_directory(
block_db, sub_directory, part=part, stack=stack, rows=rows,
batch_size=batch_size)
except IOError:
pass
stack.pop(-1)
if not stack:
if part:
block_db.bulk_insert('files (inum, filename, part)', rows)
else:
block_db.bulk_insert('files (inum, filename)', rows)
return rows
def get_filename_from_offset(image_path, image_hash, offset):
"""Gets filename given a byte offset within an image.
Args:
image_path: Source image path
image_hash: Source image hash
offset: Byte offset within the image
Returns:
Filename allocated to the given offset
"""
img = pytsk3.Img_Info(image_path)
db_name = ''.join(('fs', image_hash))
block_db = PostgresqlDataStore(db_name=db_name)
device_block_size = None
partition = None
partition_offset = None
unalloc_part = False
try:
volume = pytsk3.Volume_Info(img)
device_block_size = volume.info.block_size
sector_offset = offset / device_block_size
for part in volume:
if part.start <= sector_offset < part.start + part.len:
if part.flags != pytsk3.TSK_VS_PART_FLAG_ALLOC:
unalloc_part = True
partition = part.addr
partition_offset = part.start
except IOError:
pass
inums = None
if not unalloc_part:
try:
if not partition_offset:
filesystem = pytsk3.FS_Info(img)
else:
offset -= partition_offset * device_block_size
filesystem = pytsk3.FS_Info(
img, offset=partition_offset * device_block_size)
except TypeError as e:
print(e)
block_size = filesystem.info.block_size
inums = get_inums(block_db, offset / block_size, part=partition)
filenames = []
if inums:
for i in inums:
real_inum = i[0]
if i[0] == 0 and filesystem.info.ftype == pytsk3.TSK_FS_TYPE_NTFS_DETECT:
mft_record_size_offset = 0x40
if partition_offset:
mft_record_size_offset = \
mft_record_size_offset + (partition_offset * device_block_size)
mft_record_size = int.from_bytes(
img.read(mft_record_size_offset, 1), 'little', signed=True)
if mft_record_size < 0:
mft_record_size = 2**(mft_record_size * -1)
else:
mft_record_size = mft_record_size * block_size
real_inum = get_resident_inum(offset, filesystem, mft_record_size)
filename = get_filename(block_db, real_inum, part=partition)
if filename and not filenames:
filenames.append('{0:s} ({1:d})'.format(filename, real_inum))
else:
if '{0:s} ({1:d})'.format(filename, real_inum) not in filenames:
filenames.append('{0:s} ({1:d})'.format(filename, real_inum))
if not filenames:
return 'No filenames found'
else:
return ' | '.join(filenames)
def get_inums(block_db, block, part=None):
"""Gets inode number from block offset.
Args:
block_db: PostgreSQL database
block: Block offset within the image
part: Partition number
Returns:
Inode number(s) of the given block or None
"""
if part:
inums = block_db.query(
'SELECT inum FROM blocks WHERE block = {0:d} AND part = {1:d}'.format(
int(block), part))
else:
inums = block_db.query(
'SELECT inum FROM blocks WHERE block = {0:d}'.format(int(block)))
return inums
def get_resident_inum(offset, filesystem, mft_record_size):
"""Gets the inode number associated with NTFS $MFT resident data.
Args:
offset: Data offset within volume
filesystem: pytsk3 FS_INFO object
mft_record_size: Size of an $MFT entry
Returns:
inode number of resident data
"""
block_size = filesystem.info.block_size
offset_block = int(offset / block_size)
inode = filesystem.open_meta(0)
mft_entry = 0
for attr in inode:
for run in attr:
for block in range(run.len):
if run.addr + block == offset_block:
mft_entry += int(
(offset - (offset_block * block_size)) / mft_record_size)
return mft_entry
else:
mft_entry += int(block_size / mft_record_size)
return 0
def get_filename(block_db, inum, part=None):
"""Gets filename given an inode number.
Args:
block_db: PostgreSQL database
inum: Inode number of target file
part: Partition number
Returns:
Filename of given inode or None
"""
if part:
filenames = block_db.query(
'SELECT filename FROM files WHERE inum = {0:d} AND part = {1:d}'.format(
inum, part))
else:
filenames = block_db.query(
'SELECT filename FROM files WHERE inum = {0:d}'.format(inum))
if filenames:
filename = filenames[0][0]
else:
filename = 'No filenames found'
return filename


@@ -0,0 +1,637 @@
# -*- coding: utf-8 -*-
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Image processor."""
from datetime import datetime
import logging
import os
import subprocess
import tempfile
from dfvfs.helpers import volume_scanner
from dfvfs.lib import definitions as dfvfs_definitions
from dfvfs.lib import errors as dfvfs_errors
from dfvfs.resolver import resolver
from dfvfs.volume import tsk_volume_system
import pytsk3
from dfdewey.datastore.elastic import ElasticsearchDataStore
from dfdewey.datastore.postgresql import PostgresqlDataStore
BATCH_SIZE = 1500
STRING_INDEXING_LOG_INTERVAL = 10000000
log = logging.getLogger('dfdewey.image_processor')
class _StringRecord():
"""Elasticsearch string record.
Attributes:
image: Hash to identify the source image of the string
offset: Byte offset of the string within the source image
file_offset: If the string is extracted from a compressed stream, the byte
offset within the stream
data: The string to be indexed
"""
def __init__(self):
self.image = ''
self.offset = 0
self.file_offset = None
self.data = ''
class FileEntryScanner(volume_scanner.VolumeScanner):
"""File entry scanner."""
_NON_PRINTABLE_CHARACTERS = list(range(0, 0x20)) + list(range(0x7f, 0xa0))
_ESCAPE_CHARACTERS = str.maketrans({
value: '\\x{0:02x}'.format(value) for value in _NON_PRINTABLE_CHARACTERS
})
def __init__(self, mediator=None):
"""Initializes a file entry scanner.
Args:
mediator (VolumeScannerMediator): a volume scanner mediator.
"""
super().__init__(mediator=mediator)
self._datastore = None
self._list_only_files = False
self._rows = []
self._volumes = {}
def _get_display_path(self, path_spec, path_segments, data_stream_name):
"""Retrieves a path to display.
Args:
path_spec (dfvfs.PathSpec): path specification of the file entry.
path_segments (list[str]): path segments of the full path of the file
entry.
data_stream_name (str): name of the data stream.
Returns:
str: path to display.
"""
display_path = ''
if path_spec.HasParent():
parent_path_spec = path_spec.parent
if parent_path_spec and parent_path_spec.type_indicator == (
dfvfs_definitions.TYPE_INDICATOR_TSK_PARTITION):
display_path = ''.join([display_path, parent_path_spec.location])
path_segments = [
segment.translate(self._ESCAPE_CHARACTERS) for segment in path_segments
]
display_path = ''.join([display_path, '/'.join(path_segments)])
if data_stream_name:
data_stream_name = data_stream_name.translate(self._ESCAPE_CHARACTERS)
display_path = ':'.join([display_path, data_stream_name])
return display_path or '/'
def _get_inode(self, path_spec):
"""Gets the inode from a file entry path spec.
Args:
path_spec (dfvfs.PathSpec): file entry path spec.
"""
inode = None
if path_spec.type_indicator == dfvfs_definitions.TYPE_INDICATOR_NTFS:
inode = getattr(path_spec, 'mft_entry', None)
else:
inode = getattr(path_spec, 'inode', None)
return inode
def _get_tsk_partition_path_spec(self, path_spec):
"""Gets the path spec for the TSK partition.
Args:
path_spec (dfvfs.PathSpec): path spec of the volume.
Returns:
TSK partition path_spec or None.
"""
partition_path_spec = None
while path_spec.HasParent():
type_indicator = path_spec.type_indicator
if type_indicator == dfvfs_definitions.TYPE_INDICATOR_TSK_PARTITION:
partition_path_spec = path_spec
break
path_spec = path_spec.parent
return partition_path_spec
def _get_volume_location(self, path_spec):
"""Gets volume location / identifier for the given path spec.
Args:
path_spec (dfvfs.PathSpec): path spec of the volume.
Returns:
Volume location / identifier.
"""
location = getattr(path_spec, 'location', None)
while path_spec.HasParent():
type_indicator = path_spec.type_indicator
if type_indicator == dfvfs_definitions.TYPE_INDICATOR_TSK_PARTITION:
if location in ('\\', '/'):
location = getattr(path_spec, 'location', None)
break
path_spec = path_spec.parent
return location
def _list_file_entry(
self, file_system, file_entry, parent_path_segments, location):
"""Lists a file entry.
Args:
file_system (dfvfs.FileSystem): file system that contains the file entry.
file_entry (dfvfs.FileEntry): file entry to list.
parent_path_segments (str): path segments of the full path of the parent
file entry.
location (str): volume location / identifier.
"""
path_segments = parent_path_segments + [file_entry.name]
inode = self._get_inode(file_entry.path_spec)
filename = self._get_display_path(file_entry.path_spec, path_segments, '')
if not self._list_only_files or file_entry.IsFile():
if inode is not None:
self._rows.append((
inode,
filename,
location,
))
for data_stream in file_entry.data_streams:
if not data_stream.IsDefault():
filename = ':'.join((filename, data_stream.name))
self._rows.append((
inode,
filename,
location,
))
if len(self._rows) >= BATCH_SIZE:
self._datastore.bulk_insert(
'files (inum, filename, part)', self._rows)
self._rows = []
for sub_file_entry in file_entry.sub_file_entries:
self._list_file_entry(
file_system, sub_file_entry, path_segments, location)
def get_volume_extents(self, image_path):
"""Gets the extents of all volumes.
Args:
image_path (str): path of the source image.
Returns:
Volume location / identifier, offset, and size for all volumes.
"""
if not self._volumes or self._source_path != image_path:
base_path_specs = self.GetBasePathSpecs(image_path)
for path_spec in base_path_specs:
partition_path_spec = self._get_tsk_partition_path_spec(path_spec)
if not partition_path_spec:
location = getattr(path_spec, 'location', None)
self._volumes[location] = {'start': 0, 'end': None}
else:
location = getattr(partition_path_spec, 'location', None)
partition_offset = None
partition_size = None
volume_system = tsk_volume_system.TSKVolumeSystem()
try:
volume_system.Open(partition_path_spec)
volume_identifier = location.replace('/', '')
volume = volume_system.GetVolumeByIdentifier(volume_identifier)
partition_offset = volume.extents[0].offset
partition_size = volume.extents[0].size
except dfvfs_errors.VolumeSystemError as e:
log.error('Could not process partition: %s', e)
self._volumes[location] = {
'start': partition_offset,
'end': partition_offset + partition_size
}
return self._volumes
def parse_file_entries(self, base_path_specs, datastore):
"""Parses file entries in the base path specification.
Stores parsed entries in the PostgreSQL datastore.
Args:
base_path_specs (list[dfvfs.PathSpec]): source path specification.
datastore (PostgresqlDataStore): PostgreSQL datastore.
"""
self._datastore = datastore
for base_path_spec in base_path_specs:
file_system = resolver.Resolver.OpenFileSystem(base_path_spec)
file_entry = resolver.Resolver.OpenFileEntry(base_path_spec)
if file_entry is None:
log.warning(
'Unable to open base path specification: %s',
base_path_spec.comparable)
return
location = self._get_volume_location(base_path_spec)
self._list_file_entry(file_system, file_entry, [], location)
if self._rows:
self._datastore.bulk_insert('files (inum, filename, part)', self._rows)
self._rows = []
class ImageProcessor():
"""Image processor class.
Attributes:
case (str): case ID.
elasticsearch (ElasticsearchDataStore): elasticsearch datastore.
image_hash (str): MD5 hash of the image.
image_path (str): path to source image.
options (ImageProcessorOptions): image processor options.
output_path (str): output directory for string extraction.
path_specs (dfvfs.PathSpec): volume path specs.
postgresql (PostgresqlDataStore): postgresql database.
scanner (FileEntryScanner): dfvfs volume / file entry scanner.
"""
def __init__(self, case, image_path, options):
"""Create an image processor."""
super().__init__()
self.case = case
self.elasticsearch = None
self.image_hash = None
self.image_path = image_path
self.options = options
self.output_path = None
self.path_specs = []
self.postgresql = None
self.scanner = None
def _already_parsed(self):
"""Check if image is already parsed.
Checks whether the image is already in the database.
If so, checks whether it's attached to the case.
Adds the image to the database and attaches it to the case.
Returns:
True if image has already been parsed, False if not.
"""
tables_exist = self.postgresql.table_exists('images')
image_exists = False
if not tables_exist:
self._initialise_database()
else:
image_exists = self.postgresql.value_exists(
'images', 'image_hash', self.image_hash)
# Even if the image has already been parsed, it may have been in a different
# case.
image_case_exists = False
if image_exists:
image_case = self.postgresql.query_single_row((
'SELECT 1 from image_case '
'WHERE image_hash = \'{0:s}\' AND case_id = \'{1:s}\'').format(
self.image_hash, self.case))
if image_case:
image_case_exists = True
else:
self.postgresql.execute((
'INSERT INTO images (image_path, image_hash) '
'VALUES (\'{0:s}\', \'{1:s}\')').format(
self.image_path, self.image_hash))
if not image_case_exists:
self.postgresql.execute((
'INSERT INTO image_case (case_id, image_hash) '
'VALUES (\'{0:s}\', \'{1:s}\')').format(self.case, self.image_hash))
return image_exists
def _create_filesystem_database(self):
"""Create a filesystem database for the image."""
self.postgresql.execute((
'CREATE TABLE blocks (block INTEGER, inum INTEGER, part TEXT, '
'PRIMARY KEY (block, inum, part))'))
self.postgresql.execute((
'CREATE TABLE files (inum INTEGER, filename TEXT, part TEXT, '
'PRIMARY KEY (inum, filename, part))'))
def _extract_strings(self):
"""String extraction.
Extract strings from the image using bulk_extractor.
"""
cmd = [
'bulk_extractor', '-o', self.output_path, '-x', 'all', '-e', 'wordlist'
]
if self.options.base64:
cmd.extend(['-e', 'base64'])
if self.options.gunzip:
cmd.extend(['-e', 'gzip'])
if self.options.unzip:
cmd.extend(['-e', 'zip'])
cmd.extend(['-S', 'strings=YES', '-S', 'word_max=1000000'])
cmd.append(self.image_path)
log.info('Running bulk_extractor: [%s]', ' '.join(cmd))
try:
output = subprocess.check_output(cmd)
except subprocess.CalledProcessError as e:
raise RuntimeError('String extraction failed.') from e
md5_offset = output.index(b'MD5') + 19
self.image_hash = output[md5_offset:md5_offset + 32].decode('utf-8')
def _get_volume_details(self, path_spec):
"""Logs volume details for the given path spec.
Args:
path_spec (dfvfs.PathSpec): path spec of the volume.
Returns:
Volume location / identifier and byte offset.
"""
location = getattr(path_spec, 'location', None)
start_offset = 0
while path_spec.HasParent():
type_indicator = path_spec.type_indicator
if type_indicator == dfvfs_definitions.TYPE_INDICATOR_TSK_PARTITION:
if location in ('\\', '/'):
location = getattr(path_spec, 'location', None)
start_offset = getattr(path_spec, 'start_offset', 0)
break
path_spec = path_spec.parent
return location, start_offset
def _index_record(self, index_name, string_record):
"""Index a single record.
Args:
index_name: ID of the elasticsearch index.
string_record: String record to be indexed.
Returns:
Number of records processed
"""
json_record = {
'image': string_record.image,
'offset': string_record.offset,
'file_offset': string_record.file_offset,
'data': string_record.data
}
return self.elasticsearch.import_event(index_name, event=json_record)
def _index_strings(self):
"""Index the extracted strings."""
self.elasticsearch = ElasticsearchDataStore()
index_name = ''.join(('es', self.image_hash))
if self.elasticsearch.index_exists(index_name):
log.info('Image already indexed: [%s]', self.image_path)
else:
index_name = self.elasticsearch.create_index(index_name=index_name)
log.info('Index %s created.', index_name)
string_list = os.path.join(self.output_path, 'wordlist.txt')
records = 0
with open(string_list, 'r') as strings:
for line in strings:
# Ignore the comments added by bulk_extractor
if not line.startswith('#'):
string_record = _StringRecord()
string_record.image = self.image_hash
# Split each string into offset and data
line = line.split('\t')
offset = line[0]
data = '\t'.join(line[1:])
# If the string is from a decoded / decompressed stream, split the
# offset into image offset and file offset
if offset.find('-') > 0:
offset = offset.split('-')
image_offset = offset[0]
file_offset = '-'.join(offset[1:])
string_record.offset = int(image_offset)
string_record.file_offset = file_offset
else:
string_record.offset = int(offset)
string_record.data = data
records = self._index_record(index_name, string_record)
if records % STRING_INDEXING_LOG_INTERVAL == 0:
log.info('Indexed %d records...', records)
# Flush the import buffer
records = self.elasticsearch.import_event(index_name)
log.info('Indexed %d records...', records)
def _initialise_database(self):
"""Initialse the image database."""
self.postgresql.execute(
'CREATE TABLE images (image_path TEXT, image_hash TEXT PRIMARY KEY)')
self.postgresql.execute((
'CREATE TABLE image_case ('
'case_id TEXT, image_hash TEXT REFERENCES images(image_hash), '
'PRIMARY KEY (case_id, image_hash))'))
def _parse_filesystems(self):
"""Filesystem parsing.
Parse each filesystem to create a mapping from byte offsets to files.
"""
self.postgresql = PostgresqlDataStore(autocommit=True)
if self._already_parsed():
log.info('Image already parsed: [%s]', self.image_path)
else:
db_name = ''.join(('fs', self.image_hash))
self.postgresql.execute('CREATE DATABASE {0:s}'.format(db_name))
self.postgresql.switch_database(db_name=db_name)
self._create_filesystem_database()
# Scan image for volumes
mediator = UnattendedVolumeScannerMediator()
try:
self.scanner = FileEntryScanner(mediator=mediator)
self.path_specs = self.scanner.GetBasePathSpecs(self.image_path)
log.info(
'Found %d volume%s in [%s]:', len(self.path_specs),
'' if len(self.path_specs) == 1 else 's', self.image_path)
except dfvfs_errors.ScannerError as e:
log.error('Error scanning for partitions: %s', e)
for path_spec in self.path_specs:
location, start_offset = self._get_volume_details(path_spec)
log.info(
'%s: %s (Offset %d)', location, path_spec.type_indicator,
start_offset)
if path_spec.type_indicator in (dfvfs_definitions.TYPE_INDICATOR_NTFS,
dfvfs_definitions.TYPE_INDICATOR_TSK):
self._parse_inodes(location, start_offset)
self.scanner.parse_file_entries([path_spec], self.postgresql)
else:
log.warning(
'Volume type %s is not supported.', path_spec.type_indicator)
def _parse_inodes(self, location, start_offset):
"""Parse filesystem inodes.
Create a mapping from blocks to inodes.
Args:
location (str): location / identifier of the volume.
start_offset (int): byte offset of the volume.
"""
rows = []
image = pytsk3.Img_Info(self.image_path)
filesystem = pytsk3.FS_Info(image, offset=start_offset)
for inode in range(filesystem.info.first_inum,
filesystem.info.last_inum + 1):
file_metadata = filesystem.open_meta(inode)
if file_metadata.info.meta.nlink > 0:
for attribute in file_metadata:
for run in attribute:
for block in range(run.len):
rows.append((
run.addr + block,
inode,
location,
))
if len(rows) >= BATCH_SIZE:
self.postgresql.bulk_insert('blocks (block, inum, part)', rows)
rows = []
if rows:
self.postgresql.bulk_insert('blocks (block, inum, part)', rows)
def process_image(self):
"""Process the image."""
self.output_path = tempfile.mkdtemp()
log.info('* Processing start: %s', datetime.now())
self._extract_strings()
log.info('String extraction complete.')
log.info('* Parsing image: %s', datetime.now())
self._parse_filesystems()
log.info('Parsing complete.')
log.info('* Indexing strings: %s', datetime.now())
self._index_strings()
log.info('Indexing complete.')
log.info('* Processing complete: %s', datetime.now())
class ImageProcessorOptions():
"""Image processor options.
Attributes:
base64 (bool): decode base64.
gunzip (bool): decompress gzip.
unzip (bool): decompress zip.
"""
def __init__(self, base64=True, gunzip=True, unzip=True):
"""Initialise image processor options."""
super().__init__()
self.base64 = base64
self.gunzip = gunzip
self.unzip = unzip
class UnattendedVolumeScannerMediator(volume_scanner.VolumeScannerMediator):
"""Unattended volume scanner mediator."""
def GetAPFSVolumeIdentifiers(self, volume_system, volume_identifiers):
"""Retrieves APFS volume identifiers.
In an unattended execution, this method returns all volume identifiers.
Args:
volume_system (APFSVolumeSystem): volume system.
volume_identifiers (list[str]): volume identifiers including prefix.
Returns:
list[str]: all volume identifiers including prefix.
"""
prefix = 'apfs'
return [
'{0:s}{1:d}'.format(prefix, volume_index)
for volume_index in range(1, volume_system.number_of_volumes + 1)
]
def GetPartitionIdentifiers(self, volume_system, volume_identifiers):
"""Retrieves partition identifiers.
In an unattended execution, this method returns all partition identifiers.
Args:
volume_system (TSKVolumeSystem): volume system.
volume_identifiers (list[str]): volume identifiers including prefix.
Returns:
list[str]: all volume identifiers including prefix.
"""
prefix = 'p'
return [
'{0:s}{1:d}'.format(prefix, volume_index)
for volume_index in range(1, volume_system.number_of_volumes + 1)
]
def GetVSSStoreIdentifiers(self, volume_system, volume_identifiers):
"""Retrieves VSS store identifiers.
Placeholder method for VSS support.
Args:
volume_system (VShadowVolumeSystem): volume system.
volume_identifiers (list[str]): volume identifiers including prefix.
Returns:
list[str]: None.
"""
return []
def UnlockEncryptedVolume(
self, source_scanner_object, scan_context, locked_scan_node, credentials):
"""Unlocks an encrypted volume.
Placeholder method for encrypted volume support.
Args:
source_scanner_object (SourceScanner): source scanner.
scan_context (SourceScannerContext): source scanner context.
locked_scan_node (SourceScanNode): locked scan node.
credentials (Credentials): credentials supported by the locked scan node.
Returns:
bool: True if the volume was unlocked.
"""
log.warning(
'Encrypted volumes are currently unsupported: %s',
locked_scan_node.path_spec.CopyToDict())
return False
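The processor is driven from `dfdcli.main()` exactly as shown earlier in this change. A hedged usage sketch (case ID and image path are placeholders; Elasticsearch and PostgreSQL are assumed to be running):

```python
import os

from dfdewey.utils.image_processor import ImageProcessor, ImageProcessorOptions

options = ImageProcessorOptions(base64=True, gunzip=True, unzip=True)
image_processor = ImageProcessor(
    'testcase', os.path.abspath('/path/to/image.dd'), options)
image_processor.process_image()
```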


@@ -0,0 +1,284 @@
# -*- coding: utf-8 -*-
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Index searcher."""
import logging
import os
from dfvfs.lib import errors as dfvfs_errors
import pytsk3
from tabulate import tabulate
from dfdewey.datastore.elastic import ElasticsearchDataStore
from dfdewey.datastore.postgresql import PostgresqlDataStore
from dfdewey.utils.image_processor import (
FileEntryScanner, UnattendedVolumeScannerMediator)
log = logging.getLogger('dfdewey.index_searcher')
class _SearchHit():
"""Search result.
Attributes:
offset: byte offset of the string within the source image.
filename: filename containing the string if applicable.
data: the responsive string.
"""
def __init__(self):
self.offset = 0
self.filename = None
self.data = ''
def copy_to_dict(self):
"""Copies the search hit to a dictionary.
Returns:
dict[str, object]: search hit attributes.
"""
search_hit_dict = {}
search_hit_dict['Offset'] = self.offset
search_hit_dict['Filename (inode)'] = self.filename
search_hit_dict['String'] = self.data
return search_hit_dict
class IndexSearcher():
"""Index Searcher class."""
def __init__(self, case, image):
"""Create an index searcher."""
super().__init__()
self.case = case
self.elasticsearch = ElasticsearchDataStore()
self.image = image
self.images = {}
self.postgresql = PostgresqlDataStore()
self.scanner = None
if image != 'all':
self.image = os.path.abspath(self.image)
self._get_image_hash()
else:
self._get_case_images()
def _get_case_images(self):
"""Get all images for the case.
Returns:
A dictionary of the images in the case.
"""
images = self.postgresql.query((
'SELECT image_hash, image_path FROM image_case NATURAL JOIN images '
'WHERE case_id = \'{0:s}\'').format(self.case))
for image_hash, image_path in images:
self.images[image_hash] = image_path
def _get_filenames_from_inode(self, inode, location):
"""Gets filename(s) from an inode number.
Args:
inode: Inode number of target file
location: Partition number
Returns:
Filename of given inode or None
"""
results = self.postgresql.query((
'SELECT filename FROM files '
'WHERE inum = {0:d} AND part = \'{1:s}\'').format(inode, location))
filenames = []
for result in results:
filenames.append(result[0])
return filenames
def _get_filename_from_offset(self, image_path, image_hash, offset):
"""Gets filename given a byte offset within an image.
Args:
image_path: source image path.
image_hash: source image hash.
offset: byte offset within the image.
Returns:
Filename allocated to the given offset, or None.
"""
filenames = []
database_name = ''.join(('fs', image_hash))
self.postgresql.switch_database(db_name=database_name)
volume_extents = {}
try:
if not self.scanner:
mediator = UnattendedVolumeScannerMediator()
self.scanner = FileEntryScanner(mediator=mediator)
volume_extents = self.scanner.get_volume_extents(image_path)
except dfvfs_errors.ScannerError as e:
log.error('Error scanning for partitions: %s', e)
hit_location = None
partition_offset = None
for location, extent in volume_extents.items():
if not extent['end']:
# Image is of a single volume
hit_location = location
partition_offset = extent['start']
elif extent['start'] <= offset < extent['end']:
hit_location = location
partition_offset = extent['start']
if partition_offset is not None:
try:
img = pytsk3.Img_Info(image_path)
filesystem = pytsk3.FS_Info(img, offset=partition_offset)
block_size = filesystem.info.block_size
except TypeError as e:
log.error('Error opening image: %s', e)
inodes = self._get_inodes(
int((offset - partition_offset) / block_size), hit_location)
if inodes:
for i in inodes:
inode = i[0]
# Account for resident files
if (i[0] == 0 and
filesystem.info.ftype == pytsk3.TSK_FS_TYPE_NTFS_DETECT):
mft_record_size_offset = 0x40 + partition_offset
mft_record_size = int.from_bytes(
img.read(mft_record_size_offset, 1), 'little', signed=True)
if mft_record_size < 0:
mft_record_size = 2**(mft_record_size * -1)
else:
mft_record_size = mft_record_size * block_size
inode = self._get_ntfs_resident_inode((offset - partition_offset),
filesystem, mft_record_size)
inode_filenames = self._get_filenames_from_inode(inode, hit_location)
filename = ' | '.join(inode_filenames)
filenames.append('{0:s} ({1:d})'.format(filename, inode))
return filenames
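The `mft_record_size` handling above reads the signed byte at offset 0x40 of the NTFS volume: a non-negative value is a count of clusters (multiplied by the block size), while a negative value n means 2**(-n) bytes per $MFT entry. A worked example using the value typically found on NTFS volumes (illustrative; not read from any image in this change):

```python
# 0xF6 is the usual byte at offset 0x40 of an NTFS boot sector (-10 signed).
mft_record_size = int.from_bytes(b'\xf6', 'little', signed=True)
if mft_record_size < 0:
    mft_record_size = 2 ** (-mft_record_size)
print(mft_record_size)  # 1024 bytes per $MFT entry
```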
def _get_image_hash(self):
"""Get an image hash from the datastore.
Returns:
MD5 hash for the image stored in PostgreSQL.
"""
image_hash = self.postgresql.query_single_row(
'SELECT image_hash FROM images WHERE image_path = \'{0:s}\''.format(
self.image))
if image_hash:
self.images[image_hash[0]] = self.image
def _get_inodes(self, block, location):
"""Gets inode numbers for a block offset.
Args:
block (int): block offset within the image.
location (str): Partition location / identifier.
Returns:
Inode number(s) of the given block or None.
"""
inodes = self.postgresql.query(
('SELECT inum FROM blocks '
'WHERE block = {0:d} AND part = \'{1:s}\'').format(block, location))
return inodes
def _get_ntfs_resident_inode(self, offset, filesystem, mft_record_size):
"""Gets the inode number associated with NTFS $MFT resident data.
Args:
offset: data offset within volume.
filesystem: pytsk3 FS_INFO object.
mft_record_size: size of each $MFT entry.
Returns:
inode number of resident data
"""
block_size = filesystem.info.block_size
offset_block = int(offset / block_size)
inode = filesystem.open_meta(0)
mft_entry = 0
for attr in inode:
for run in attr:
for block in range(run.len):
if run.addr + block == offset_block:
mft_entry += int(
(offset - (offset_block * block_size)) / mft_record_size)
return mft_entry
mft_entry += int(block_size / mft_record_size)
return 0
def list_search(self, query_list):
"""Query a list of search terms.
Args:
query_list (str): path to a text file containing multiple search terms.
"""
for image_hash, image_path in self.images.items():
index = ''.join(('es', image_hash))
with open(query_list, 'r') as search_terms:
table_data = []
for term in search_terms:
term = ''.join(('"', term.strip(), '"'))
results = self.elasticsearch.search(index, term)
hit_count = results['hits']['total']['value']
if hit_count > 0:
table_data.append({'Search term': term, 'Hits': hit_count})
if table_data:
output = tabulate(table_data, headers='keys', tablefmt='simple')
else:
output = 'No results.'
log.info(
'Searched %s (%s) for terms in %s\n\n%s\n', image_path, image_hash,
query_list, output)
def search(self, query):
"""Run a single query.
Args:
query (str): query to run.
"""
for image_hash, image_path in self.images.items():
log.info('Searching %s (%s) for "%s"', image_path, image_hash, query)
index = ''.join(('es', image_hash))
results = self.elasticsearch.search(index, query)
result_count = results['hits']['total']['value']
time_taken = results['took']
results = results['hits']['hits']
hits = []
for result in results:
hit = _SearchHit()
offset = str(result['_source']['offset'])
if result['_source']['file_offset']:
offset = '-'.join((offset, result['_source']['file_offset']))
hit.offset = offset
filenames = self._get_filename_from_offset(
image_path, image_hash, result['_source']['offset'])
hit.filename = '\n'.join(filenames)
hit.data = result['_source']['data'].strip()
hits.append(hit.copy_to_dict())
output = tabulate(hits, headers='keys', tablefmt='simple')
log.info(
'Returned %d results in %dms.\n\n%s\n', result_count, time_taken,
output)
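The searcher is likewise driven from `dfdcli.main()`. A hedged usage sketch (case ID and query are placeholders; passing 'all' as the image searches every image recorded for the case):

```python
from dfdewey.utils.index_searcher import IndexSearcher

index_searcher = IndexSearcher('testcase', 'all')
index_searcher.search('foo')
# Or, for a file containing one search term per line:
# index_searcher.list_search('search_terms.txt')
```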

dfvfs_requirements.txt (new file)

@@ -0,0 +1,26 @@
pip >= 7.0.0
PyYAML >= 3.10
cffi >= 1.9.1
cryptography >= 2.0.2
dfdatetime >= 20200809
dtfabric >= 20170524
idna >= 2.5
libbde-python >= 20140531
libewf-python >= 20131210
libfsapfs-python >= 20201107
libfsext-python >= 20200819
libfshfs-python >= 20201103
libfsntfs-python >= 20200921
libfsxfs-python >= 20201114
libfvde-python >= 20160719
libfwnt-python >= 20160418
libluksde-python >= 20200101
libqcow-python >= 20131204
libsigscan-python >= 20191221
libsmdev-python >= 20140529
libsmraw-python >= 20140612
libvhdi-python >= 20201014
libvmdk-python >= 20140421
libvshadow-python >= 20160109
libvslvm-python >= 20160109
pytsk3 >= 20160721


@@ -1,14 +1,14 @@
# Using dfDewey
```shell
usage: dfdcli.py [-h] -c CASE [-i IMAGE] [--no_base64] [--no_gzip] [--no_zip]
[-s SEARCH] [--search_list SEARCH_LIST]
usage: dfdcli.py [-h] [--no_base64] [--no_gzip] [--no_zip] [-s SEARCH] [--search_list SEARCH_LIST] case [image]
positional arguments:
case case ID
image image file (default: 'all')
optional arguments:
-h, --help show this help message and exit
-c CASE, --case CASE case ID
-i IMAGE, --image IMAGE
image file
--no_base64 don't decode base64
--no_gzip don't decompress gzip
--no_zip don't decompress zip
@@ -16,6 +16,7 @@ optional arguments:
search query
--search_list SEARCH_LIST
file with search queries
```
## Docker
@@ -59,7 +60,7 @@ docker run --network=host -v ~/images/:/mnt/images <docker_name> dfdewey -h
To process an image in dfDewey, you need to supply a `CASE` and `IMAGE`.
```shell
dfdcli.py -c testcase -i /path/to/image.dd
dfdcli.py testcase /path/to/image.dd
```
dfDewey will have bulk_extractor decode base64 data, and decompress gzip / zip
@@ -72,7 +73,7 @@ To search the index for a single image, you need to supply a `CASE`, `IMAGE`,
and `SEARCH`.
```shell
dfdcli.py -c testcase -i /path/to/image.dd -s foo
dfdcli.py testcase /path/to/image.dd -s 'foo'
```
If an `IMAGE` is not provided, dfDewey will search all images in the given case.
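For example, to search all images in `testcase`:

```shell
dfdcli.py testcase -s 'foo'
```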
@@ -82,5 +83,5 @@ a text file one per line. In this case, only the number of results for each term
is returned.
```shell
dfdcli.py -c testcase -i /path/to/image.dd --search_list search_terms.txt
dfdcli.py testcase /path/to/image.dd --search_list search_terms.txt
```


@@ -1,4 +1,6 @@
dfvfs
elasticsearch
psycopg2-binary
pytsk3
six
tabulate


@@ -31,6 +31,8 @@ DFDEWEY_DESCRIPTION = (
requirements = []
with open('requirements.txt','r') as f:
requirements = f.read().splitlines()
with open('dfvfs_requirements.txt','r') as f:
requirements.extend(f.read().splitlines())
setup(
name='dfDewey',
version=dfdewey.__version__,