Refactoring CLI, processing and searching
parent 2f78367a37
commit 15ad0beb02
3 changed files with 27 additions and 416 deletions
@@ -68,7 +68,7 @@ def main():
   if args.search:
     index_searcher.search(args.search)
   elif args.search_list:
-    pass
+    index_searcher.list_search(args.search_list)


 def parse_args():
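This hunk wires the previously stubbed `--search_list` branch to the new `IndexSearcher.list_search()` method. A hedged sketch of the two invocation styles (flag spellings are inferred from the argparse attribute names `args.search` and `args.search_list`; the case name and image path are hypothetical):

# Hypothetical invocations:
#   dfdewey testcase /evidence/image.dd --search "password"
#   dfdewey testcase /evidence/image.dd --search_list terms.txt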
@@ -1,414 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Image File Access Functions."""

import pytsk3

from dfdewey.datastore.postgresql import PostgresqlDataStore


def initialise_block_db(image_path, image_hash, case):
  """Creates a new image database.

  Args:
    image_path: Path to image file
    image_hash: MD5 of the image
    case: Case ID

  Returns:
    Boolean value to indicate whether the image has already been processed
  """
  img = pytsk3.Img_Info(image_path)

  block_db = PostgresqlDataStore(autocommit=True)
  image_exists = check_tracking_database(block_db, image_path, image_hash, case)

  if not image_exists:
    db_name = ''.join(('fs', image_hash))
    block_db.execute('CREATE DATABASE {0:s}'.format(db_name))

    block_db.switch_database(db_name=db_name)

    populate_block_db(img, block_db, batch_size=1500)

  return image_exists


def check_tracking_database(tracking_db, image_path, image_hash, case):
  """Checks if an image exists in the tracking database.

  Checks if an image exists in the tracking database and adds it if not.
  If the image exists, but is not associated with the given case ID, will add
  the association.

  Args:
    tracking_db: PostgreSQL database
    image_path: Path to image file
    image_hash: MD5 of the image
    case: Case ID

  Returns:
    Boolean value to indicate the existence of the image
  """
  tables_exist = tracking_db.table_exists('images')

  image_exists = False
  if not tables_exist:
    tracking_db.execute(
        'CREATE TABLE images (image_path TEXT, image_hash TEXT PRIMARY KEY)')

    tracking_db.execute(
        """
        CREATE TABLE image_case (
          case_id TEXT, image_hash TEXT REFERENCES images(image_hash),
          PRIMARY KEY (case_id, image_hash))""")
  else:
    image_exists = tracking_db.value_exists('images', 'image_hash', image_hash)

  image_case_exists = False
  if image_exists:
    image_case = tracking_db.query_single_row(
        """
        SELECT 1 from image_case
        WHERE image_hash = '{0:s}' AND case_id = '{1:s}'""".format(
            image_hash, case))
    if image_case:
      image_case_exists = True

  if not image_exists:
    tracking_db.execute(
        """
        INSERT INTO images (image_path, image_hash)
        VALUES ('{0:s}', '{1:s}')""".format(image_path, image_hash))
  if not image_case_exists:
    tracking_db.execute(
        """
        INSERT INTO image_case (case_id, image_hash)
        VALUES ('{0:s}', '{1:s}')""".format(case, image_hash))

  return image_exists

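For illustration, a minimal sketch of how the tracking logic behaves across runs (the database handle, path, hash, and case IDs are hypothetical):

# First run: the image is not yet tracked, so the image row and the
# image_case association are inserted and False is returned, which lets
# processing proceed.
# exists = check_tracking_database(db, '/evidence/image.dd', 'abc123', 'case1')  # False
#
# Second run under a new case: the image is already tracked, so only the new
# image_case association is added and True is returned, skipping reprocessing.
# exists = check_tracking_database(db, '/evidence/image.dd', 'abc123', 'case2')  # True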
def populate_block_db(img, block_db, batch_size=1500):
  """Creates a new image block database.

  Args:
    img: pytsk image info object
    block_db: PostgreSQL database
    batch_size: Number of rows to insert at a time
  """
  print('Image database does not already exist. Parsing image filesystem(s)...')
  block_db.execute(
      'CREATE TABLE blocks (block INTEGER, inum INTEGER, part INTEGER)')
  block_db.execute(
      'CREATE TABLE files (inum INTEGER, filename TEXT, part INTEGER)')

  has_partition_table = False
  try:
    volume = pytsk3.Volume_Info(img)
    if volume:
      print('Image has a partition table...')
      has_partition_table = True
    rows = []
    for part in volume:
      print(
          'Parsing partition {0:d}: {1:s}'.format(
              part.addr, part.desc.decode('utf-8')))
      if part.flags != pytsk3.TSK_VS_PART_FLAG_ALLOC:
        continue
      filesystem = pytsk3.FS_Info(
          img, offset=part.start * volume.info.block_size)
      for inode in range(filesystem.info.first_inum,
                         filesystem.info.last_inum + 1):
        file = filesystem.open_meta(inode)
        if file.info.meta.nlink > 0:
          for attr in file:
            for run in attr:
              for block in range(run.len):
                rows.append((
                    run.addr + block,
                    inode,
                    part.addr,
                ))
                if len(rows) >= batch_size:
                  block_db.bulk_insert('blocks (block, inum, part)', rows)
                  rows = []
      if rows:
        block_db.bulk_insert('blocks (block, inum, part)', rows)

      # File names
      directory = filesystem.open_dir(path='/')
      list_directory(block_db, directory, part=part.addr, batch_size=batch_size)
  except IOError:
    pass

  if not has_partition_table:
    filesystem = pytsk3.FS_Info(img)
    rows = []
    for inode in range(filesystem.info.first_inum,
                       filesystem.info.last_inum + 1):
      try:
        file = filesystem.open_meta(inode)
        if file.info.meta.nlink > 0:
          for attr in file:
            for run in attr:
              for block in range(run.len):
                rows.append((
                    run.addr + block,
                    inode,
                ))
                if len(rows) >= batch_size:
                  block_db.bulk_insert('blocks (block, inum)', rows)
                  rows = []
        if rows:
          block_db.bulk_insert('blocks (block, inum)', rows)
      except OSError:
        continue

    # File names
    directory = filesystem.open_dir(path='/')
    list_directory(block_db, directory, batch_size=batch_size)

  block_db.execute('CREATE INDEX blocks_index ON blocks (block, part);')
  block_db.execute('CREATE INDEX files_index ON files (inum, part);')

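The blocks table exists so that the byte offset of a string hit can later be mapped back to the inode that owns it. A minimal, runnable sketch of that arithmetic (the 4096-byte filesystem block size and the hit offset are hypothetical):

block_size = 4096                  # assumed filesystem block size
hit_offset = 0x1A2B3C              # hypothetical byte offset of a search hit
block = hit_offset // block_size   # 418: the value queried against 'blocks'
# Equivalent lookup performed later by get_inums():
#   SELECT inum FROM blocks WHERE block = 418 AND part = <partition>
print(block)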
def list_directory(
    block_db, directory, part=None, stack=None, rows=None, batch_size=1500):
  """Recursive function to create a filesystem listing.

  Args:
    block_db: PostgreSQL database
    directory: pytsk directory object
    part: Partition number
    stack: Inode stack to control recursive filesystem parsing
    rows: Array for batch database inserts
    batch_size: Number of rows to insert at a time

  Returns:
    Current rows array for recursion
  """
  if not stack:
    stack = []
  if not rows:
    rows = []
  stack.append(directory.info.fs_file.meta.addr)

  for directory_entry in directory:
    # TODO(js): Refactor
    if (not hasattr(directory_entry, 'info') or
        not hasattr(directory_entry.info, 'name') or
        not hasattr(directory_entry.info.name, 'name') or
        directory_entry.info.meta is None or
        directory_entry.info.name.name in [b'.', b'..'] or
        directory_entry.info.name.flags == pytsk3.TSK_FS_NAME_FLAG_UNALLOC):
      continue
    try:
      name = directory_entry.info.name.name.decode('utf-8')
    except UnicodeDecodeError:
      print('Unable to decode: {}'.format(directory_entry.info.name.name))
      continue
    if part:
      rows.append((
          directory_entry.info.meta.addr,
          name.replace('\'', '\'\''),
          part,
      ))
      if len(rows) >= batch_size:
        block_db.bulk_insert('files (inum, filename, part)', rows)
        rows = []
    else:
      rows.append((
          directory_entry.info.meta.addr,
          name.replace('\'', '\'\''),
      ))
      if len(rows) >= batch_size:
        block_db.bulk_insert('files (inum, filename)', rows)
        rows = []

    try:
      sub_directory = directory_entry.as_directory()
      inode = directory_entry.info.meta.addr

      if inode not in stack:
        rows = list_directory(
            block_db, sub_directory, part=part, stack=stack, rows=rows,
            batch_size=batch_size)

    except IOError:
      pass

  stack.pop(-1)
  if not stack:
    if part:
      block_db.bulk_insert('files (inum, filename, part)', rows)
    else:
      block_db.bulk_insert('files (inum, filename)', rows)

  return rows

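The stack parameter guards the recursion: a subdirectory is only descended into if its inode address is not already an ancestor on the stack, which prevents infinite loops where a directory entry points back up the tree. A trivial standalone illustration of the guard (inode values are made up):

stack = [5, 38, 64]  # inode addresses of the current directory's ancestors
inode = 38           # candidate subdirectory that aliases an ancestor
if inode not in stack:
  print('recurse')   # normal case: descend into the subdirectory
else:
  print('cycle detected, skipping')  # avoids re-walking an ancestor forever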
def get_filename_from_offset(image_path, image_hash, offset):
  """Gets filename given a byte offset within an image.

  Args:
    image_path: Source image path
    image_hash: Source image hash
    offset: Byte offset within the image

  Returns:
    Filename allocated to the given offset
  """
  img = pytsk3.Img_Info(image_path)

  db_name = ''.join(('fs', image_hash))
  block_db = PostgresqlDataStore(db_name=db_name)

  device_block_size = None
  partition = None
  partition_offset = None
  unalloc_part = False
  try:
    volume = pytsk3.Volume_Info(img)
    device_block_size = volume.info.block_size
    sector_offset = offset / device_block_size
    for part in volume:
      if part.start <= sector_offset < part.start + part.len:
        if part.flags != pytsk3.TSK_VS_PART_FLAG_ALLOC:
          unalloc_part = True
        partition = part.addr
        partition_offset = part.start
  except IOError:
    pass

  inums = None
  if not unalloc_part:
    try:
      if not partition_offset:
        filesystem = pytsk3.FS_Info(img)
      else:
        offset -= partition_offset * device_block_size
        filesystem = pytsk3.FS_Info(
            img, offset=partition_offset * device_block_size)
    except TypeError as e:
      print(e)
    block_size = filesystem.info.block_size

    inums = get_inums(block_db, offset / block_size, part=partition)

  filenames = []
  if inums:
    for i in inums:
      real_inum = i[0]
      if i[0] == 0 and filesystem.info.ftype == pytsk3.TSK_FS_TYPE_NTFS_DETECT:
        mft_record_size_offset = 0x40
        if partition_offset:
          mft_record_size_offset = \
              mft_record_size_offset + (partition_offset * device_block_size)
        mft_record_size = int.from_bytes(
            img.read(mft_record_size_offset, 1), 'little', signed=True)
        if mft_record_size < 0:
          mft_record_size = 2**(mft_record_size * -1)
        else:
          mft_record_size = mft_record_size * block_size
        real_inum = get_resident_inum(offset, filesystem, mft_record_size)
      filename = get_filename(block_db, real_inum, part=partition)
      if filename and not filenames:
        filenames.append('{0:s} ({1:d})'.format(filename, real_inum))
      else:
        if '{0:s} ({1:d})'.format(filename, real_inum) not in filenames:
          filenames.append('{0:s} ({1:d})'.format(filename, real_inum))

  if not filenames:
    return 'No filenames found'
  else:
    return ' | '.join(filenames)

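Putting the pieces together, a hedged end-to-end sketch of the lookup this function performed (image path, hash, and offset are hypothetical; the result format follows the '{filename} ({inode})' pattern above):

# filename = get_filename_from_offset(
#     '/evidence/image.dd', 'd41d8cd98f00b204e9800998ecf8427e', 0x1A2B3C)
# print(filename)  # e.g. 'Documents/notes.txt (57)' or 'No filenames found'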
def get_inums(block_db, block, part=None):
  """Gets inode number from block offset.

  Args:
    block_db: PostgreSQL database
    block: Block offset within the image
    part: Partition number

  Returns:
    Inode number(s) of the given block or None
  """
  if part:
    inums = block_db.query(
        'SELECT inum FROM blocks WHERE block = {0:d} AND part = {1:d}'.format(
            int(block), part))
  else:
    inums = block_db.query(
        'SELECT inum FROM blocks WHERE block = {0:d}'.format(int(block)))

  return inums


def get_resident_inum(offset, filesystem, mft_record_size):
  """Gets the inode number associated with NTFS $MFT resident data.

  Args:
    offset: Data offset within volume
    filesystem: pytsk3 FS_INFO object
    mft_record_size: Size of an $MFT entry

  Returns:
    inode number of resident data
  """
  block_size = filesystem.info.block_size
  offset_block = int(offset / block_size)

  inode = filesystem.open_meta(0)
  mft_entry = 0
  for attr in inode:
    for run in attr:
      for block in range(run.len):
        if run.addr + block == offset_block:
          mft_entry += int(
              (offset - (offset_block * block_size)) / mft_record_size)
          return mft_entry
        else:
          mft_entry += int(block_size / mft_record_size)
  return 0

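The $MFT record-size handling above follows the NTFS convention that byte 0x40 of the boot sector stores "clusters per MFT record"; a negative value v means each record is 2**(-v) bytes. A runnable worked example using the commonly seen on-disk value 0xF6:

raw = bytes([0xF6])  # hypothetical byte read from boot-sector offset 0x40
mft_record_size = int.from_bytes(raw, 'little', signed=True)  # -10
if mft_record_size < 0:
  mft_record_size = 2**(mft_record_size * -1)  # 2**10: 1024-byte records
else:
  mft_record_size = mft_record_size * 4096     # assumes 4 KiB blocks
print(mft_record_size)  # 1024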
def get_filename(block_db, inum, part=None):
  """Gets filename given an inode number.

  Args:
    block_db: PostgreSQL database
    inum: Inode number of target file
    part: Partition number

  Returns:
    Filename of given inode or None
  """
  if part:
    filenames = block_db.query(
        'SELECT filename FROM files WHERE inum = {0:d} AND part = {1:d}'.format(
            inum, part))
  else:
    filenames = block_db.query(
        'SELECT filename FROM files WHERE inum = {0:d}'.format(inum))

  if filenames:
    filename = filenames[0][0]
  else:
    filename = 'No filenames found'

  return filename
@@ -228,6 +228,30 @@ class IndexSearcher():
           mft_entry += int(block_size / mft_record_size)
     return 0

+  def list_search(self, query_list):
+    """Query a list of search terms.
+
+    Args:
+      query_list (str): path to a text file containing multiple search terms.
+    """
+    for image_hash, image_path in self.images.items():
+      index = ''.join(('es', image_hash))
+      with open(query_list, 'r') as search_terms:
+        table_data = []
+        for term in search_terms:
+          term = ''.join(('"', term.strip(), '"'))
+          results = self.elasticsearch.search(index, term)
+          hit_count = results['hits']['total']['value']
+          if hit_count > 0:
+            table_data.append({'Search term': term, 'Hits': hit_count})
+      if table_data:
+        output = tabulate(table_data, headers='keys', tablefmt='simple')
+      else:
+        output = 'No results.'
+      log.info(
+          'Searched %s (%s) for terms in %s\n\n%s\n', image_path, image_hash,
+          query_list, output)
+
   def search(self, query):
     """Run a single query.

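A hedged usage sketch for the new method (the terms file, the searcher construction, and the hit counts are hypothetical; list_search() reads one term per line and logs a tabulated hit count per term):

# terms.txt (one search term per line):
#   password
#   secret key
#
# searcher.list_search('terms.txt')  # 'searcher' is an IndexSearcher instance
#
# Example of the logged table (simple tabulate format, values invented):
#   Search term     Hits
#   -------------  -----
#   "password"        12
#   "secret key"       3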
@@ -256,4 +280,5 @@ class IndexSearcher():
       hits.append(hit.copy_to_dict())
     output = tabulate(hits, headers='keys', tablefmt='simple')
     log.info(
-        'Returned %d results in %dms.\n%s', result_count, time_taken, output)
+        'Returned %d results in %dms.\n\n%s\n', result_count, time_taken,
+        output)