Formatting test

Commit 0ff2bed3ef (parent d277394b5c)
Author: Jason Solomon, 2020-11-16 14:30:34 +11:00
12 changed files with 560 additions and 112 deletions

.gitignore (vendored): 15 additions

@@ -3,12 +3,27 @@
# Back-up files
*~
+# Don't include build related files.
+/build/
+/dist/
# Egg files
/dfDewey.egg-info
+# Test files
+.coverage
# Generic auto-generated build files
*.pyc
*.pyo
# Specific auto-generated build files
/__pycache__
+# IDE files
+.idea/
+.vscode/
+# Pipfile
+Pipfile
+Pipfile.lock

.pylintrc (new file): 365 lines

@@ -0,0 +1,365 @@
# Original file copied from:
# https://chromium.googlesource.com/chromiumos/chromite/+/master/pylintrc
[MASTER]
# Specify a configuration file.
#rcfile=
# Python code to execute, usually for sys.path manipulation such as
# pygtk.require().
#init-hook=
# Profiled execution.
#profile=no
# Add <file or directory> to the black list. It should be a base name, not a
# path. You may set this option multiple times.
ignore=CVS
# Add files or directories matching the regex patterns to the blacklist. The
# regex matches against base names, not paths.
ignore-patterns=
.*_pb2\.py$
# Pickle collected data for later comparisons.
persistent=yes
# List of plugins (as comma separated values of python modules names) to load,
# usually to register additional checkers.
#load-plugins=
# Configure quote preferences.
string-quote = single-avoid-escape
triple-quote = double
docstring-quote = double
[MESSAGES CONTROL]
# Enable the message, report, category or checker with the given id(s). You can
# either give multiple identifier separated by comma (,) or put this option
# multiple times.
# cros-logging-import: logging is deprecated. Use "from chromite.lib import
# cros_logging as logging" to import chromite/lib/cros_logging.
# eq-without-hash: We omit this as we don't require all objects be hashable.
# We'll wait for unittest coverage to detect missing __hash__ on objects.
# no-absolute-import: We don't seem to rely on this behavior, so don't enforce
# using this future import everywhere.
# round-builtin: We omit this as all our usage of round() is OK with either
# Python 2 or 3 behavior (and probably leans towards 3 anyways).
#enable=
# Disable the message, report, category or checker with the given id(s). You
# can either give multiple identifiers separated by comma (,) or put this
# option multiple times (only on the command line, not in the configuration
# file where it should appear only once). You can also use "--disable=all" to
# disable everything first and then reenable specific checks. For example, if
# you want to run only the similarities checker, you can use "--disable=all
# --enable=similarities". If you want to run only the classes checker, but have
# no Warning level messages displayed, use "--disable=all --enable=classes
# --disable=W".
disable=
[REPORTS]
# Set the output format. Available formats are text, parseable, colorized, msvs
# (visual studio) and html
output-format=text
# Put messages in a separate file for each module / package specified on the
# command line instead of printing them on stdout. Reports (if any) will be
# written in a file name "pylint_global.[txt|html]".
files-output=no
# Tells whether to display a full report or only the messages
# CHANGE: No report.
reports=no
# Activate the evaluation score.
score=no
# Python expression which should return a note less than 10 (10 is the highest
# note). You have access to the variables errors warning, statement which
# respectively contain the number of errors / warnings messages and the total
# number of statements analyzed. This is used by the global evaluation report
# (RP0004).
evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
# Add a comment according to your evaluation note. This is used by the global
# evaluation report (RP0004).
comment=no
[MISCELLANEOUS]
# List of note tags to take in consideration, separated by a comma.
notes=FIXME,XXX,TODO
[FORMAT]
# Maximum number of characters on a single line.
max-line-length=80
# Maximum number of lines in a module
max-module-lines=1000
# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
# tab).
# CHANGE: Use " " instead.
indent-string=' '
[TYPECHECK]
# Tells whether missing members accessed in mixin class should be ignored. A
# mixin class is detected if its name ends with "mixin" (case insensitive).
ignore-mixin-members=yes
# List of classes names for which member attributes should not be checked
# (useful for classes with attributes dynamically set).
ignored-classes=pytsk3
# When zope mode is activated, add a predefined set of Zope acquired attributes
# to generated-members.
#zope=no
# List of members which are set dynamically and missed by pylint inference
# system, and so shouldn't trigger E0201 when accessed.
# CHANGE: Added 'AndRaise', 'AndReturn', 'InAnyOrder' and 'MultipleTimes' for pymox.
# CHANGE: Added tempdir for @osutils.TempDirDecorator.
#generated-members=
# List of modules for which member attributes should not be checked.
# Modules listed here will not trigger import errors even if the linter can't
# import them.
#
# pytest: Made available by our testing virtualenv and can be assumed exists.
ignored-modules=pytest
[BASIC]
# Required attributes for module, separated by a comma
#required-attributes=
# List of builtins function names that should not be used, separated by a comma.
# exit & quit are for the interactive interpreter shell only.
# https://docs.python.org/3/library/constants.html#constants-added-by-the-site-module
bad-functions=
apply,
exit,
filter,
input,
map,
quit,
raw_input,
reduce,
# Regular expression which should only match correct module names
module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
# Regular expression which should only match correct module level names
const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$
# Regular expression which should only match correct class names
class-rgx=[A-Z_][a-zA-Z0-9]+$
# Regular expression which should only match correct function names
function-rgx=[a-z_][a-z0-9_]{2,30}$
# Regular expression which should only match correct method names
method-rgx=(test[A-Za-z0-9_]{2,30})|([a-z_][a-z0-9_]{2,30})$
# Regular expression which should only match correct instance attribute names
attr-rgx=[a-z_][a-z0-9_]{2,30}$
# Regular expression which should only match correct argument names
argument-rgx=[a-z_][a-z0-9_]{2,30}$
# Regular expression which should only match correct variable names
variable-rgx=[a-z_][a-z0-9_]{2,30}$
# Regular expression which should only match correct list comprehension /
# generator expression variable names
inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$
# Good variable names which should always be accepted, separated by a comma
good-names=e,i,j,k,ex,Run,_,db,es
# Bad variable names which should always be refused, separated by a comma
bad-names=foo,bar,baz,toto,tutu,tata
# Regular expression which should only match functions or classes name which do
# not require a docstring
no-docstring-rgx=__.*__
[SIMILARITIES]
# Minimum lines number of a similarity.
min-similarity-lines=20
# Ignore comments when computing similarities.
ignore-comments=yes
# Ignore docstrings when computing similarities.
ignore-docstrings=yes
[VARIABLES]
# Tells whether we should check for unused import in __init__ files.
init-import=no
# A regular expression matching the beginning of the name of dummy variables
# (i.e. not used).
dummy-variables-rgx=_|unused_
# List of additional names supposed to be defined in builtins. Remember that
# you should avoid to define new builtins when possible.
#additional-builtins=
[CLASSES]
# List of interface methods to ignore, separated by a comma. This is used for
# instance to not check methods defines in Zope's Interface base class.
#ignore-iface-methods=
# List of method names used to declare (i.e. assign) instance attributes.
defining-attr-methods=__init__,__new__,setUp
[DESIGN]
# Maximum number of arguments for function / method
max-args=5
# Argument names that match this expression will be ignored. Default to name
# with leading underscore
ignored-argument-names=_.*
# Maximum number of locals for function / method body
max-locals=15
# Maximum number of return / yield for function / method body
max-returns=6
# Maximum number of branch for function / method body
max-branchs=12
# Maximum number of statements in function / method body
max-statements=50
# Maximum number of parents for a class (see R0901).
max-parents=10
# Maximum number of attributes for a class (see R0902).
max-attributes=7
# Minimum number of public methods for a class (see R0903).
min-public-methods=2
# Maximum number of public methods for a class (see R0904).
max-public-methods=20
[IMPORTS]
# Deprecated modules which should not be used, separated by a comma.
# __builtin__: Use the 'six.moves.builtins' module instead
# (or 'builtins' in Python 3).
# apiclient: Use the 'googleapiclient' module instead.
# Bastion: Dropped in Python 3.
# ConfigParser: Use the 'six.moves.configparser' module instead
# (or 'configparser' in Python 3).
# cookielib: Use the 'six.moves.http_cookiejar' module instead
# (or 'http.cookiejar' in Python 3).
# cPickle: Use the 'pickle' module instead.
# cStringIO: Use 'io.StringIO' or 'io.BytesIO' instead.
# exceptions: Dropped in Python 3.
# HTMLParser: Use the 'six.moves.html_parser' module instead
# (or 'html.parser' in Python 3).
# httplib: Use the 'six.moves.http_client' module instead
# (or 'http.client' in Python 3).
# md5: Use the 'hashlib' module instead.
# mox: Use the 'mock' module instead.
# optparse: Use the 'argparse' module instead.
# Queue: Use the 'six.moves.queue' module instead (or 'queue' in Python 3).
# regsub: Use the 're' module instead.
# rexec: Dropped in Python 3.
# StringIO: Use 'io.StringIO' or 'io.BytesIO' instead.
# TERMIOS: Use the 'termios' module instead.
# urllib2: Use the 'six.moves.urllib' module instead
# (or 'urllib.request' in Python 3).
# urlparse: Use the 'six.moves.urllib' module instead
# (or 'urllib.parse' in Python 3).
deprecated-modules=
__builtin__,
apiclient,
Bastion,
ConfigParser,
cookielib,
cPickle,
cStringIO,
exceptions,
HTMLParser,
httplib,
md5,
mox,
optparse,
Queue,
regsub,
rexec,
StringIO,
TERMIOS,
urllib2,
urlparse,
# Create a graph of every (i.e. internal and external) dependencies in the
# given file (report RP0402 must not be disabled)
#import-graph=
# Create a graph of external dependencies in the given file (report RP0402 must
# not be disabled)
#ext-import-graph=
# Create a graph of internal dependencies in the given file (report RP0402 must
# not be disabled)
#int-import-graph=
# Force import order to recognize a module as part of the standard
# compatibility libraries.
known-standard-library=
# Force import order to recognize a module as part of a third party library.
known-third-party=
_emerge,
apiclient,
elftools,
gcloud,
google,
googleapiclient,
httplib2,
jinja2,
jsonschema,
lddtree,
magic,
mock,
oauth2client,
portage,
pylint,
requests,
six,
sqlalchemy,
yaml,
[LOGGING]
# Apply logging string format checks to calls on these modules.
logging-modules=
logging,
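
The commit adds the rcfile but no runner for it. As a minimal sketch only, not part of this commit, the checker could be invoked against the package in the same style as run_tests.py below; the script name and location (repository root, next to .pylintrc) and an installed pylint are assumptions:

#!/usr/bin/env python
"""Hypothetical lint runner sketch; not included in this commit."""
import subprocess

if __name__ == '__main__':
  # --rcfile points pylint at the configuration added above.
  subprocess.check_call(['pylint', '--rcfile=.pylintrc', 'dfdewey'])
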

.style.yapf (new file): 11 lines

@@ -0,0 +1,11 @@
#
# To run yapf for this project, invoke as such from the base directory:
# yapf -i -r --style .style.yapf ./dfdewey/
#
[style]
based_on_style = yapf
COALESCE_BRACKETS = True
SPLIT_BEFORE_FIRST_ARGUMENT = True
SPLIT_PENALTY_AFTER_OPENING_BRACKET = 0
SPLIT_PENALTY_FOR_ADDED_LINE_SPLIT = 30
SPLIT_BEFORE_NAMED_ASSIGNS = False
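
The effect of these settings (arguments split after the opening bracket rather than aligned, brackets coalesced) is visible in the reformatted files below. As an illustrative sketch only, assuming yapf is installed and the snippet is run from the repository root, the same style can also be applied through yapf's Python API:

"""Illustrative sketch; not part of this commit."""
from yapf.yapflib.yapf_api import FormatCode

# A call shaped like the ones reformatted in this commit.
SOURCE = (
    "print('{0:s} - {1:d} hits'.format(term, "
    "results['hits']['total']['value']))\n")

# style_config points at the .style.yapf added above.
formatted_code, _ = FormatCode(SOURCE, style_config='.style.yapf')
print(formatted_code)
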

dfdewey/datastore/elastic.py

@@ -27,7 +27,7 @@ es_logger = logging.getLogger('dfdewey.elasticsearch')
es_logger.setLevel(logging.WARNING)
-class ElasticsearchDataStore(object):
+class ElasticsearchDataStore():
  """Implements the datastore."""
  # Number of events to queue up when bulk inserting events.
@@ -36,7 +36,7 @@ class ElasticsearchDataStore(object):
  def __init__(self, host='127.0.0.1', port=9200):
    """Create an Elasticsearch client."""
-    super(ElasticsearchDataStore, self).__init__()
+    super().__init__()
    self.client = Elasticsearch([{'host': host, 'port': port}], timeout=30)
    self.import_counter = collections.Counter()
    self.import_events = []
@@ -79,8 +79,8 @@ class ElasticsearchDataStore(object):
    if not self.client.indices.exists(index_name):
      try:
        self.client.indices.create(index=index_name)
-      except exceptions.ConnectionError:
-        raise RuntimeError('Unable to connect to backend datastore.')
+      except exceptions.ConnectionError as e:
+        raise RuntimeError('Unable to connect to backend datastore.') from e
    if not isinstance(index_name, six.text_type):
      index_name = codecs.decode(index_name, 'utf8')
@@ -97,12 +97,11 @@ class ElasticsearchDataStore(object):
    try:
      self.client.indices.delete(index=index_name)
    except exceptions.ConnectionError as e:
-      raise RuntimeError(
-          'Unable to connect to backend datastore: {}'.format(e))
+      raise RuntimeError('Unable to connect to backend datastore.') from e
  def import_event(
-      self, index_name, event=None,
-      event_id=None, flush_interval=DEFAULT_FLUSH_INTERVAL):
+      self, index_name, event=None, event_id=None,
+      flush_interval=DEFAULT_FLUSH_INTERVAL):
    """Add event to Elasticsearch.
    Args:
@@ -126,17 +125,8 @@ class ElasticsearchDataStore(object):
        event[k] = v
    # Header needed by Elasticsearch when bulk inserting.
-    header = {
-        'index': {
-            '_index': index_name
-        }
-    }
-    update_header = {
-        'update': {
-            '_index': index_name,
-            '_id': event_id
-        }
-    }
+    header = {'index': {'_index': index_name}}
+    update_header = {'update': {'_index': index_name, '_id': event_id}}
    if event_id:
      # Event has "lang" defined if there is a script used for import.
@@ -182,7 +172,4 @@ class ElasticsearchDataStore(object):
    search_type = 'query_then_fetch'
    return self.client.search(
-        body=query_dsl,
-        index=index_id,
-        size=size,
-        search_type=search_type)
+        body=query_dsl, index=index_id, size=size, search_type=search_type)

dfdewey/datastore/postgresql.py

@@ -24,22 +24,15 @@ postgresql_logger = logging.getLogger('dfdewey.postgresql')
postgresql_logger.setLevel(logging.WARNING)
-class PostgresqlDataStore(object):
+class PostgresqlDataStore():
  """Implements the datastore."""
  def __init__(
-      self,
-      host='127.0.0.1',
-      port=5432,
-      db_name='dfdewey',
-      autocommit=False):
+      self, host='127.0.0.1', port=5432, db_name='dfdewey', autocommit=False):
    """Create a PostgreSQL client."""
-    super(PostgresqlDataStore, self).__init__()
+    super().__init__()
    self.db = psycopg2.connect(
-        database=db_name,
-        user='dfdewey',
-        password='password',
-        host=host,
+        database=db_name, user='dfdewey', password='password', host=host,
        port=port)
    if autocommit:
      self.db.set_isolation_level(
@@ -60,9 +53,7 @@ class PostgresqlDataStore(object):
      rows: Array of value tuples to be inserted
    """
    extras.execute_values(
-        self.cursor,
-        'INSERT INTO {0:s} VALUES %s'.format(table_spec),
-        rows)
+        self.cursor, 'INSERT INTO {0:s} VALUES %s'.format(table_spec), rows)
  def execute(self, command):
    """Execute a command in the PostgreSQL database.
@@ -111,10 +102,7 @@ class PostgresqlDataStore(object):
    self.db.commit()
    self.db.close()
    self.db = psycopg2.connect(
-        database=db_name,
-        user='dfdewey',
-        password='password',
-        host=host,
+        database=db_name, user='dfdewey', password='password', host=host,
        port=port)
    if autocommit:
      self.db.set_isolation_level(
@@ -131,7 +119,8 @@ class PostgresqlDataStore(object):
    Returns:
      True if the table already exists, otherwise False
    """
-    self.cursor.execute("""
+    self.cursor.execute(
+        """
        SELECT 1 FROM information_schema.tables
        WHERE table_schema = '{0:s}' AND table_name = '{1:s}'""".format(
            table_schema, table_name))
@@ -149,9 +138,9 @@ class PostgresqlDataStore(object):
    Returns:
      True if the value exists, otherwise False
    """
-    self.cursor.execute("""
+    self.cursor.execute(
+        """
        SELECT 1 from {0:s}
-        WHERE {1:s} = '{2:s}'""".format(
-            table_name, column_name, value))
+        WHERE {1:s} = '{2:s}'""".format(table_name, column_name, value))
    return self.cursor.fetchone()

dfdewey main module

@@ -25,7 +25,6 @@ from dfdewey.datastore.elastic import ElasticsearchDataStore
from dfdewey.datastore.postgresql import PostgresqlDataStore
from dfdewey.utils import image
STRING_INDEXING_LOG_INTERVAL = 10000000
@@ -89,10 +88,7 @@ def process_image(image_file, case, base64, gunzip, unzip):
  image_path = os.path.abspath(image_file)
  output_path = tempfile.mkdtemp()
-  cmd = ['bulk_extractor',
-         '-o', output_path,
-         '-x', 'all',
-         '-e', 'wordlist']
+  cmd = ['bulk_extractor', '-o', output_path, '-x', 'all', '-e', 'wordlist']
  if base64:
    cmd.extend(['-e', 'base64'])
@@ -109,7 +105,7 @@ def process_image(image_file, case, base64, gunzip, unzip):
  print('\n*** Running bulk extractor:\n{0:s}'.format(' '.join(cmd)))
  output = subprocess.check_output(cmd)
  md5_offset = output.index(b'MD5') + 19
-  image_hash = output[md5_offset:md5_offset+32].decode('utf-8')
+  image_hash = output[md5_offset:md5_offset + 32].decode('utf-8')
  print('String extraction completed: {0!s}'.format(datetime.datetime.now()))
  print('\n*** Parsing image')
@@ -211,8 +207,9 @@ def search(query, case, image_path=None, query_list=None):
    images[image_hash[0]] = image_path
  else:
-    print('No image specified, searching all images in case \'{0:s}\''.format(
-        case))
+    print(
+        'No image specified, searching all images in case \'{0:s}\''.format(
+            case))
    image_hashes = case_db.query(
        'SELECT image_hash FROM image_case WHERE case_id = \'{0:s}\''.format(
            case))
@@ -234,29 +231,28 @@ def search(query, case, image_path=None, query_list=None):
          term = ''.join(('"', term.strip(), '"'))
          results = search_index(index, term)
          if results['hits']['total']['value'] > 0:
-            print('{0:s} - {1:d} hits'.format(
-                term, results['hits']['total']['value']))
+            print(
+                '{0:s} - {1:d} hits'.format(
+                    term, results['hits']['total']['value']))
    else:
      print('\n*** Searching for \'{0:s}\'...'.format(query))
      results = search_index(index, query)
      print('Returned {0:d} results:'.format(results['hits']['total']['value']))
      for hit in results['hits']['hits']:
        filename = image.get_filename_from_offset(
-            image_path,
-            hit['_source']['image'],
-            int(hit['_source']['offset']))
+            image_path, hit['_source']['image'], int(hit['_source']['offset']))
        if hit['_source']['file_offset']:
-          print('Offset: {0:d}\tFile: {1:s}\tFile offset:{2:s}\t'
-                'String: {3:s}'.format(
-                    hit['_source']['offset'],
-                    filename,
-                    hit['_source']['file_offset'],
-                    hit['_source']['data'].strip()))
+          print(
+              'Offset: {0:d}\tFile: {1:s}\tFile offset:{2:s}\t'
+              'String: {3:s}'.format(
+                  hit['_source']['offset'], filename,
+                  hit['_source']['file_offset'],
+                  hit['_source']['data'].strip()))
        else:
-          print('Offset: {0:d}\tFile: {1:s}\tString: {2:s}'.format(
-              hit['_source']['offset'],
-              filename,
-              hit['_source']['data'].strip()))
+          print(
+              'Offset: {0:d}\tFile: {1:s}\tString: {2:s}'.format(
+                  hit['_source']['offset'], filename,
+                  hit['_source']['data'].strip()))
def search_index(index_id, search_query):
@@ -278,8 +274,8 @@ def main():
  args = parse_args()
  if not args.search and not args.search_list:
    process_image(
-        args.image, args.case,
-        not args.no_base64, not args.no_gzip, not args.no_zip)
+        args.image, args.case, not args.no_base64, not args.no_gzip,
+        not args.no_zip)
  elif args.search:
    search(args.search, args.case, args.image)
  elif args.search_list:

dfdewey/utils/image.py

@@ -14,9 +14,10 @@
# limitations under the License.
"""Image File Access Functions."""
-from dfdewey.datastore.postgresql import PostgresqlDataStore
import pytsk3
+from dfdewey.datastore.postgresql import PostgresqlDataStore
def initialise_block_db(image_path, image_hash, case):
  """Creates a new image database.
@@ -68,7 +69,8 @@ def check_tracking_database(tracking_db, image_path, image_hash, case):
    tracking_db.execute(
        'CREATE TABLE images (image_path TEXT, image_hash TEXT PRIMARY KEY)')
-    tracking_db.execute("""
+    tracking_db.execute(
+        """
        CREATE TABLE image_case (
        case_id TEXT, image_hash TEXT REFERENCES images(image_hash),
        PRIMARY KEY (case_id, image_hash))""")
@@ -77,7 +79,8 @@ def check_tracking_database(tracking_db, image_path, image_hash, case):
  image_case_exists = False
  if image_exists:
-    image_case = tracking_db.query_single_row("""
+    image_case = tracking_db.query_single_row(
+        """
        SELECT 1 from image_case
        WHERE image_hash = '{0:s}' AND case_id = '{1:s}'""".format(
            image_hash, case))
@@ -85,11 +88,13 @@ def check_tracking_database(tracking_db, image_path, image_hash, case):
      image_case_exists = True
  if not image_exists:
-    tracking_db.execute("""
+    tracking_db.execute(
+        """
        INSERT INTO images (image_path, image_hash)
        VALUES ('{0:s}', '{1:s}')""".format(image_path, image_hash))
  if not image_case_exists:
-    tracking_db.execute("""
+    tracking_db.execute(
+        """
        INSERT INTO image_case (case_id, image_hash)
        VALUES ('{0:s}', '{1:s}')""".format(case, image_hash))
@@ -118,18 +123,25 @@ def populate_block_db(img, block_db, batch_size=1500):
    has_partition_table = True
    rows = []
    for part in volume:
-      print('Parsing partition {0:d}: {1:s}'.format(
-          part.addr, part.desc.decode('utf-8')))
+      print(
+          'Parsing partition {0:d}: {1:s}'.format(
+              part.addr, part.desc.decode('utf-8')))
      if part.flags != pytsk3.TSK_VS_PART_FLAG_ALLOC:
        continue
-      fs = pytsk3.FS_Info(img, offset=part.start * volume.info.block_size)
-      for inode in range(fs.info.first_inum, fs.info.last_inum + 1):
-        file = fs.open_meta(inode)
+      filesystem = pytsk3.FS_Info(
+          img, offset=part.start * volume.info.block_size)
+      for inode in range(filesystem.info.first_inum,
+                         filesystem.info.last_inum + 1):
+        file = filesystem.open_meta(inode)
        if file.info.meta.nlink > 0:
          for attr in file:
            for run in attr:
              for block in range(run.len):
-                rows.append((run.addr + block, inode, part.addr,))
+                rows.append((
+                    run.addr + block,
+                    inode,
+                    part.addr,
+                ))
                if len(rows) >= batch_size:
                  block_db.bulk_insert('blocks (block, inum, part)', rows)
                  rows = []
@@ -137,22 +149,26 @@ def populate_block_db(img, block_db, batch_size=1500):
        block_db.bulk_insert('blocks (block, inum, part)', rows)
      # File names
-      directory = fs.open_dir(path='/')
+      directory = filesystem.open_dir(path='/')
      list_directory(block_db, directory, part=part.addr, batch_size=batch_size)
  except IOError:
    pass
  if not has_partition_table:
-    fs = pytsk3.FS_Info(img)
+    filesystem = pytsk3.FS_Info(img)
    rows = []
-    for inode in range(fs.info.first_inum, fs.info.last_inum + 1):
+    for inode in range(filesystem.info.first_inum,
+                       filesystem.info.last_inum + 1):
      try:
-        file = fs.open_meta(inode)
+        file = filesystem.open_meta(inode)
        if file.info.meta.nlink > 0:
          for attr in file:
            for run in attr:
              for block in range(run.len):
-                rows.append((run.addr + block, inode,))
+                rows.append((
+                    run.addr + block,
+                    inode,
+                ))
                if len(rows) >= batch_size:
                  block_db.bulk_insert('blocks (block, inum)', rows)
                  rows = []
@@ -162,7 +178,7 @@ def populate_block_db(img, block_db, batch_size=1500):
        continue
    # File names
-    directory = fs.open_dir(path='/')
+    directory = filesystem.open_dir(path='/')
    list_directory(block_db, directory, batch_size=batch_size)
  block_db.execute('CREATE INDEX blocks_index ON blocks (block, part);')
@@ -205,15 +221,19 @@ def list_directory(
      print('Unable to decode: {}'.format(directory_entry.info.name.name))
      continue
    if part:
-      rows.append((directory_entry.info.meta.addr,
-                   name.replace('\'', '\'\''),
-                   part,))
+      rows.append((
+          directory_entry.info.meta.addr,
+          name.replace('\'', '\'\''),
+          part,
+      ))
      if len(rows) >= batch_size:
        block_db.bulk_insert('files (inum, filename, part)', rows)
        rows = []
    else:
-      rows.append((directory_entry.info.meta.addr,
-                   name.replace('\'', '\'\''),))
+      rows.append((
+          directory_entry.info.meta.addr,
+          name.replace('\'', '\'\''),
+      ))
      if len(rows) >= batch_size:
        block_db.bulk_insert('files (inum, filename)', rows)
        rows = []
@@ -224,11 +244,7 @@ def list_directory(
        if inode not in stack:
          rows = list_directory(
-              block_db,
-              sub_directory,
-              part=part,
-              stack=stack,
-              rows=rows,
+              block_db, sub_directory, part=part, stack=stack, rows=rows,
              batch_size=batch_size)
      except IOError:
@@ -281,14 +297,14 @@ def get_filename_from_offset(image_path, image_hash, offset):
  if not unalloc_part:
    try:
      if not partition_offset:
-        fs = pytsk3.FS_Info(img)
+        filesystem = pytsk3.FS_Info(img)
      else:
        offset -= partition_offset * device_block_size
-        fs = pytsk3.FS_Info(
+        filesystem = pytsk3.FS_Info(
            img, offset=partition_offset * device_block_size)
    except TypeError as e:
      print(e)
-    block_size = fs.info.block_size
+    block_size = filesystem.info.block_size
    inums = get_inums(block_db, offset / block_size, part=partition)
@@ -296,7 +312,7 @@ def get_filename_from_offset(image_path, image_hash, offset):
  if inums:
    for i in inums:
      real_inum = i[0]
-      if i[0] == 0 and fs.info.ftype == pytsk3.TSK_FS_TYPE_NTFS_DETECT:
+      if i[0] == 0 and filesystem.info.ftype == pytsk3.TSK_FS_TYPE_NTFS_DETECT:
        mft_record_size_offset = 0x40
        if partition_offset:
          mft_record_size_offset = \
@@ -304,10 +320,10 @@ def get_filename_from_offset(image_path, image_hash, offset):
        mft_record_size = int.from_bytes(
            img.read(mft_record_size_offset, 1), 'little', signed=True)
        if mft_record_size < 0:
-          mft_record_size = 2 ** (mft_record_size * -1)
+          mft_record_size = 2**(mft_record_size * -1)
        else:
          mft_record_size = mft_record_size * block_size
-        real_inum = get_resident_inum(offset, fs, mft_record_size)
+        real_inum = get_resident_inum(offset, filesystem, mft_record_size)
      filename = get_filename(block_db, real_inum, part=partition)
      if filename and not filenames:
        filenames.append('{0:s} ({1:d})'.format(filename, real_inum))
@@ -343,21 +359,21 @@ def get_inums(block_db, block, part=None):
  return inums
-def get_resident_inum(offset, fs, mft_record_size):
+def get_resident_inum(offset, filesystem, mft_record_size):
  """Gets the inode number associated with NTFS $MFT resident data.
  Args:
    offset: Data offset within volume
-    fs: pytsk3 FS_INFO object
+    filesystem: pytsk3 FS_INFO object
    mft_record_size: Size of an $MFT entry
  Returns:
    inode number of resident data
  """
-  block_size = fs.info.block_size
+  block_size = filesystem.info.block_size
  offset_block = int(offset / block_size)
-  inode = fs.open_meta(0)
+  inode = filesystem.open_meta(0)
  mft_entry = 0
  for attr in inode:
    for run in attr:

dfdewey/yapf_test.py (new file): 42 lines

@@ -0,0 +1,42 @@
# -*- coding: utf-8 -*-
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Enforce code style with YAPF."""
import os
import subprocess
import unittest
class StyleTest(unittest.TestCase):
"""Enforce code style requirements."""
def testCodeStyle(self):
"""Check YAPF style enforcement runs cleanly."""
dfdewey_path = os.path.abspath(os.path.dirname(__file__))
config_path = os.path.join(dfdewey_path, '..', '.style.yapf')
try:
subprocess.check_output(
['yapf', '--style', config_path, '--diff', '-r', dfdewey_path])
except subprocess.CalledProcessError as e:
if hasattr(e, 'output'):
raise Exception(
'Run "yapf --style {0:s} -i -r {1:s}" '
'to correct these problems: {2:s}'.format(
config_path, dfdewey_path, e.output.decode('utf-8'))) from e
raise
if __name__ == '__main__':
unittest.main()

run_tests.py (new executable file): 23 lines

@@ -0,0 +1,23 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Script to run tests."""
import subprocess
if __name__ == '__main__':
subprocess.check_call([
'nosetests', '-vv', '--with-coverage', '--cover-package=dfdewey', '--exe'
])

setup.py

@@ -20,9 +20,13 @@ import sys
from setuptools import find_packages
from setuptools import setup
+import dfdewey
sys.path.insert(0, '.')
-import dfdewey
+DFDEWEY_DESCRIPTION = (
+    'dfDewey is a digital forensics string extraction, indexing, and searching '
+    'tool.')
requirements = []
with open('requirements.txt','r') as f:
@@ -30,7 +34,7 @@ with open('requirements.txt','r') as f:
setup(
    name='dfDewey',
    version=dfdewey.__version__,
-    description='dfDewey is a digital forensics string extraction, indexing, and searching tool.',
+    description=DFDEWEY_DESCRIPTION,
    license='Apache License, Version 2.0',
    maintainer='dfDewey development team',
    maintainer_email='dfdewey-dev@googlegroups.com',
@@ -38,6 +42,6 @@ setup(
    include_package_data=True,
    install_requires=requirements,
    extras_require={
-        'dev': []
+        'dev': ['mock', 'nose', 'yapf', 'coverage']
    }
)