From 0ff2bed3ef8e31b8b050793c05a669b826a29d37 Mon Sep 17 00:00:00 2001
From: Jason Solomon
Date: Mon, 16 Nov 2020 14:30:34 +1100
Subject: [PATCH] Formatting test

---
 .gitignore                      |  15 ++
 .pylintrc                       | 365 ++++++++++++++++++++++++++++++++
 .style.yapf                     |  11 +
 dfdewey/datastore/__init__.py   |   2 +-
 dfdewey/datastore/elastic.py    |  33 +--
 dfdewey/datastore/postgresql.py |  33 +--
 dfdewey/dfdcli.py               |  46 ++--
 dfdewey/utils/__init__.py       |   2 +-
 dfdewey/utils/image.py          |  90 ++++----
 dfdewey/yapf_test.py            |  42 ++++
 run_tests.py                    |  23 ++
 setup.py                        |  10 +-
 12 files changed, 560 insertions(+), 112 deletions(-)
 create mode 100644 .pylintrc
 create mode 100644 .style.yapf
 create mode 100644 dfdewey/yapf_test.py
 create mode 100755 run_tests.py

diff --git a/.gitignore b/.gitignore
index 8d5a743..3e38430 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,12 +3,27 @@
 # Back-up files
 *~
 
+# Don't include build related files.
+/build/
+/dist/
+
 # Egg files
 /dfDewey.egg-info
 
+# Test files
+.coverage
+
 # Generic auto-generated build files
 *.pyc
 *.pyo
 
 # Specific auto-generated build files
 /__pycache__
+
+# IDE files
+.idea/
+.vscode/
+
+# Pipfile
+Pipfile
+Pipfile.lock
diff --git a/.pylintrc b/.pylintrc
new file mode 100644
index 0000000..4498033
--- /dev/null
+++ b/.pylintrc
@@ -0,0 +1,365 @@
+# Original file copied from:
+# https://chromium.googlesource.com/chromiumos/chromite/+/master/pylintrc
+
+[MASTER]
+
+# Specify a configuration file.
+#rcfile=
+
+# Python code to execute, usually for sys.path manipulation such as
+# pygtk.require().
+#init-hook=
+
+# Profiled execution.
+#profile=no
+
+# Add <file or directory> to the black list. It should be a base name, not a
+# path. You may set this option multiple times.
+ignore=CVS
+
+# Add files or directories matching the regex patterns to the blacklist. The
+# regex matches against base names, not paths.
+ignore-patterns=
+    .*_pb2\.py$
+
+# Pickle collected data for later comparisons.
+persistent=yes
+
+# List of plugins (as comma separated values of python modules names) to load,
+# usually to register additional checkers.
+#load-plugins=
+
+# Configure quote preferences.
+string-quote = single-avoid-escape
+triple-quote = double
+docstring-quote = double
+
+
+[MESSAGES CONTROL]
+
+# Enable the message, report, category or checker with the given id(s). You can
+# either give multiple identifiers separated by comma (,) or put this option
+# multiple times.
+# cros-logging-import: logging is deprecated. Use "from chromite.lib import
+#   cros_logging as logging" to import chromite/lib/cros_logging.
+# eq-without-hash: We omit this as we don't require all objects be hashable.
+#   We'll wait for unittest coverage to detect missing __hash__ on objects.
+# no-absolute-import: We don't seem to rely on this behavior, so don't enforce
+#   using this future import everywhere.
+# round-builtin: We omit this as all our usage of round() is OK with either
+#   Python 2 or 3 behavior (and probably leans towards 3 anyways).
+#enable=
+
+# Disable the message, report, category or checker with the given id(s). You
+# can either give multiple identifiers separated by comma (,) or put this
+# option multiple times (only on the command line, not in the configuration
+# file where it should appear only once). You can also use "--disable=all" to
+# disable everything first and then reenable specific checks. For example, if
+# you want to run only the similarities checker, you can use "--disable=all
+# --enable=similarities". If you want to run only the classes checker, but have
+# no Warning level messages displayed, use "--disable=all --enable=classes
+# --disable=W".
+disable=
+
+
+[REPORTS]
+
+# Set the output format. Available formats are text, parseable, colorized, msvs
+# (visual studio) and html
+output-format=text
+
+# Put messages in a separate file for each module / package specified on the
+# command line instead of printing them on stdout. Reports (if any) will be
+# written in a file name "pylint_global.[txt|html]".
+files-output=no
+
+# Tells whether to display a full report or only the messages
+# CHANGE: No report.
+reports=no
+
+# Activate the evaluation score.
+score=no
+
+# Python expression which should return a note less than 10 (10 is the highest
+# note). You have access to the variables error, warning and statement, which
+# respectively contain the number of errors / warnings messages and the total
+# number of statements analyzed. This is used by the global evaluation report
+# (RP0004).
+evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
+
+# Add a comment according to your evaluation note. This is used by the global
+# evaluation report (RP0004).
+comment=no
+
+
+[MISCELLANEOUS]
+
+# List of note tags to take in consideration, separated by a comma.
+notes=FIXME,XXX,TODO
+
+
+[FORMAT]
+
+# Maximum number of characters on a single line.
+max-line-length=80
+
+# Maximum number of lines in a module
+max-module-lines=1000
+
+# String used as indentation unit. This is usually "    " (4 spaces) or "\t" (1
+# tab).
+# CHANGE: Use "  " instead.
+indent-string='  '
+
+
+[TYPECHECK]
+
+# Tells whether missing members accessed in mixin class should be ignored. A
+# mixin class is detected if its name ends with "mixin" (case insensitive).
+ignore-mixin-members=yes
+
+# List of class names for which member attributes should not be checked
+# (useful for classes with attributes dynamically set).
+ignored-classes=pytsk3
+
+# When zope mode is activated, add a predefined set of Zope acquired attributes
+# to generated-members.
+#zope=no
+
+# List of members which are set dynamically and missed by pylint inference
+# system, and so shouldn't trigger E0201 when accessed.
+# CHANGE: Added 'AndRaise', 'AndReturn', 'InAnyOrder' and 'MultipleTimes' for pymox.
+# CHANGE: Added tempdir for @osutils.TempDirDecorator.
+#generated-members=
+
+# List of modules for which member attributes should not be checked.
+# Modules listed here will not trigger import errors even if the linter can't
+# import them.
+#
+# pytest: Made available by our testing virtualenv and can be assumed exists.
+ignored-modules=pytest
+
+
+[BASIC]
+
+# Required attributes for module, separated by a comma
+#required-attributes=
+
+# List of builtins function names that should not be used, separated by a comma.
+# exit & quit are for the interactive interpreter shell only.
+# https://docs.python.org/3/library/constants.html#constants-added-by-the-site-module
+bad-functions=
+    apply,
+    exit,
+    filter,
+    input,
+    map,
+    quit,
+    raw_input,
+    reduce,
+
+# Regular expression which should only match correct module names
+module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
+
+# Regular expression which should only match correct module level names
+const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$
+
+# Regular expression which should only match correct class names
+class-rgx=[A-Z_][a-zA-Z0-9]+$
+
+# Regular expression which should only match correct function names
+function-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct method names
+method-rgx=(test[A-Za-z0-9_]{2,30})|([a-z_][a-z0-9_]{2,30})$
+
+# Regular expression which should only match correct instance attribute names
+attr-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct argument names
+argument-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct variable names
+variable-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct list comprehension /
+# generator expression variable names
+inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$
+
+# Good variable names which should always be accepted, separated by a comma
+good-names=e,i,j,k,ex,Run,_,db,es
+
+# Bad variable names which should always be refused, separated by a comma
+bad-names=foo,bar,baz,toto,tutu,tata
+
+# Regular expression which should only match function or class names which do
+# not require a docstring
+no-docstring-rgx=__.*__
+
+
+[SIMILARITIES]
+
+# Minimum lines number of a similarity.
+min-similarity-lines=20
+
+# Ignore comments when computing similarities.
+ignore-comments=yes
+
+# Ignore docstrings when computing similarities.
+ignore-docstrings=yes
+
+
+[VARIABLES]
+
+# Tells whether we should check for unused import in __init__ files.
+init-import=no
+
+# A regular expression matching the beginning of the name of dummy variables
+# (i.e. not used).
+dummy-variables-rgx=_|unused_
+
+# List of additional names supposed to be defined in builtins. Remember that
+# you should avoid to define new builtins when possible.
+#additional-builtins=
+
+
+[CLASSES]
+
+# List of interface methods to ignore, separated by a comma. This is used for
+# instance to not check methods defined in Zope's Interface base class.
+#ignore-iface-methods=
+
+# List of method names used to declare (i.e. assign) instance attributes.
+defining-attr-methods=__init__,__new__,setUp
+
+
+[DESIGN]
+
+# Maximum number of arguments for function / method
+max-args=5
+
+# Argument names that match this expression will be ignored. Default to name
+# with leading underscore
+ignored-argument-names=_.*
+
+# Maximum number of locals for function / method body
+max-locals=15
+
+# Maximum number of return / yield for function / method body
+max-returns=6
+
+# Maximum number of branches for function / method body
+max-branchs=12
+
+# Maximum number of statements in function / method body
+max-statements=50
+
+# Maximum number of parents for a class (see R0901).
+max-parents=10
+
+# Maximum number of attributes for a class (see R0902).
+max-attributes=7
+
+# Minimum number of public methods for a class (see R0903).
+min-public-methods=2
+
+# Maximum number of public methods for a class (see R0904).
+max-public-methods=20
+
+
+[IMPORTS]
+
+# Deprecated modules which should not be used, separated by a comma.
+# __builtin__: Use the 'six.moves.builtins' module instead
+#   (or 'builtins' in Python 3).
+# apiclient: Use the 'googleapiclient' module instead.
+# Bastion: Dropped in Python 3.
+# ConfigParser: Use the 'six.moves.configparser' module instead
+#   (or 'configparser' in Python 3).
+# cookielib: Use the 'six.moves.http_cookiejar' module instead
+#   (or 'http.cookiejar' in Python 3).
+# cPickle: Use the 'pickle' module instead.
+# cStringIO: Use 'io.StringIO' or 'io.BytesIO' instead.
+# exceptions: Dropped in Python 3.
+# HTMLParser: Use the 'six.moves.html_parser' module instead
+#   (or 'html.parser' in Python 3).
+# httplib: Use the 'six.moves.http_client' module instead
+#   (or 'http.client' in Python 3).
+# md5: Use the 'hashlib' module instead.
+# mox: Use the 'mock' module instead.
+# optparse: Use the 'argparse' module instead.
+# Queue: Use the 'six.moves.queue' module instead (or 'queue' in Python 3).
+# regsub: Use the 're' module instead.
+# rexec: Dropped in Python 3.
+# StringIO: Use 'io.StringIO' or 'io.BytesIO' instead.
+# TERMIOS: Use the 'termios' module instead.
+# urllib2: Use the 'six.moves.urllib' module instead
+#   (or 'urllib.request' in Python 3).
+# urlparse: Use the 'six.moves.urllib' module instead
+#   (or 'urllib.parse' in Python 3).
+deprecated-modules=
+    __builtin__,
+    apiclient,
+    Bastion,
+    ConfigParser,
+    cookielib,
+    cPickle,
+    cStringIO,
+    exceptions,
+    HTMLParser,
+    httplib,
+    md5,
+    mox,
+    optparse,
+    Queue,
+    regsub,
+    rexec,
+    StringIO,
+    TERMIOS,
+    urllib2,
+    urlparse,
+
+# Create a graph of all (i.e. internal and external) dependencies in the
+# given file (report RP0402 must not be disabled)
+#import-graph=
+
+# Create a graph of external dependencies in the given file (report RP0402 must
+# not be disabled)
+#ext-import-graph=
+
+# Create a graph of internal dependencies in the given file (report RP0402 must
+# not be disabled)
+#int-import-graph=
+
+# Force import order to recognize a module as part of the standard
+# compatibility libraries.
+known-standard-library=
+
+# Force import order to recognize a module as part of a third party library.
+known-third-party=
+    _emerge,
+    apiclient,
+    elftools,
+    gcloud,
+    google,
+    googleapiclient,
+    httplib2,
+    jinja2,
+    jsonschema,
+    lddtree,
+    magic,
+    mock,
+    oauth2client,
+    portage,
+    pylint,
+    requests,
+    six,
+    sqlalchemy,
+    yaml,
+
+
+[LOGGING]
+
+# Apply logging string format checks to calls on these modules.
+logging-modules=
+    logging,
diff --git a/.style.yapf b/.style.yapf
new file mode 100644
index 0000000..43b8e6e
--- /dev/null
+++ b/.style.yapf
@@ -0,0 +1,11 @@
+#
+# To run yapf for this project, invoke as such from the base directory:
+# yapf -i -r --style .style.yapf ./dfdewey/
+#
+[style]
+based_on_style = yapf
+COALESCE_BRACKETS = True
+SPLIT_BEFORE_FIRST_ARGUMENT = True
+SPLIT_PENALTY_AFTER_OPENING_BRACKET = 0
+SPLIT_PENALTY_FOR_ADDED_LINE_SPLIT = 30
+SPLIT_BEFORE_NAMED_ASSIGNS = False
diff --git a/dfdewey/datastore/__init__.py b/dfdewey/datastore/__init__.py
index 692a75a..d91e05e 100644
--- a/dfdewey/datastore/__init__.py
+++ b/dfdewey/datastore/__init__.py
@@ -12,4 +12,4 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""DFDewey Datastore Package.""" \ No newline at end of file +"""DFDewey Datastore Package.""" diff --git a/dfdewey/datastore/elastic.py b/dfdewey/datastore/elastic.py index 6831eca..7872d22 100644 --- a/dfdewey/datastore/elastic.py +++ b/dfdewey/datastore/elastic.py @@ -27,7 +27,7 @@ es_logger = logging.getLogger('dfdewey.elasticsearch') es_logger.setLevel(logging.WARNING) -class ElasticsearchDataStore(object): +class ElasticsearchDataStore(): """Implements the datastore.""" # Number of events to queue up when bulk inserting events. @@ -36,7 +36,7 @@ class ElasticsearchDataStore(object): def __init__(self, host='127.0.0.1', port=9200): """Create an Elasticsearch client.""" - super(ElasticsearchDataStore, self).__init__() + super().__init__() self.client = Elasticsearch([{'host': host, 'port': port}], timeout=30) self.import_counter = collections.Counter() self.import_events = [] @@ -79,8 +79,8 @@ class ElasticsearchDataStore(object): if not self.client.indices.exists(index_name): try: self.client.indices.create(index=index_name) - except exceptions.ConnectionError: - raise RuntimeError('Unable to connect to backend datastore.') + except exceptions.ConnectionError as e: + raise RuntimeError('Unable to connect to backend datastore.') from e if not isinstance(index_name, six.text_type): index_name = codecs.decode(index_name, 'utf8') @@ -97,12 +97,11 @@ class ElasticsearchDataStore(object): try: self.client.indices.delete(index=index_name) except exceptions.ConnectionError as e: - raise RuntimeError( - 'Unable to connect to backend datastore: {}'.format(e)) + raise RuntimeError('Unable to connect to backend datastore.') from e def import_event( - self, index_name, event=None, - event_id=None, flush_interval=DEFAULT_FLUSH_INTERVAL): + self, index_name, event=None, event_id=None, + flush_interval=DEFAULT_FLUSH_INTERVAL): """Add event to Elasticsearch. Args: @@ -126,17 +125,8 @@ class ElasticsearchDataStore(object): event[k] = v # Header needed by Elasticsearch when bulk inserting. - header = { - 'index': { - '_index': index_name - } - } - update_header = { - 'update': { - '_index': index_name, - '_id': event_id - } - } + header = {'index': {'_index': index_name}} + update_header = {'update': {'_index': index_name, '_id': event_id}} if event_id: # Event has "lang" defined if there is a script used for import. 
@@ -182,7 +172,4 @@ class ElasticsearchDataStore(object):
       search_type = 'query_then_fetch'
 
     return self.client.search(
-        body=query_dsl,
-        index=index_id,
-        size=size,
-        search_type=search_type)
+        body=query_dsl, index=index_id, size=size, search_type=search_type)
diff --git a/dfdewey/datastore/postgresql.py b/dfdewey/datastore/postgresql.py
index 86c2663..921d1e8 100644
--- a/dfdewey/datastore/postgresql.py
+++ b/dfdewey/datastore/postgresql.py
@@ -24,22 +24,15 @@
 postgresql_logger = logging.getLogger('dfdewey.postgresql')
 postgresql_logger.setLevel(logging.WARNING)
 
-class PostgresqlDataStore(object):
+class PostgresqlDataStore():
   """Implements the datastore."""
 
   def __init__(
-      self,
-      host='127.0.0.1',
-      port=5432,
-      db_name='dfdewey',
-      autocommit=False):
+      self, host='127.0.0.1', port=5432, db_name='dfdewey', autocommit=False):
     """Create a PostgreSQL client."""
-    super(PostgresqlDataStore, self).__init__()
+    super().__init__()
     self.db = psycopg2.connect(
-        database=db_name,
-        user='dfdewey',
-        password='password',
-        host=host,
+        database=db_name, user='dfdewey', password='password', host=host,
         port=port)
     if autocommit:
       self.db.set_isolation_level(
@@ -60,9 +53,7 @@ class PostgresqlDataStore(object):
       rows: Array of value tuples to be inserted
     """
     extras.execute_values(
-        self.cursor,
-        'INSERT INTO {0:s} VALUES %s'.format(table_spec),
-        rows)
+        self.cursor, 'INSERT INTO {0:s} VALUES %s'.format(table_spec), rows)
 
   def execute(self, command):
     """Execute a command in the PostgreSQL database.
@@ -111,10 +102,7 @@ class PostgresqlDataStore(object):
     self.db.commit()
     self.db.close()
     self.db = psycopg2.connect(
-        database=db_name,
-        user='dfdewey',
-        password='password',
-        host=host,
+        database=db_name, user='dfdewey', password='password', host=host,
        port=port)
     if autocommit:
       self.db.set_isolation_level(
@@ -131,7 +119,8 @@ class PostgresqlDataStore(object):
     Returns:
       True if the table already exists, otherwise False
     """
-    self.cursor.execute("""
+    self.cursor.execute(
+        """
         SELECT 1 FROM information_schema.tables
         WHERE table_schema = '{0:s}' AND table_name = '{1:s}'""".format(
             table_schema, table_name))
@@ -149,9 +138,9 @@ class PostgresqlDataStore(object):
     Returns:
       True if the value exists, otherwise False
     """
-    self.cursor.execute("""
+    self.cursor.execute(
+        """
         SELECT 1 from {0:s}
-      WHERE {1:s} = '{2:s}'""".format(
-          table_name, column_name, value))
+        WHERE {1:s} = '{2:s}'""".format(table_name, column_name, value))
 
     return self.cursor.fetchone()
diff --git a/dfdewey/dfdcli.py b/dfdewey/dfdcli.py
index e983f1e..9622cd6 100755
--- a/dfdewey/dfdcli.py
+++ b/dfdewey/dfdcli.py
@@ -25,7 +25,6 @@
 from dfdewey.datastore.elastic import ElasticsearchDataStore
 from dfdewey.datastore.postgresql import PostgresqlDataStore
 from dfdewey.utils import image
 
-
 STRING_INDEXING_LOG_INTERVAL = 10000000
 
@@ -89,10 +88,7 @@ def process_image(image_file, case, base64, gunzip, unzip):
   image_path = os.path.abspath(image_file)
   output_path = tempfile.mkdtemp()
 
-  cmd = ['bulk_extractor',
-         '-o', output_path,
-         '-x', 'all',
-         '-e', 'wordlist']
+  cmd = ['bulk_extractor', '-o', output_path, '-x', 'all', '-e', 'wordlist']
 
   if base64:
     cmd.extend(['-e', 'base64'])
@@ -109,7 +105,7 @@ def process_image(image_file, case, base64, gunzip, unzip):
   print('\n*** Running bulk extractor:\n{0:s}'.format(' '.join(cmd)))
   output = subprocess.check_output(cmd)
   md5_offset = output.index(b'MD5') + 19
-  image_hash = output[md5_offset:md5_offset+32].decode('utf-8')
+  image_hash = output[md5_offset:md5_offset + 32].decode('utf-8')
   print('String extraction completed: {0!s}'.format(datetime.datetime.now()))
 
   print('\n*** Parsing image')
@@ -211,8 +207,9 @@ def search(query, case, image_path=None, query_list=None):
       images[image_hash[0]] = image_path
 
   else:
-    print('No image specified, searching all images in case \'{0:s}\''.format(
-        case))
+    print(
+        'No image specified, searching all images in case \'{0:s}\''.format(
+            case))
     image_hashes = case_db.query(
         'SELECT image_hash FROM image_case WHERE case_id = \'{0:s}\''.format(
             case))
@@ -234,29 +231,28 @@ def search(query, case, image_path=None, query_list=None):
       term = ''.join(('"', term.strip(), '"'))
       results = search_index(index, term)
       if results['hits']['total']['value'] > 0:
-        print('{0:s} - {1:d} hits'.format(
-            term, results['hits']['total']['value']))
+        print(
+            '{0:s} - {1:d} hits'.format(
+                term, results['hits']['total']['value']))
   else:
     print('\n*** Searching for \'{0:s}\'...'.format(query))
     results = search_index(index, query)
     print('Returned {0:d} results:'.format(results['hits']['total']['value']))
     for hit in results['hits']['hits']:
       filename = image.get_filename_from_offset(
-          image_path,
-          hit['_source']['image'],
-          int(hit['_source']['offset']))
+          image_path, hit['_source']['image'], int(hit['_source']['offset']))
       if hit['_source']['file_offset']:
-        print('Offset: {0:d}\tFile: {1:s}\tFile offset:{2:s}\t'
-              'String: {3:s}'.format(
-                  hit['_source']['offset'],
-                  filename,
-                  hit['_source']['file_offset'],
-                  hit['_source']['data'].strip()))
+        print(
+            'Offset: {0:d}\tFile: {1:s}\tFile offset:{2:s}\t'
+            'String: {3:s}'.format(
+                hit['_source']['offset'], filename,
+                hit['_source']['file_offset'],
+                hit['_source']['data'].strip()))
       else:
-        print('Offset: {0:d}\tFile: {1:s}\tString: {2:s}'.format(
-            hit['_source']['offset'],
-            filename,
-            hit['_source']['data'].strip()))
+        print(
+            'Offset: {0:d}\tFile: {1:s}\tString: {2:s}'.format(
+                hit['_source']['offset'], filename,
+                hit['_source']['data'].strip()))
 
 
 def search_index(index_id, search_query):
@@ -278,8 +274,8 @@ def main():
   args = parse_args()
   if not args.search and not args.search_list:
     process_image(
-        args.image, args.case,
-        not args.no_base64, not args.no_gzip, not args.no_zip)
+        args.image, args.case, not args.no_base64, not args.no_gzip,
+        not args.no_zip)
   elif args.search:
     search(args.search, args.case, args.image)
   elif args.search_list:
diff --git a/dfdewey/utils/__init__.py b/dfdewey/utils/__init__.py
index e2df6ad..925071d 100644
--- a/dfdewey/utils/__init__.py
+++ b/dfdewey/utils/__init__.py
@@ -12,4 +12,4 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""DFDewey Utils Package."""
\ No newline at end of file
+"""DFDewey Utils Package."""
diff --git a/dfdewey/utils/image.py b/dfdewey/utils/image.py
index 56de867..a070d68 100644
--- a/dfdewey/utils/image.py
+++ b/dfdewey/utils/image.py
@@ -14,9 +14,10 @@
 # limitations under the License.
 """Image File Access Functions."""
 
-from dfdewey.datastore.postgresql import PostgresqlDataStore
 import pytsk3
 
+from dfdewey.datastore.postgresql import PostgresqlDataStore
+
 
 def initialise_block_db(image_path, image_hash, case):
   """Creates a new image database.
@@ -68,7 +69,8 @@ def check_tracking_database(tracking_db, image_path, image_hash, case):
     tracking_db.execute(
         'CREATE TABLE images (image_path TEXT, image_hash TEXT PRIMARY KEY)')
 
-    tracking_db.execute("""
+    tracking_db.execute(
+        """
         CREATE TABLE image_case (
         case_id TEXT, image_hash TEXT REFERENCES images(image_hash),
         PRIMARY KEY (case_id, image_hash))""")
@@ -77,7 +79,8 @@ def check_tracking_database(tracking_db, image_path, image_hash, case):
 
   image_case_exists = False
   if image_exists:
-    image_case = tracking_db.query_single_row("""
+    image_case = tracking_db.query_single_row(
+        """
         SELECT 1 from image_case
         WHERE image_hash = '{0:s}' AND case_id = '{1:s}'""".format(
             image_hash, case))
@@ -85,11 +88,13 @@ def check_tracking_database(tracking_db, image_path, image_hash, case):
    if image_case:
      image_case_exists = True
 
   if not image_exists:
-    tracking_db.execute("""
+    tracking_db.execute(
+        """
         INSERT INTO images (image_path, image_hash)
         VALUES ('{0:s}', '{1:s}')""".format(image_path, image_hash))
 
   if not image_case_exists:
-    tracking_db.execute("""
+    tracking_db.execute(
+        """
         INSERT INTO image_case (case_id, image_hash)
         VALUES ('{0:s}', '{1:s}')""".format(case, image_hash))
@@ -118,18 +123,25 @@ def populate_block_db(img, block_db, batch_size=1500):
     has_partition_table = True
     rows = []
     for part in volume:
-      print('Parsing partition {0:d}: {1:s}'.format(
-          part.addr, part.desc.decode('utf-8')))
+      print(
+          'Parsing partition {0:d}: {1:s}'.format(
+              part.addr, part.desc.decode('utf-8')))
       if part.flags != pytsk3.TSK_VS_PART_FLAG_ALLOC:
         continue
-      fs = pytsk3.FS_Info(img, offset=part.start * volume.info.block_size)
-      for inode in range(fs.info.first_inum, fs.info.last_inum + 1):
-        file = fs.open_meta(inode)
+      filesystem = pytsk3.FS_Info(
+          img, offset=part.start * volume.info.block_size)
+      for inode in range(filesystem.info.first_inum,
+                         filesystem.info.last_inum + 1):
+        file = filesystem.open_meta(inode)
         if file.info.meta.nlink > 0:
           for attr in file:
            for run in attr:
              for block in range(run.len):
-                rows.append((run.addr + block, inode, part.addr,))
+                rows.append((
+                    run.addr + block,
+                    inode,
+                    part.addr,
+                ))
                 if len(rows) >= batch_size:
                   block_db.bulk_insert('blocks (block, inum, part)', rows)
                   rows = []
@@ -137,22 +149,26 @@ def populate_block_db(img, block_db, batch_size=1500):
        block_db.bulk_insert('blocks (block, inum, part)', rows)
 
      # File names
-      directory = fs.open_dir(path='/')
+      directory = filesystem.open_dir(path='/')
      list_directory(block_db, directory, part=part.addr, batch_size=batch_size)
  except IOError:
    pass
 
  if not has_partition_table:
-    fs = pytsk3.FS_Info(img)
+    filesystem = pytsk3.FS_Info(img)
    rows = []
-    for inode in range(fs.info.first_inum, fs.info.last_inum + 1):
+    for inode in range(filesystem.info.first_inum,
+                       filesystem.info.last_inum + 1):
      try:
-        file = fs.open_meta(inode)
+        file = filesystem.open_meta(inode)
        if file.info.meta.nlink > 0:
          for attr in file:
            for run in attr:
              for block in range(run.len):
-                rows.append((run.addr + block, inode,))
+                rows.append((
+                    run.addr + block,
+                    inode,
+                ))
                if len(rows) >= batch_size:
                  block_db.bulk_insert('blocks (block, inum)', rows)
                  rows = []
@@ -162,7 +178,7 @@ def populate_block_db(img, block_db, batch_size=1500):
      except OSError:
        continue
 
  # File names
-  directory = fs.open_dir(path='/')
+  directory = filesystem.open_dir(path='/')
  list_directory(block_db, directory, batch_size=batch_size)
  block_db.execute('CREATE INDEX blocks_index ON blocks (block, part);')
@@ -205,15 +221,19 @@ def list_directory(
         print('Unable to decode: {}'.format(directory_entry.info.name.name))
         continue
       if part:
-        rows.append((directory_entry.info.meta.addr,
-                     name.replace('\'', '\'\''),
-                     part,))
+        rows.append((
+            directory_entry.info.meta.addr,
+            name.replace('\'', '\'\''),
+            part,
+        ))
         if len(rows) >= batch_size:
           block_db.bulk_insert('files (inum, filename, part)', rows)
           rows = []
       else:
-        rows.append((directory_entry.info.meta.addr,
-                     name.replace('\'', '\'\''),))
+        rows.append((
+            directory_entry.info.meta.addr,
+            name.replace('\'', '\'\''),
+        ))
         if len(rows) >= batch_size:
           block_db.bulk_insert('files (inum, filename)', rows)
           rows = []
@@ -224,11 +244,7 @@ def list_directory(
 
         if inode not in stack:
           rows = list_directory(
-              block_db,
-              sub_directory,
-              part=part,
-              stack=stack,
-              rows=rows,
+              block_db, sub_directory, part=part, stack=stack, rows=rows,
               batch_size=batch_size)
 
     except IOError:
@@ -281,14 +297,14 @@ def get_filename_from_offset(image_path, image_hash, offset):
   if not unalloc_part:
     try:
       if not partition_offset:
-        fs = pytsk3.FS_Info(img)
+        filesystem = pytsk3.FS_Info(img)
       else:
         offset -= partition_offset * device_block_size
-        fs = pytsk3.FS_Info(
+        filesystem = pytsk3.FS_Info(
             img, offset=partition_offset * device_block_size)
     except TypeError as e:
       print(e)
-  block_size = fs.info.block_size
+  block_size = filesystem.info.block_size
 
   inums = get_inums(block_db, offset / block_size, part=partition)
 
@@ -296,7 +312,7 @@ def get_filename_from_offset(image_path, image_hash, offset):
   filenames = []
   if inums:
     for i in inums:
       real_inum = i[0]
-      if i[0] == 0 and fs.info.ftype == pytsk3.TSK_FS_TYPE_NTFS_DETECT:
+      if i[0] == 0 and filesystem.info.ftype == pytsk3.TSK_FS_TYPE_NTFS_DETECT:
         mft_record_size_offset = 0x40
         if partition_offset:
@@ -304,10 +320,10 @@ def get_filename_from_offset(image_path, image_hash, offset):
           mft_record_size_offset = \
              partition_offset * device_block_size + 0x40
        mft_record_size = int.from_bytes(
            img.read(mft_record_size_offset, 1), 'little', signed=True)
        if mft_record_size < 0:
-          mft_record_size = 2 ** (mft_record_size * -1)
+          mft_record_size = 2**(mft_record_size * -1)
        else:
          mft_record_size = mft_record_size * block_size
-        real_inum = get_resident_inum(offset, fs, mft_record_size)
+        real_inum = get_resident_inum(offset, filesystem, mft_record_size)
      filename = get_filename(block_db, real_inum, part=partition)
      if filename and not filenames:
        filenames.append('{0:s} ({1:d})'.format(filename, real_inum))
@@ -343,21 +359,21 @@ def get_inums(block_db, block, part=None):
   return inums
 
 
-def get_resident_inum(offset, fs, mft_record_size):
+def get_resident_inum(offset, filesystem, mft_record_size):
   """Gets the inode number associated with NTFS $MFT resident data.
 
   Args:
     offset: Data offset within volume
-    fs: pytsk3 FS_INFO object
+    filesystem: pytsk3 FS_INFO object
     mft_record_size: Size of an $MFT entry
 
   Returns:
     inode number of resident data
   """
-  block_size = fs.info.block_size
+  block_size = filesystem.info.block_size
   offset_block = int(offset / block_size)
 
-  inode = fs.open_meta(0)
+  inode = filesystem.open_meta(0)
   mft_entry = 0
   for attr in inode:
     for run in attr:
diff --git a/dfdewey/yapf_test.py b/dfdewey/yapf_test.py
new file mode 100644
index 0000000..c238a8a
--- /dev/null
+++ b/dfdewey/yapf_test.py
@@ -0,0 +1,42 @@
+# -*- coding: utf-8 -*-
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Enforce code style with YAPF."""
+
+import os
+import subprocess
+import unittest
+
+
+class StyleTest(unittest.TestCase):
+  """Enforce code style requirements."""
+
+  def testCodeStyle(self):
+    """Check YAPF style enforcement runs cleanly."""
+    dfdewey_path = os.path.abspath(os.path.dirname(__file__))
+    config_path = os.path.join(dfdewey_path, '..', '.style.yapf')
+    try:
+      subprocess.check_output(
+          ['yapf', '--style', config_path, '--diff', '-r', dfdewey_path])
+    except subprocess.CalledProcessError as e:
+      if hasattr(e, 'output'):
+        raise Exception(
+            'Run "yapf --style {0:s} -i -r {1:s}" '
+            'to correct these problems: {2:s}'.format(
+                config_path, dfdewey_path, e.output.decode('utf-8'))) from e
+      raise
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/run_tests.py b/run_tests.py
new file mode 100755
index 0000000..860c497
--- /dev/null
+++ b/run_tests.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Script to run tests."""
+
+import subprocess
+
+if __name__ == '__main__':
+  subprocess.check_call([
+      'nosetests', '-vv', '--with-coverage', '--cover-package=dfdewey', '--exe'
+  ])
diff --git a/setup.py b/setup.py
index 805b9b0..e0964c5 100644
--- a/setup.py
+++ b/setup.py
@@ -20,9 +20,13 @@ import sys
 from setuptools import find_packages
 from setuptools import setup
 
+import dfdewey
+
 sys.path.insert(0, '.')
-import dfdewey
+DFDEWEY_DESCRIPTION = (
+    'dfDewey is a digital forensics string extraction, indexing, and searching '
+    'tool.')
 
 requirements = []
 with open('requirements.txt','r') as f:
  requirements = f.read().splitlines()
@@ -30,7 +34,7 @@ with open('requirements.txt','r') as f:
 setup(
     name='dfDewey',
     version=dfdewey.__version__,
-    description='dfDewey is a digital forensics string extraction, indexing, and searching tool.',
+    description=DFDEWEY_DESCRIPTION,
     license='Apache License, Version 2.0',
     maintainer='dfDewey development team',
     maintainer_email='dfdewey-dev@googlegroups.com',
@@ -38,6 +42,6 @@ setup(
     include_package_data=True,
     install_requires=requirements,
     extras_require={
-        'dev': []
+        'dev': ['mock', 'nose', 'yapf', 'coverage']
     }
 )