Formatting test

2020-11-16 14:30:34 +11:00 · 2020-11-16 14:30:34 +11:00 · 0ff2bed3ef
commit 0ff2bed3ef
parent d277394b5c
12 changed files with 560 additions and 112 deletions
--- a/.gitignore
+++ b/.gitignore
@ -3,12 +3,27 @@
 # Back-up files
 *~

+# Don't include build related files.
+/build/
+/dist/
+
 # Egg files
 /dfDewey.egg-info

+# Test files
+.coverage
+
 # Generic auto-generated build files
 *.pyc
 *.pyo

 # Specific auto-generated build files
 /__pycache__
+
+# IDE files
+.idea/
+.vscode/
+
+# Pipfile
+Pipfile
+Pipfile.lock
--- a/.pylintrc
+++ b/.pylintrc
@ -0,0 +1,365 @@
+# Original file copied from:
+# https://chromium.googlesource.com/chromiumos/chromite/+/master/pylintrc
+
+[MASTER]
+
+# Specify a configuration file.
+#rcfile=
+
+# Python code to execute, usually for sys.path manipulation such as
+# pygtk.require().
+#init-hook=
+
+# Profiled execution.
+#profile=no
+
+# Add <file or directory> to the black list. It should be a base name, not a
+# path. You may set this option multiple times.
+ignore=CVS
+
+# Add files or directories matching the regex patterns to the blacklist. The
+# regex matches against base names, not paths.
+ignore-patterns=
+    .*_pb2\.py$
+
+# Pickle collected data for later comparisons.
+persistent=yes
+
+# List of plugins (as comma separated values of python modules names) to load,
+# usually to register additional checkers.
+#load-plugins=
+
+# Configure quote preferences.
+string-quote = single-avoid-escape
+triple-quote = double
+docstring-quote = double
+
+
+[MESSAGES CONTROL]
+
+# Enable the message, report, category or checker with the given id(s). You can
+# either give multiple identifiers separated by comma (,) or put this option
+# multiple times.
+# cros-logging-import: logging is deprecated. Use "from chromite.lib import
+#   cros_logging as logging" to import chromite/lib/cros_logging.
+# eq-without-hash: We omit this as we don't require all objects be hashable.
+#   We'll wait for unittest coverage to detect missing __hash__ on objects.
+# no-absolute-import: We don't seem to rely on this behavior, so don't enforce
+#   using this future import everywhere.
+# round-builtin: We omit this as all our usage of round() is OK with either
+#   Python 2 or 3 behavior (and probably leans towards 3 anyways).
+#enable=
+
+# Disable the message, report, category or checker with the given id(s). You
+# can either give multiple identifiers separated by comma (,) or put this
+# option multiple times (only on the command line, not in the configuration
+# file where it should appear only once). You can also use "--disable=all" to
+# disable everything first and then reenable specific checks. For example, if
+# you want to run only the similarities checker, you can use "--disable=all
+# --enable=similarities". If you want to run only the classes checker, but have
+# no Warning level messages displayed, use "--disable=all --enable=classes
+# --disable=W".
+disable=
+
+
+[REPORTS]
+
+# Set the output format. Available formats are text, parseable, colorized, msvs
+# (visual studio) and html
+output-format=text
+
+# Put messages in a separate file for each module / package specified on the
+# command line instead of printing them on stdout. Reports (if any) will be
+# written to a file named "pylint_global.[txt|html]".
+files-output=no
+
+# Tells whether to display a full report or only the messages
+# CHANGE: No report.
+reports=no
+
+# Activate the evaluation score.
+score=no
+
+# Python expression which should return a score no greater than 10 (10 is the
+# highest score). You have access to the variables error, warning and
+# statement, which respectively contain the number of error messages, the
+# number of warning messages and the total number of statements analyzed. This
+# is used by the global evaluation report (RP0004).
+evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
+
+# Add a comment according to your evaluation note. This is used by the global
+# evaluation report (RP0004).
+comment=no
+
+
+[MISCELLANEOUS]
+
+# List of note tags to take in consideration, separated by a comma.
+notes=FIXME,XXX,TODO
+
+
+[FORMAT]
+
+# Maximum number of characters on a single line.
+max-line-length=80
+
+# Maximum number of lines in a module
+max-module-lines=1000
+
+# String used as indentation unit. This is usually "    " (4 spaces) or "\t"
+# (1 tab).
+# CHANGE: Use "  " instead.
+indent-string='  '
+
+
+[TYPECHECK]
+
+# Tells whether missing members accessed in mixin class should be ignored. A
+# mixin class is detected if its name ends with "mixin" (case insensitive).
+ignore-mixin-members=yes
+
+# List of classes names for which member attributes should not be checked
+# (useful for classes with attributes dynamically set).
+ignored-classes=pytsk3
+
+# When zope mode is activated, add a predefined set of Zope acquired attributes
+# to generated-members.
+#zope=no
+
+# List of members which are set dynamically and missed by pylint inference
+# system, and so shouldn't trigger E0201 when accessed.
+# CHANGE: Added 'AndRaise', 'AndReturn', 'InAnyOrder' and 'MultipleTimes' for pymox.
+# CHANGE: Added tempdir for @osutils.TempDirDecorator.
+#generated-members=
+
+# List of modules for which member attributes should not be checked.
+# Modules listed here will not trigger import errors even if the linter can't
+# import them.
+#
+# pytest: Made available by our testing virtualenv and can be assumed to exist.
+ignored-modules=pytest
+
+
+[BASIC]
+
+# Required attributes for module, separated by a comma
+#required-attributes=
+
+# List of builtins function names that should not be used, separated by a comma.
+# exit & quit are for the interactive interpreter shell only.
+# https://docs.python.org/3/library/constants.html#constants-added-by-the-site-module
+bad-functions=
+    apply,
+    exit,
+    filter,
+    input,
+    map,
+    quit,
+    raw_input,
+    reduce,
+
+# Regular expression which should only match correct module names
+module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
+
+# Regular expression which should only match correct module level names
+const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$
+
+# Regular expression which should only match correct class names
+class-rgx=[A-Z_][a-zA-Z0-9]+$
+
+# Regular expression which should only match correct function names
+function-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct method names
+method-rgx=(test[A-Za-z0-9_]{2,30})|([a-z_][a-z0-9_]{2,30})$
+
+# Regular expression which should only match correct instance attribute names
+attr-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct argument names
+argument-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct variable names
+variable-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct list comprehension /
+# generator expression variable names
+inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$
+
+# Good variable names which should always be accepted, separated by a comma
+good-names=e,i,j,k,ex,Run,_,db,es
+
+# Bad variable names which should always be refused, separated by a comma
+bad-names=foo,bar,baz,toto,tutu,tata
+
+# Regular expression which should only match function or class names which do
+# not require a docstring
+no-docstring-rgx=__.*__
+
+
+[SIMILARITIES]
+
+# Minimum lines number of a similarity.
+min-similarity-lines=20
+
+# Ignore comments when computing similarities.
+ignore-comments=yes
+
+# Ignore docstrings when computing similarities.
+ignore-docstrings=yes
+
+
+[VARIABLES]
+
+# Tells whether we should check for unused import in __init__ files.
+init-import=no
+
+# A regular expression matching the beginning of the name of dummy variables
+# (i.e. not used).
+dummy-variables-rgx=_|unused_
+
+# List of additional names supposed to be defined in builtins. Remember that
+# you should avoid defining new builtins when possible.
+#additional-builtins=
+
+
+[CLASSES]
+
+# List of interface methods to ignore, separated by a comma. This is used, for
+# instance, to avoid checking methods defined in Zope's Interface base class.
+#ignore-iface-methods=
+
+# List of method names used to declare (i.e. assign) instance attributes.
+defining-attr-methods=__init__,__new__,setUp
+
+
+[DESIGN]
+
+# Maximum number of arguments for function / method
+max-args=5
+
+# Argument names that match this expression will be ignored. Default to name
+# with leading underscore
+ignored-argument-names=_.*
+
+# Maximum number of locals for function / method body
+max-locals=15
+
+# Maximum number of return / yield for function / method body
+max-returns=6
+
+# Maximum number of branches for function / method body
+max-branchs=12
+
+# Maximum number of statements in function / method body
+max-statements=50
+
+# Maximum number of parents for a class (see R0901).
+max-parents=10
+
+# Maximum number of attributes for a class (see R0902).
+max-attributes=7
+
+# Minimum number of public methods for a class (see R0903).
+min-public-methods=2
+
+# Maximum number of public methods for a class (see R0904).
+max-public-methods=20
+
+
+[IMPORTS]
+
+# Deprecated modules which should not be used, separated by a comma.
+# __builtin__: Use the 'six.moves.builtins' module instead
+#   (or 'builtins' in Python 3).
+# apiclient: Use the 'googleapiclient' module instead.
+# Bastion: Dropped in Python 3.
+# ConfigParser: Use the 'six.moves.configparser' module instead
+#   (or 'configparser' in Python 3).
+# cookielib: Use the 'six.moves.http_cookiejar' module instead
+#   (or 'http.cookiejar' in Python 3).
+# cPickle: Use the 'pickle' module instead.
+# cStringIO: Use 'io.StringIO' or 'io.BytesIO' instead.
+# exceptions: Dropped in Python 3.
+# HTMLParser: Use the 'six.moves.html_parser' module instead
+#   (or 'html.parser' in Python 3).
+# httplib: Use the 'six.moves.http_client' module instead
+#   (or 'http.client' in Python 3).
+# md5: Use the 'hashlib' module instead.
+# mox: Use the 'mock' module instead.
+# optparse: Use the 'argparse' module instead.
+# Queue: Use the 'six.moves.queue' module instead (or 'queue' in Python 3).
+# regsub: Use the 're' module instead.
+# rexec: Dropped in Python 3.
+# StringIO: Use 'io.StringIO' or 'io.BytesIO' instead.
+# TERMIOS: Use the 'termios' module instead.
+# urllib2: Use the 'six.moves.urllib' module instead
+#   (or 'urllib.request' in Python 3).
+# urlparse: Use the 'six.moves.urllib' module instead
+#   (or 'urllib.parse' in Python 3).
+deprecated-modules=
+    __builtin__,
+    apiclient,
+    Bastion,
+    ConfigParser,
+    cookielib,
+    cPickle,
+    cStringIO,
+    exceptions,
+    HTMLParser,
+    httplib,
+    md5,
+    mox,
+    optparse,
+    Queue,
+    regsub,
+    rexec,
+    StringIO,
+    TERMIOS,
+    urllib2,
+    urlparse,
+
+# Create a graph of every (i.e. internal and external) dependencies in the
+# given file (report RP0402 must not be disabled)
+#import-graph=
+
+# Create a graph of external dependencies in the given file (report RP0402 must
+# not be disabled)
+#ext-import-graph=
+
+# Create a graph of internal dependencies in the given file (report RP0402 must
+# not be disabled)
+#int-import-graph=
+
+# Force import order to recognize a module as part of the standard
+# compatibility libraries.
+known-standard-library=
+
+# Force import order to recognize a module as part of a third party library.
+known-third-party=
+    _emerge,
+    apiclient,
+    elftools,
+    gcloud,
+    google,
+    googleapiclient,
+    httplib2,
+    jinja2,
+    jsonschema,
+    lddtree,
+    magic,
+    mock,
+    oauth2client,
+    portage,
+    pylint,
+    requests,
+    six,
+    sqlalchemy,
+    yaml,
+
+
+[LOGGING]
+
+# Apply logging string format checks to calls on these modules.
+logging-modules=
+    logging,
--- a/.style.yapf
+++ b/.style.yapf
@ -0,0 +1,11 @@
+#
+# To run yapf for this project, invoke as such from the base directory:
+#     yapf -i -r --style .style.yapf ./dfdewey/
+#
+[style]
+based_on_style = yapf
+COALESCE_BRACKETS = True
+SPLIT_BEFORE_FIRST_ARGUMENT = True
+SPLIT_PENALTY_AFTER_OPENING_BRACKET = 0
+SPLIT_PENALTY_FOR_ADDED_LINE_SPLIT = 30
+SPLIT_BEFORE_NAMED_ASSIGNS = False
--- a/dfdewey/datastore/elastic.py
+++ b/dfdewey/datastore/elastic.py
@ -27,7 +27,7 @@ es_logger = logging.getLogger('dfdewey.elasticsearch')
 es_logger.setLevel(logging.WARNING)


-class ElasticsearchDataStore(object):
+class ElasticsearchDataStore():
  """Implements the datastore."""

  # Number of events to queue up when bulk inserting events.
@ -36,7 +36,7 @@ class ElasticsearchDataStore(object):

  def __init__(self, host='127.0.0.1', port=9200):
    """Create an Elasticsearch client."""
-    super(ElasticsearchDataStore, self).__init__()
+    super().__init__()
    self.client = Elasticsearch([{'host': host, 'port': port}], timeout=30)
    self.import_counter = collections.Counter()
    self.import_events = []
@ -79,8 +79,8 @@ class ElasticsearchDataStore(object):
    if not self.client.indices.exists(index_name):
      try:
        self.client.indices.create(index=index_name)
-      except exceptions.ConnectionError:
-        raise RuntimeError('Unable to connect to backend datastore.')
+      except exceptions.ConnectionError as e:
+        raise RuntimeError('Unable to connect to backend datastore.') from e

    if not isinstance(index_name, six.text_type):
      index_name = codecs.decode(index_name, 'utf8')
@ -97,12 +97,11 @@ class ElasticsearchDataStore(object):
      try:
        self.client.indices.delete(index=index_name)
      except exceptions.ConnectionError as e:
-        raise RuntimeError(
-            'Unable to connect to backend datastore: {}'.format(e))
+        raise RuntimeError('Unable to connect to backend datastore.') from e

  def import_event(
-      self, index_name, event=None,
-      event_id=None, flush_interval=DEFAULT_FLUSH_INTERVAL):
+      self, index_name, event=None, event_id=None,
+      flush_interval=DEFAULT_FLUSH_INTERVAL):
    """Add event to Elasticsearch.

    Args:
@ -126,17 +125,8 @@ class ElasticsearchDataStore(object):
        event[k] = v

      # Header needed by Elasticsearch when bulk inserting.
-      header = {
-          'index': {
-              '_index': index_name
-          }
-      }
-      update_header = {
-          'update': {
-              '_index': index_name,
-              '_id': event_id
-          }
-      }
+      header = {'index': {'_index': index_name}}
+      update_header = {'update': {'_index': index_name, '_id': event_id}}

      if event_id:
        # Event has "lang" defined if there is a script used for import.
@ -182,7 +172,4 @@ class ElasticsearchDataStore(object):
    search_type = 'query_then_fetch'

    return self.client.search(
-        body=query_dsl,
-        index=index_id,
-        size=size,
-        search_type=search_type)
+        body=query_dsl, index=index_id, size=size, search_type=search_type)
--- a/dfdewey/datastore/postgresql.py
+++ b/dfdewey/datastore/postgresql.py
@ -24,22 +24,15 @@ postgresql_logger = logging.getLogger('dfdewey.postgresql')
 postgresql_logger.setLevel(logging.WARNING)


-class PostgresqlDataStore(object):
+class PostgresqlDataStore():
  """Implements the datastore."""

  def __init__(
-      self,
-      host='127.0.0.1',
-      port=5432,
-      db_name='dfdewey',
-      autocommit=False):
+      self, host='127.0.0.1', port=5432, db_name='dfdewey', autocommit=False):
    """Create a PostgreSQL client."""
-    super(PostgresqlDataStore, self).__init__()
+    super().__init__()
    self.db = psycopg2.connect(
-        database=db_name,
-        user='dfdewey',
-        password='password',
-        host=host,
+        database=db_name, user='dfdewey', password='password', host=host,
        port=port)
    if autocommit:
      self.db.set_isolation_level(
@ -60,9 +53,7 @@ class PostgresqlDataStore(object):
      rows: Array of value tuples to be inserted
    """
    extras.execute_values(
-        self.cursor,
-        'INSERT INTO {0:s} VALUES %s'.format(table_spec),
-        rows)
+        self.cursor, 'INSERT INTO {0:s} VALUES %s'.format(table_spec), rows)

  def execute(self, command):
    """Execute a command in the PostgreSQL database.
@ -111,10 +102,7 @@ class PostgresqlDataStore(object):
    self.db.commit()
    self.db.close()
    self.db = psycopg2.connect(
-        database=db_name,
-        user='dfdewey',
-        password='password',
-        host=host,
+        database=db_name, user='dfdewey', password='password', host=host,
        port=port)
    if autocommit:
      self.db.set_isolation_level(
@ -131,7 +119,8 @@ class PostgresqlDataStore(object):
    Returns:
      True if the table already exists, otherwise False
    """
-    self.cursor.execute("""
+    self.cursor.execute(
+        """
        SELECT 1 FROM information_schema.tables
        WHERE table_schema = '{0:s}' AND table_name = '{1:s}'""".format(
            table_schema, table_name))
@ -149,9 +138,9 @@ class PostgresqlDataStore(object):
    Returns:
      True if the value exists, otherwise False
    """
-    self.cursor.execute("""
+    self.cursor.execute(
+        """
        SELECT 1 from {0:s}
-        WHERE {1:s} = '{2:s}'""".format(
-            table_name, column_name, value))
+        WHERE {1:s} = '{2:s}'""".format(table_name, column_name, value))

    return self.cursor.fetchone()
--- a/dfdewey/dfdcli.py
+++ b/dfdewey/dfdcli.py
@ -25,7 +25,6 @@ from dfdewey.datastore.elastic import ElasticsearchDataStore
 from dfdewey.datastore.postgresql import PostgresqlDataStore
 from dfdewey.utils import image

-
 STRING_INDEXING_LOG_INTERVAL = 10000000


@ -89,10 +88,7 @@ def process_image(image_file, case, base64, gunzip, unzip):
  image_path = os.path.abspath(image_file)
  output_path = tempfile.mkdtemp()

-  cmd = ['bulk_extractor',
-         '-o', output_path,
-         '-x', 'all',
-         '-e', 'wordlist']
+  cmd = ['bulk_extractor', '-o', output_path, '-x', 'all', '-e', 'wordlist']

  if base64:
    cmd.extend(['-e', 'base64'])
@ -109,7 +105,7 @@ def process_image(image_file, case, base64, gunzip, unzip):
  print('\n*** Running bulk extractor:\n{0:s}'.format(' '.join(cmd)))
  output = subprocess.check_output(cmd)
  md5_offset = output.index(b'MD5') + 19
-  image_hash = output[md5_offset:md5_offset+32].decode('utf-8')
+  image_hash = output[md5_offset:md5_offset + 32].decode('utf-8')
  print('String extraction completed: {0!s}'.format(datetime.datetime.now()))

  print('\n*** Parsing image')
@ -211,8 +207,9 @@ def search(query, case, image_path=None, query_list=None):

    images[image_hash[0]] = image_path
  else:
-    print('No image specified, searching all images in case \'{0:s}\''.format(
-        case))
+    print(
+        'No image specified, searching all images in case \'{0:s}\''.format(
+            case))
    image_hashes = case_db.query(
        'SELECT image_hash FROM image_case WHERE case_id = \'{0:s}\''.format(
            case))
@ -234,29 +231,28 @@ def search(query, case, image_path=None, query_list=None):
          term = ''.join(('"', term.strip(), '"'))
          results = search_index(index, term)
          if results['hits']['total']['value'] > 0:
-            print('{0:s} - {1:d} hits'.format(
-                term, results['hits']['total']['value']))
+            print(
+                '{0:s} - {1:d} hits'.format(
+                    term, results['hits']['total']['value']))
    else:
      print('\n*** Searching for \'{0:s}\'...'.format(query))
      results = search_index(index, query)
      print('Returned {0:d} results:'.format(results['hits']['total']['value']))
      for hit in results['hits']['hits']:
        filename = image.get_filename_from_offset(
-            image_path,
-            hit['_source']['image'],
-            int(hit['_source']['offset']))
+            image_path, hit['_source']['image'], int(hit['_source']['offset']))
        if hit['_source']['file_offset']:
-          print('Offset: {0:d}\tFile: {1:s}\tFile offset:{2:s}\t'
-                'String: {3:s}'.format(
-                    hit['_source']['offset'],
-                    filename,
-                    hit['_source']['file_offset'],
-                    hit['_source']['data'].strip()))
+          print(
+              'Offset: {0:d}\tFile: {1:s}\tFile offset:{2:s}\t'
+              'String: {3:s}'.format(
+                  hit['_source']['offset'], filename,
+                  hit['_source']['file_offset'],
+                  hit['_source']['data'].strip()))
        else:
-          print('Offset: {0:d}\tFile: {1:s}\tString: {2:s}'.format(
-              hit['_source']['offset'],
-              filename,
-              hit['_source']['data'].strip()))
+          print(
+              'Offset: {0:d}\tFile: {1:s}\tString: {2:s}'.format(
+                  hit['_source']['offset'], filename,
+                  hit['_source']['data'].strip()))


 def search_index(index_id, search_query):
@ -278,8 +274,8 @@ def main():
  args = parse_args()
  if not args.search and not args.search_list:
    process_image(
-        args.image, args.case,
-        not args.no_base64, not args.no_gzip, not args.no_zip)
+        args.image, args.case, not args.no_base64, not args.no_gzip,
+        not args.no_zip)
  elif args.search:
    search(args.search, args.case, args.image)
  elif args.search_list:
--- a/dfdewey/utils/image.py
+++ b/dfdewey/utils/image.py
@ -14,9 +14,10 @@
 # limitations under the License.
 """Image File Access Functions."""

-from dfdewey.datastore.postgresql import PostgresqlDataStore
 import pytsk3

+from dfdewey.datastore.postgresql import PostgresqlDataStore
+

 def initialise_block_db(image_path, image_hash, case):
  """Creates a new image database.
@ -68,7 +69,8 @@ def check_tracking_database(tracking_db, image_path, image_hash, case):
    tracking_db.execute(
        'CREATE TABLE images (image_path TEXT, image_hash TEXT PRIMARY KEY)')

-    tracking_db.execute("""
+    tracking_db.execute(
+        """
        CREATE TABLE image_case (
          case_id TEXT, image_hash TEXT REFERENCES images(image_hash), 
          PRIMARY KEY (case_id, image_hash))""")
@ -77,7 +79,8 @@ def check_tracking_database(tracking_db, image_path, image_hash, case):

  image_case_exists = False
  if image_exists:
-    image_case = tracking_db.query_single_row("""
+    image_case = tracking_db.query_single_row(
+        """
        SELECT 1 from image_case
        WHERE image_hash = '{0:s}' AND case_id = '{1:s}'""".format(
            image_hash, case))
@ -85,11 +88,13 @@ def check_tracking_database(tracking_db, image_path, image_hash, case):
      image_case_exists = True

  if not image_exists:
-    tracking_db.execute("""
+    tracking_db.execute(
+        """
        INSERT INTO images (image_path, image_hash)
        VALUES ('{0:s}', '{1:s}')""".format(image_path, image_hash))
  if not image_case_exists:
-    tracking_db.execute("""
+    tracking_db.execute(
+        """
        INSERT INTO image_case (case_id, image_hash)
        VALUES ('{0:s}', '{1:s}')""".format(case, image_hash))

@ -118,18 +123,25 @@ def populate_block_db(img, block_db, batch_size=1500):
      has_partition_table = True
    rows = []
    for part in volume:
-      print('Parsing partition {0:d}: {1:s}'.format(
-          part.addr, part.desc.decode('utf-8')))
+      print(
+          'Parsing partition {0:d}: {1:s}'.format(
+              part.addr, part.desc.decode('utf-8')))
      if part.flags != pytsk3.TSK_VS_PART_FLAG_ALLOC:
        continue
-      fs = pytsk3.FS_Info(img, offset=part.start * volume.info.block_size)
-      for inode in range(fs.info.first_inum, fs.info.last_inum + 1):
-        file = fs.open_meta(inode)
+      filesystem = pytsk3.FS_Info(
+          img, offset=part.start * volume.info.block_size)
+      for inode in range(filesystem.info.first_inum,
+                         filesystem.info.last_inum + 1):
+        file = filesystem.open_meta(inode)
        if file.info.meta.nlink > 0:
          for attr in file:
            for run in attr:
              for block in range(run.len):
-                rows.append((run.addr + block, inode, part.addr,))
+                rows.append((
+                    run.addr + block,
+                    inode,
+                    part.addr,
+                ))
                if len(rows) >= batch_size:
                  block_db.bulk_insert('blocks (block, inum, part)', rows)
                  rows = []
@ -137,22 +149,26 @@ def populate_block_db(img, block_db, batch_size=1500):
        block_db.bulk_insert('blocks (block, inum, part)', rows)

      # File names
-      directory = fs.open_dir(path='/')
+      directory = filesystem.open_dir(path='/')
      list_directory(block_db, directory, part=part.addr, batch_size=batch_size)
  except IOError:
    pass

  if not has_partition_table:
-    fs = pytsk3.FS_Info(img)
+    filesystem = pytsk3.FS_Info(img)
    rows = []
-    for inode in range(fs.info.first_inum, fs.info.last_inum + 1):
+    for inode in range(filesystem.info.first_inum,
+                       filesystem.info.last_inum + 1):
      try:
-        file = fs.open_meta(inode)
+        file = filesystem.open_meta(inode)
        if file.info.meta.nlink > 0:
          for attr in file:
            for run in attr:
              for block in range(run.len):
-                rows.append((run.addr + block, inode,))
+                rows.append((
+                    run.addr + block,
+                    inode,
+                ))
                if len(rows) >= batch_size:
                  block_db.bulk_insert('blocks (block, inum)', rows)
                  rows = []
@ -162,7 +178,7 @@ def populate_block_db(img, block_db, batch_size=1500):
        continue

    # File names
-    directory = fs.open_dir(path='/')
+    directory = filesystem.open_dir(path='/')
    list_directory(block_db, directory, batch_size=batch_size)

  block_db.execute('CREATE INDEX blocks_index ON blocks (block, part);')
@ -205,15 +221,19 @@ def list_directory(
      print('Unable to decode: {}'.format(directory_entry.info.name.name))
      continue
    if part:
-      rows.append((directory_entry.info.meta.addr,
-                   name.replace('\'', '\'\''),
-                   part,))
+      rows.append((
+          directory_entry.info.meta.addr,
+          name.replace('\'', '\'\''),
+          part,
+      ))
      if len(rows) >= batch_size:
        block_db.bulk_insert('files (inum, filename, part)', rows)
        rows = []
    else:
-      rows.append((directory_entry.info.meta.addr,
-                   name.replace('\'', '\'\''),))
+      rows.append((
+          directory_entry.info.meta.addr,
+          name.replace('\'', '\'\''),
+      ))
      if len(rows) >= batch_size:
        block_db.bulk_insert('files (inum, filename)', rows)
        rows = []
@ -224,11 +244,7 @@ def list_directory(

      if inode not in stack:
        rows = list_directory(
-            block_db,
-            sub_directory,
-            part=part,
-            stack=stack,
-            rows=rows,
+            block_db, sub_directory, part=part, stack=stack, rows=rows,
            batch_size=batch_size)

    except IOError:
@ -281,14 +297,14 @@ def get_filename_from_offset(image_path, image_hash, offset):
  if not unalloc_part:
    try:
      if not partition_offset:
-        fs = pytsk3.FS_Info(img)
+        filesystem = pytsk3.FS_Info(img)
      else:
        offset -= partition_offset * device_block_size
-        fs = pytsk3.FS_Info(
+        filesystem = pytsk3.FS_Info(
            img, offset=partition_offset * device_block_size)
    except TypeError as e:
      print(e)
-    block_size = fs.info.block_size
+    block_size = filesystem.info.block_size

    inums = get_inums(block_db, offset / block_size, part=partition)

@ -296,7 +312,7 @@ def get_filename_from_offset(image_path, image_hash, offset):
  if inums:
    for i in inums:
      real_inum = i[0]
-      if i[0] == 0 and fs.info.ftype == pytsk3.TSK_FS_TYPE_NTFS_DETECT:
+      if i[0] == 0 and filesystem.info.ftype == pytsk3.TSK_FS_TYPE_NTFS_DETECT:
        mft_record_size_offset = 0x40
        if partition_offset:
          mft_record_size_offset = \
@ -304,10 +320,10 @@ def get_filename_from_offset(image_path, image_hash, offset):
        mft_record_size = int.from_bytes(
            img.read(mft_record_size_offset, 1), 'little', signed=True)
        if mft_record_size < 0:
-          mft_record_size = 2 ** (mft_record_size * -1)
+          mft_record_size = 2**(mft_record_size * -1)
        else:
          mft_record_size = mft_record_size * block_size
-        real_inum = get_resident_inum(offset, fs, mft_record_size)
+        real_inum = get_resident_inum(offset, filesystem, mft_record_size)
      filename = get_filename(block_db, real_inum, part=partition)
      if filename and not filenames:
        filenames.append('{0:s} ({1:d})'.format(filename, real_inum))
@ -343,21 +359,21 @@ def get_inums(block_db, block, part=None):
  return inums


-def get_resident_inum(offset, fs, mft_record_size):
+def get_resident_inum(offset, filesystem, mft_record_size):
  """Gets the inode number associated with NTFS $MFT resident data.

  Args:
    offset: Data offset within volume
-    fs: pytsk3 FS_INFO object
+    filesystem: pytsk3 FS_INFO object
    mft_record_size: Size of an $MFT entry

  Returns:
    inode number of resident data
  """
-  block_size = fs.info.block_size
+  block_size = filesystem.info.block_size
  offset_block = int(offset / block_size)

-  inode = fs.open_meta(0)
+  inode = filesystem.open_meta(0)
  mft_entry = 0
  for attr in inode:
    for run in attr:
--- a/dfdewey/yapf_test.py
+++ b/dfdewey/yapf_test.py
@ -0,0 +1,42 @@
+# -*- coding: utf-8 -*-
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Enforce code style with YAPF."""
+
+import os
+import subprocess
+import unittest
+
+
+class StyleTest(unittest.TestCase):
+  """Enforce code style requirements."""
+
+  def testCodeStyle(self):
+    """Check YAPF style enforcement runs cleanly."""
+    dfdewey_path = os.path.abspath(os.path.dirname(__file__))
+    config_path = os.path.join(dfdewey_path, '..', '.style.yapf')
+    try:
+      subprocess.check_output(
+          ['yapf', '--style', config_path, '--diff', '-r', dfdewey_path])
+    except subprocess.CalledProcessError as e:
+      if hasattr(e, 'output'):
+        raise Exception(
+            'Run "yapf --style {0:s} -i -r {1:s}" '
+            'to correct these problems: {2:s}'.format(
+                config_path, dfdewey_path, e.output.decode('utf-8'))) from e
+      raise
+
+
+if __name__ == '__main__':
+  unittest.main()
--- a/run_tests.py
+++ b/run_tests.py
@ -0,0 +1,23 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Script to run tests."""
+
+import subprocess
+
+if __name__ == '__main__':
+  subprocess.check_call([
+      'nosetests', '-vv', '--with-coverage', '--cover-package=dfdewey', '--exe'
+  ])
--- a/setup.py
+++ b/setup.py
@ -20,9 +20,13 @@ import sys
 from setuptools import find_packages
 from setuptools import setup

+import dfdewey
+
 sys.path.insert(0, '.')

-import dfdewey
+DFDEWEY_DESCRIPTION = (
+    'dfDewey is a digital forensics string extraction, indexing, and searching '
+    'tool.')

 requirements = []
 with open('requirements.txt','r') as f:
@ -30,7 +34,7 @@ with open('requirements.txt','r') as f:
 setup(
    name='dfDewey',
    version=dfdewey.__version__,
-    description='dfDewey is a digital forensics string extraction, indexing, and searching tool.',
+    description=DFDEWEY_DESCRIPTION,
    license='Apache License, Version 2.0',
    maintainer='dfDewey development team',
    maintainer_email='dfdewey-dev@googlegroups.com',
@ -38,6 +42,6 @@ setup(
    include_package_data=True,
    install_requires=requirements,
    extras_require={
-        'dev': []
+        'dev': ['mock', 'nose', 'yapf', 'coverage']
    }
 )