From 0ff2bed3ef8e31b8b050793c05a669b826a29d37 Mon Sep 17 00:00:00 2001
From: Jason Solomon
Date: Mon, 16 Nov 2020 14:30:34 +1100
Subject: [PATCH] Formatting test

---
 .gitignore                      |  15 ++
 .pylintrc                       | 365 ++++++++++++++++++++++++++++++++
 .style.yapf                     |  11 +
 dfdewey/datastore/__init__.py   |   2 +-
 dfdewey/datastore/elastic.py    |  33 +--
 dfdewey/datastore/postgresql.py |  33 +--
 dfdewey/dfdcli.py               |  46 ++--
 dfdewey/utils/__init__.py       |   2 +-
 dfdewey/utils/image.py          |  90 ++++----
 dfdewey/yapf_test.py            |  42 ++++
 run_tests.py                    |  23 ++
 setup.py                        |  10 +-
 12 files changed, 560 insertions(+), 112 deletions(-)
 create mode 100644 .pylintrc
 create mode 100644 .style.yapf
 create mode 100644 dfdewey/yapf_test.py
 create mode 100755 run_tests.py

diff --git a/.gitignore b/.gitignore
index 8d5a743..3e38430 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,12 +3,27 @@
 # Back-up files
 *~
 
+# Don't include build related files.
+/build/
+/dist/
+
 # Egg files
 /dfDewey.egg-info
 
+# Test files
+.coverage
+
 # Generic auto-generated build files
 *.pyc
 *.pyo
 
 # Specific auto-generated build files
 /__pycache__
+
+# IDE files
+.idea/
+.vscode/
+
+# Pipfile
+Pipfile
+Pipfile.lock
diff --git a/.pylintrc b/.pylintrc
new file mode 100644
index 0000000..4498033
--- /dev/null
+++ b/.pylintrc
@@ -0,0 +1,365 @@
+# Original file copied from:
+# https://chromium.googlesource.com/chromiumos/chromite/+/master/pylintrc
+
+[MASTER]
+
+# Specify a configuration file.
+#rcfile=
+
+# Python code to execute, usually for sys.path manipulation such as
+# pygtk.require().
+#init-hook=
+
+# Profiled execution.
+#profile=no
+
+# Add <file or directory> to the black list. It should be a base name, not a
+# path. You may set this option multiple times.
+ignore=CVS
+
+# Add files or directories matching the regex patterns to the blacklist. The
+# regex matches against base names, not paths.
+ignore-patterns=
+    .*_pb2\.py$
+
+# Pickle collected data for later comparisons.
+persistent=yes
+
+# List of plugins (as comma separated values of python modules names) to load,
+# usually to register additional checkers.
+#load-plugins=
+
+# Configure quote preferences.
+string-quote = single-avoid-escape
+triple-quote = double
+docstring-quote = double
+
+
+[MESSAGES CONTROL]
+
+# Enable the message, report, category or checker with the given id(s). You can
+# either give multiple identifiers separated by comma (,) or put this option
+# multiple times.
+# cros-logging-import: logging is deprecated. Use "from chromite.lib import
+#   cros_logging as logging" to import chromite/lib/cros_logging.
+# eq-without-hash: We omit this as we don't require all objects be hashable.
+#   We'll wait for unittest coverage to detect missing __hash__ on objects.
+# no-absolute-import: We don't seem to rely on this behavior, so don't enforce
+#   using this future import everywhere.
+# round-builtin: We omit this as all our usage of round() is OK with either
+#   Python 2 or 3 behavior (and probably leans towards 3 anyways).
+#enable=
+
+# Disable the message, report, category or checker with the given id(s). You
+# can either give multiple identifiers separated by comma (,) or put this
+# option multiple times (only on the command line, not in the configuration
+# file where it should appear only once). You can also use "--disable=all" to
+# disable everything first and then reenable specific checks. For example, if
+# you want to run only the similarities checker, you can use "--disable=all
+# --enable=similarities". If you want to run only the classes checker, but have
+# no Warning level messages displayed, use "--disable=all --enable=classes
+# --disable=W".
+disable=
+
+
+[REPORTS]
+
+# Set the output format. Available formats are text, parseable, colorized, msvs
+# (visual studio) and html
+output-format=text
+
+# Put messages in a separate file for each module / package specified on the
+# command line instead of printing them on stdout. Reports (if any) will be
+# written in a file name "pylint_global.[txt|html]".
+files-output=no
+
+# Tells whether to display a full report or only the messages
+# CHANGE: No report.
+reports=no
+
+# Activate the evaluation score.
+score=no
+
+# Python expression which should return a note less than 10 (10 is the highest
+# note). You have access to the variables error, warning and statement, which
+# respectively contain the number of errors / warnings messages and the total
+# number of statements analyzed. This is used by the global evaluation report
+# (RP0004).
+evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
+
+# Add a comment according to your evaluation note. This is used by the global
+# evaluation report (RP0004).
+comment=no
+
+
+[MISCELLANEOUS]
+
+# List of note tags to take in consideration, separated by a comma.
+notes=FIXME,XXX,TODO
+
+
+[FORMAT]
+
+# Maximum number of characters on a single line.
+max-line-length=80
+
+# Maximum number of lines in a module
+max-module-lines=1000
+
+# String used as indentation unit. This is usually "    " (4 spaces) or "\t" (1
+# tab).
+# CHANGE: Use "  " instead.
+indent-string='  '
+
+
+[TYPECHECK]
+
+# Tells whether missing members accessed in mixin class should be ignored. A
+# mixin class is detected if its name ends with "mixin" (case insensitive).
+ignore-mixin-members=yes
+
+# List of class names for which member attributes should not be checked
+# (useful for classes with attributes dynamically set).
+ignored-classes=pytsk3
+
+# When zope mode is activated, add a predefined set of Zope acquired attributes
+# to generated-members.
+#zope=no
+
+# List of members which are set dynamically and missed by pylint inference
+# system, and so shouldn't trigger E0201 when accessed.
+# CHANGE: Added 'AndRaise', 'AndReturn', 'InAnyOrder' and 'MultipleTimes' for pymox.
+# CHANGE: Added tempdir for @osutils.TempDirDecorator.
+#generated-members=
+
+# List of modules for which member attributes should not be checked.
+# Modules listed here will not trigger import errors even if the linter can't
+# import them.
+#
+# pytest: Made available by our testing virtualenv and can be assumed exists.
+ignored-modules=pytest
+
+
+[BASIC]
+
+# Required attributes for module, separated by a comma
+#required-attributes=
+
+# List of builtins function names that should not be used, separated by a comma.
+# exit & quit are for the interactive interpreter shell only.
+# https://docs.python.org/3/library/constants.html#constants-added-by-the-site-module
+bad-functions=
+    apply,
+    exit,
+    filter,
+    input,
+    map,
+    quit,
+    raw_input,
+    reduce,
+
+# Regular expression which should only match correct module names
+module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
+
+# Regular expression which should only match correct module level names
+const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$
+
+# Regular expression which should only match correct class names
+class-rgx=[A-Z_][a-zA-Z0-9]+$
+
+# Regular expression which should only match correct function names
+function-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct method names
+method-rgx=(test[A-Za-z0-9_]{2,30})|([a-z_][a-z0-9_]{2,30})$
+
+# Regular expression which should only match correct instance attribute names
+attr-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct argument names
+argument-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct variable names
+variable-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct list comprehension /
+# generator expression variable names
+inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$
+
+# Good variable names which should always be accepted, separated by a comma
+good-names=e,i,j,k,ex,Run,_,db,es
+
+# Bad variable names which should always be refused, separated by a comma
+bad-names=foo,bar,baz,toto,tutu,tata
+
+# Regular expression which should only match function or class names which do
+# not require a docstring
+no-docstring-rgx=__.*__
+
+
+[SIMILARITIES]
+
+# Minimum lines number of a similarity.
+min-similarity-lines=20
+
+# Ignore comments when computing similarities.
+ignore-comments=yes
+
+# Ignore docstrings when computing similarities.
+ignore-docstrings=yes
+
+
+[VARIABLES]
+
+# Tells whether we should check for unused import in __init__ files.
+init-import=no
+
+# A regular expression matching the beginning of the name of dummy variables
+# (i.e. not used).
+dummy-variables-rgx=_|unused_
+
+# List of additional names supposed to be defined in builtins. Remember that
+# you should avoid to define new builtins when possible.
+#additional-builtins=
+
+
+[CLASSES]
+
+# List of interface methods to ignore, separated by a comma. This is used for
+# instance to not check methods defined in Zope's Interface base class.
+#ignore-iface-methods=
+
+# List of method names used to declare (i.e. assign) instance attributes.
+defining-attr-methods=__init__,__new__,setUp
+
+
+[DESIGN]
+
+# Maximum number of arguments for function / method
+max-args=5
+
+# Argument names that match this expression will be ignored. Default to name
+# with leading underscore
+ignored-argument-names=_.*
+
+# Maximum number of locals for function / method body
+max-locals=15
+
+# Maximum number of return / yield for function / method body
+max-returns=6
+
+# Maximum number of branches for function / method body
+max-branchs=12
+
+# Maximum number of statements in function / method body
+max-statements=50
+
+# Maximum number of parents for a class (see R0901).
+max-parents=10
+
+# Maximum number of attributes for a class (see R0902).
+max-attributes=7
+
+# Minimum number of public methods for a class (see R0903).
+min-public-methods=2
+
+# Maximum number of public methods for a class (see R0904).
+max-public-methods=20
+
+
+[IMPORTS]
+
+# Deprecated modules which should not be used, separated by a comma.
+# __builtin__: Use the 'six.moves.builtins' module instead
+#   (or 'builtins' in Python 3).
+# apiclient: Use the 'googleapiclient' module instead.
+# Bastion: Dropped in Python 3.
+# ConfigParser: Use the 'six.moves.configparser' module instead
+#   (or 'configparser' in Python 3).
+# cookielib: Use the 'six.moves.http_cookiejar' module instead
+#   (or 'http.cookiejar' in Python 3).
+# cPickle: Use the 'pickle' module instead.
+# cStringIO: Use 'io.StringIO' or 'io.BytesIO' instead.
+# exceptions: Dropped in Python 3.
+# HTMLParser: Use the 'six.moves.html_parser' module instead
+#   (or 'html.parser' in Python 3).
+# httplib: Use the 'six.moves.http_client' module instead
+#   (or 'http.client' in Python 3).
+# md5: Use the 'hashlib' module instead.
+# mox: Use the 'mock' module instead.
+# optparse: Use the 'argparse' module instead.
+# Queue: Use the 'six.moves.queue' module instead (or 'queue' in Python 3).
+# regsub: Use the 're' module instead.
+# rexec: Dropped in Python 3.
+# StringIO: Use 'io.StringIO' or 'io.BytesIO' instead.
+# TERMIOS: Use the 'termios' module instead.
+# urllib2: Use the 'six.moves.urllib' module instead
+#   (or 'urllib.request' in Python 3).
+# urlparse: Use the 'six.moves.urllib' module instead
+#   (or 'urllib.parse' in Python 3).
+deprecated-modules=
+    __builtin__,
+    apiclient,
+    Bastion,
+    ConfigParser,
+    cookielib,
+    cPickle,
+    cStringIO,
+    exceptions,
+    HTMLParser,
+    httplib,
+    md5,
+    mox,
+    optparse,
+    Queue,
+    regsub,
+    rexec,
+    StringIO,
+    TERMIOS,
+    urllib2,
+    urlparse,
+
+# Create a graph of all (i.e. internal and external) dependencies in the
+# given file (report RP0402 must not be disabled)
+#import-graph=
+
+# Create a graph of external dependencies in the given file (report RP0402 must
+# not be disabled)
+#ext-import-graph=
+
+# Create a graph of internal dependencies in the given file (report RP0402 must
+# not be disabled)
+#int-import-graph=
+
+# Force import order to recognize a module as part of the standard
+# compatibility libraries.
+known-standard-library=
+
+# Force import order to recognize a module as part of a third party library.
+known-third-party=
+    _emerge,
+    apiclient,
+    elftools,
+    gcloud,
+    google,
+    googleapiclient,
+    httplib2,
+    jinja2,
+    jsonschema,
+    lddtree,
+    magic,
+    mock,
+    oauth2client,
+    portage,
+    pylint,
+    requests,
+    six,
+    sqlalchemy,
+    yaml,
+
+
+[LOGGING]
+
+# Apply logging string format checks to calls on these modules.
+logging-modules=
+    logging,
diff --git a/.style.yapf b/.style.yapf
new file mode 100644
index 0000000..43b8e6e
--- /dev/null
+++ b/.style.yapf
@@ -0,0 +1,11 @@
+#
+# To run yapf for this project, invoke as such from the base directory:
+# yapf -i -r --style .style.yapf ./dfdewey/
+#
+[style]
+based_on_style = yapf
+COALESCE_BRACKETS = True
+SPLIT_BEFORE_FIRST_ARGUMENT = True
+SPLIT_PENALTY_AFTER_OPENING_BRACKET = 0
+SPLIT_PENALTY_FOR_ADDED_LINE_SPLIT = 30
+SPLIT_BEFORE_NAMED_ASSIGNS = False
diff --git a/dfdewey/datastore/__init__.py b/dfdewey/datastore/__init__.py
index 692a75a..d91e05e 100644
--- a/dfdewey/datastore/__init__.py
+++ b/dfdewey/datastore/__init__.py
@@ -12,4 +12,4 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""DFDewey Datastore Package.""" \ No newline at end of file +"""DFDewey Datastore Package.""" diff --git a/dfdewey/datastore/elastic.py b/dfdewey/datastore/elastic.py index 6831eca..7872d22 100644 --- a/dfdewey/datastore/elastic.py +++ b/dfdewey/datastore/elastic.py @@ -27,7 +27,7 @@ es_logger = logging.getLogger('dfdewey.elasticsearch') es_logger.setLevel(logging.WARNING) -class ElasticsearchDataStore(object): +class ElasticsearchDataStore(): """Implements the datastore.""" # Number of events to queue up when bulk inserting events. @@ -36,7 +36,7 @@ class ElasticsearchDataStore(object): def __init__(self, host='127.0.0.1', port=9200): """Create an Elasticsearch client.""" - super(ElasticsearchDataStore, self).__init__() + super().__init__() self.client = Elasticsearch([{'host': host, 'port': port}], timeout=30) self.import_counter = collections.Counter() self.import_events = [] @@ -79,8 +79,8 @@ class ElasticsearchDataStore(object): if not self.client.indices.exists(index_name): try: self.client.indices.create(index=index_name) - except exceptions.ConnectionError: - raise RuntimeError('Unable to connect to backend datastore.') + except exceptions.ConnectionError as e: + raise RuntimeError('Unable to connect to backend datastore.') from e if not isinstance(index_name, six.text_type): index_name = codecs.decode(index_name, 'utf8') @@ -97,12 +97,11 @@ class ElasticsearchDataStore(object): try: self.client.indices.delete(index=index_name) except exceptions.ConnectionError as e: - raise RuntimeError( - 'Unable to connect to backend datastore: {}'.format(e)) + raise RuntimeError('Unable to connect to backend datastore.') from e def import_event( - self, index_name, event=None, - event_id=None, flush_interval=DEFAULT_FLUSH_INTERVAL): + self, index_name, event=None, event_id=None, + flush_interval=DEFAULT_FLUSH_INTERVAL): """Add event to Elasticsearch. Args: @@ -126,17 +125,8 @@ class ElasticsearchDataStore(object): event[k] = v # Header needed by Elasticsearch when bulk inserting. - header = { - 'index': { - '_index': index_name - } - } - update_header = { - 'update': { - '_index': index_name, - '_id': event_id - } - } + header = {'index': {'_index': index_name}} + update_header = {'update': {'_index': index_name, '_id': event_id}} if event_id: # Event has "lang" defined if there is a script used for import. 
@@ -182,7 +172,4 @@ class ElasticsearchDataStore(object):
       search_type = 'query_then_fetch'
 
     return self.client.search(
-        body=query_dsl,
-        index=index_id,
-        size=size,
-        search_type=search_type)
+        body=query_dsl, index=index_id, size=size, search_type=search_type)
diff --git a/dfdewey/datastore/postgresql.py b/dfdewey/datastore/postgresql.py
index 86c2663..921d1e8 100644
--- a/dfdewey/datastore/postgresql.py
+++ b/dfdewey/datastore/postgresql.py
@@ -24,22 +24,15 @@
 postgresql_logger = logging.getLogger('dfdewey.postgresql')
 postgresql_logger.setLevel(logging.WARNING)
 
-class PostgresqlDataStore(object):
+class PostgresqlDataStore():
   """Implements the datastore."""
 
   def __init__(
-      self,
-      host='127.0.0.1',
-      port=5432,
-      db_name='dfdewey',
-      autocommit=False):
+      self, host='127.0.0.1', port=5432, db_name='dfdewey', autocommit=False):
     """Create a PostgreSQL client."""
-    super(PostgresqlDataStore, self).__init__()
+    super().__init__()
     self.db = psycopg2.connect(
-        database=db_name,
-        user='dfdewey',
-        password='password',
-        host=host,
+        database=db_name, user='dfdewey', password='password', host=host,
         port=port)
     if autocommit:
       self.db.set_isolation_level(
@@ -60,9 +53,7 @@ class PostgresqlDataStore(object):
       rows: Array of value tuples to be inserted
     """
     extras.execute_values(
-        self.cursor,
-        'INSERT INTO {0:s} VALUES %s'.format(table_spec),
-        rows)
+        self.cursor, 'INSERT INTO {0:s} VALUES %s'.format(table_spec), rows)
 
   def execute(self, command):
     """Execute a command in the PostgreSQL database.
@@ -111,10 +102,7 @@ class PostgresqlDataStore(object):
     self.db.commit()
     self.db.close()
     self.db = psycopg2.connect(
-        database=db_name,
-        user='dfdewey',
-        password='password',
-        host=host,
+        database=db_name, user='dfdewey', password='password', host=host,
        port=port)
     if autocommit:
       self.db.set_isolation_level(
@@ -131,7 +119,8 @@ class PostgresqlDataStore(object):
     Returns:
       True if the table already exists, otherwise False
     """
-    self.cursor.execute("""
+    self.cursor.execute(
+        """
         SELECT 1 FROM information_schema.tables
         WHERE table_schema = '{0:s}' AND table_name = '{1:s}'""".format(
             table_schema, table_name))
@@ -149,9 +138,9 @@ class PostgresqlDataStore(object):
     Returns:
       True if the value exists, otherwise False
     """
-    self.cursor.execute("""
+    self.cursor.execute(
+        """
         SELECT 1 from {0:s}
-      WHERE {1:s} = '{2:s}'""".format(
-          table_name, column_name, value))
+        WHERE {1:s} = '{2:s}'""".format(table_name, column_name, value))
 
     return self.cursor.fetchone()
diff --git a/dfdewey/dfdcli.py b/dfdewey/dfdcli.py
index e983f1e..9622cd6 100755
--- a/dfdewey/dfdcli.py
+++ b/dfdewey/dfdcli.py
@@ -25,7 +25,6 @@
 from dfdewey.datastore.elastic import ElasticsearchDataStore
 from dfdewey.datastore.postgresql import PostgresqlDataStore
 from dfdewey.utils import image
 
-
 STRING_INDEXING_LOG_INTERVAL = 10000000
 
@@ -89,10 +88,7 @@ def process_image(image_file, case, base64, gunzip, unzip):
   image_path = os.path.abspath(image_file)
   output_path = tempfile.mkdtemp()
 
-  cmd = ['bulk_extractor',
-         '-o', output_path,
-         '-x', 'all',
-         '-e', 'wordlist']
+  cmd = ['bulk_extractor', '-o', output_path, '-x', 'all', '-e', 'wordlist']
 
   if base64:
     cmd.extend(['-e', 'base64'])
@@ -109,7 +105,7 @@ def process_image(image_file, case, base64, gunzip, unzip):
   print('\n*** Running bulk extractor:\n{0:s}'.format(' '.join(cmd)))
   output = subprocess.check_output(cmd)
   md5_offset = output.index(b'MD5') + 19
-  image_hash = output[md5_offset:md5_offset+32].decode('utf-8')
+  image_hash = output[md5_offset:md5_offset + 32].decode('utf-8')
   print('String extraction completed: {0!s}'.format(datetime.datetime.now()))
 
   print('\n*** Parsing image')
@@ -211,8 +207,9 @@ def search(query, case, image_path=None, query_list=None):
       images[image_hash[0]] = image_path
 
   else:
-    print('No image specified, searching all images in case \'{0:s}\''.format(
-        case))
+    print(
+        'No image specified, searching all images in case \'{0:s}\''.format(
+            case))
     image_hashes = case_db.query(
         'SELECT image_hash FROM image_case WHERE case_id = \'{0:s}\''.format(
             case))
@@ -234,29 +231,28 @@ def search(query, case, image_path=None, query_list=None):
       term = ''.join(('"', term.strip(), '"'))
       results = search_index(index, term)
       if results['hits']['total']['value'] > 0:
-        print('{0:s} - {1:d} hits'.format(
-            term, results['hits']['total']['value']))
+        print(
+            '{0:s} - {1:d} hits'.format(
+                term, results['hits']['total']['value']))
   else:
     print('\n*** Searching for \'{0:s}\'...'.format(query))
     results = search_index(index, query)
     print('Returned {0:d} results:'.format(results['hits']['total']['value']))
     for hit in results['hits']['hits']:
       filename = image.get_filename_from_offset(
-          image_path,
-          hit['_source']['image'],
-          int(hit['_source']['offset']))
+          image_path, hit['_source']['image'], int(hit['_source']['offset']))
       if hit['_source']['file_offset']:
-        print('Offset: {0:d}\tFile: {1:s}\tFile offset:{2:s}\t'
-              'String: {3:s}'.format(
-                  hit['_source']['offset'],
-                  filename,
-                  hit['_source']['file_offset'],
-                  hit['_source']['data'].strip()))
+        print(
+            'Offset: {0:d}\tFile: {1:s}\tFile offset:{2:s}\t'
+            'String: {3:s}'.format(
+                hit['_source']['offset'], filename,
+                hit['_source']['file_offset'],
+                hit['_source']['data'].strip()))
       else:
-        print('Offset: {0:d}\tFile: {1:s}\tString: {2:s}'.format(
-            hit['_source']['offset'],
-            filename,
-            hit['_source']['data'].strip()))
+        print(
+            'Offset: {0:d}\tFile: {1:s}\tString: {2:s}'.format(
+                hit['_source']['offset'], filename,
+                hit['_source']['data'].strip()))
 
 
 def search_index(index_id, search_query):
@@ -278,8 +274,8 @@ def main():
   args = parse_args()
   if not args.search and not args.search_list:
     process_image(
-        args.image, args.case,
-        not args.no_base64, not args.no_gzip, not args.no_zip)
+        args.image, args.case, not args.no_base64, not args.no_gzip,
+        not args.no_zip)
   elif args.search:
     search(args.search, args.case, args.image)
   elif args.search_list:
diff --git a/dfdewey/utils/__init__.py b/dfdewey/utils/__init__.py
index e2df6ad..925071d 100644
--- a/dfdewey/utils/__init__.py
+++ b/dfdewey/utils/__init__.py
@@ -12,4 +12,4 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""DFDewey Utils Package."""
\ No newline at end of file
+"""DFDewey Utils Package."""
diff --git a/dfdewey/utils/image.py b/dfdewey/utils/image.py
index 56de867..a070d68 100644
--- a/dfdewey/utils/image.py
+++ b/dfdewey/utils/image.py
@@ -14,9 +14,10 @@
 # limitations under the License.
 """Image File Access Functions."""
 
-from dfdewey.datastore.postgresql import PostgresqlDataStore
 import pytsk3
 
+from dfdewey.datastore.postgresql import PostgresqlDataStore
+
 
 def initialise_block_db(image_path, image_hash, case):
   """Creates a new image database.
@@ -68,7 +69,8 @@ def check_tracking_database(tracking_db, image_path, image_hash, case):
     tracking_db.execute(
         'CREATE TABLE images (image_path TEXT, image_hash TEXT PRIMARY KEY)')
 
-    tracking_db.execute("""
+    tracking_db.execute(
+        """
         CREATE TABLE image_case (
         case_id TEXT, image_hash TEXT REFERENCES images(image_hash),
         PRIMARY KEY (case_id, image_hash))""")
@@ -77,7 +79,8 @@ def check_tracking_database(tracking_db, image_path, image_hash, case):
 
   image_case_exists = False
   if image_exists:
-    image_case = tracking_db.query_single_row("""
+    image_case = tracking_db.query_single_row(
+        """
         SELECT 1 from image_case
         WHERE image_hash = '{0:s}' AND case_id = '{1:s}'""".format(
             image_hash, case))
@@ -85,11 +88,13 @@ def check_tracking_database(tracking_db, image_path, image_hash, case):
    if image_case:
      image_case_exists = True
 
   if not image_exists:
-    tracking_db.execute("""
+    tracking_db.execute(
+        """
         INSERT INTO images (image_path, image_hash)
         VALUES ('{0:s}', '{1:s}')""".format(image_path, image_hash))
 
   if not image_case_exists:
-    tracking_db.execute("""
+    tracking_db.execute(
+        """
         INSERT INTO image_case (case_id, image_hash)
         VALUES ('{0:s}', '{1:s}')""".format(case, image_hash))
@@ -118,18 +123,25 @@ def populate_block_db(img, block_db, batch_size=1500):
     has_partition_table = True
     rows = []
     for part in volume:
-      print('Parsing partition {0:d}: {1:s}'.format(
-          part.addr, part.desc.decode('utf-8')))
+      print(
+          'Parsing partition {0:d}: {1:s}'.format(
+              part.addr, part.desc.decode('utf-8')))
       if part.flags != pytsk3.TSK_VS_PART_FLAG_ALLOC:
         continue
-      fs = pytsk3.FS_Info(img, offset=part.start * volume.info.block_size)
-      for inode in range(fs.info.first_inum, fs.info.last_inum + 1):
-        file = fs.open_meta(inode)
+      filesystem = pytsk3.FS_Info(
+          img, offset=part.start * volume.info.block_size)
+      for inode in range(filesystem.info.first_inum,
+                         filesystem.info.last_inum + 1):
+        file = filesystem.open_meta(inode)
         if file.info.meta.nlink > 0:
           for attr in file:
            for run in attr:
              for block in range(run.len):
-                rows.append((run.addr + block, inode, part.addr,))
+                rows.append((
+                    run.addr + block,
+                    inode,
+                    part.addr,
+                ))
                 if len(rows) >= batch_size:
                   block_db.bulk_insert('blocks (block, inum, part)', rows)
                   rows = []
@@ -137,22 +149,26 @@ def populate_block_db(img, block_db, batch_size=1500):
        block_db.bulk_insert('blocks (block, inum, part)', rows)
 
      # File names
-      directory = fs.open_dir(path='/')
+      directory = filesystem.open_dir(path='/')
      list_directory(block_db, directory, part=part.addr, batch_size=batch_size)
  except IOError:
    pass
 
  if not has_partition_table:
-    fs = pytsk3.FS_Info(img)
+    filesystem = pytsk3.FS_Info(img)
    rows = []
-    for inode in range(fs.info.first_inum, fs.info.last_inum + 1):
+    for inode in range(filesystem.info.first_inum,
+                       filesystem.info.last_inum + 1):
      try:
-        file = fs.open_meta(inode)
+        file = filesystem.open_meta(inode)
        if file.info.meta.nlink > 0:
          for attr in file:
            for run in attr:
              for block in range(run.len):
-                rows.append((run.addr + block, inode,))
+                rows.append((
+                    run.addr + block,
+                    inode,
+                ))
                if len(rows) >= batch_size:
                  block_db.bulk_insert('blocks (block, inum)', rows)
                  rows = []
@@ -162,7 +178,7 @@ def populate_block_db(img, block_db, batch_size=1500):
      except OSError:
        continue
 
  # File names
-  directory = fs.open_dir(path='/')
+  directory = filesystem.open_dir(path='/')
  list_directory(block_db, directory, batch_size=batch_size)
  block_db.execute('CREATE INDEX blocks_index ON blocks (block, part);')
@@ -205,15 +221,19 @@ def list_directory(
         print('Unable to decode: {}'.format(directory_entry.info.name.name))
         continue
       if part:
-        rows.append((directory_entry.info.meta.addr,
-                     name.replace('\'', '\'\''),
-                     part,))
+        rows.append((
+            directory_entry.info.meta.addr,
+            name.replace('\'', '\'\''),
+            part,
+        ))
         if len(rows) >= batch_size:
           block_db.bulk_insert('files (inum, filename, part)', rows)
           rows = []
       else:
-        rows.append((directory_entry.info.meta.addr,
-                     name.replace('\'', '\'\''),))
+        rows.append((
+            directory_entry.info.meta.addr,
+            name.replace('\'', '\'\''),
+        ))
         if len(rows) >= batch_size:
           block_db.bulk_insert('files (inum, filename)', rows)
           rows = []
@@ -224,11 +244,7 @@ def list_directory(
 
         if inode not in stack:
           rows = list_directory(
-              block_db,
-              sub_directory,
-              part=part,
-              stack=stack,
-              rows=rows,
+              block_db, sub_directory, part=part, stack=stack, rows=rows,
               batch_size=batch_size)
 
     except IOError:
@@ -281,14 +297,14 @@ def get_filename_from_offset(image_path, image_hash, offset):
   if not unalloc_part:
     try:
       if not partition_offset:
-        fs = pytsk3.FS_Info(img)
+        filesystem = pytsk3.FS_Info(img)
       else:
         offset -= partition_offset * device_block_size
-        fs = pytsk3.FS_Info(
+        filesystem = pytsk3.FS_Info(
             img, offset=partition_offset * device_block_size)
     except TypeError as e:
       print(e)
-  block_size = fs.info.block_size
+  block_size = filesystem.info.block_size
 
   inums = get_inums(block_db, offset / block_size, part=partition)
 
@@ -296,7 +312,7 @@ def get_filename_from_offset(image_path, image_hash, offset):
   filenames = []
   if inums:
     for i in inums:
       real_inum = i[0]
-      if i[0] == 0 and fs.info.ftype == pytsk3.TSK_FS_TYPE_NTFS_DETECT:
+      if i[0] == 0 and filesystem.info.ftype == pytsk3.TSK_FS_TYPE_NTFS_DETECT:
         mft_record_size_offset = 0x40
         if partition_offset:
@@ -304,10 +320,10 @@ def get_filename_from_offset(image_path, image_hash, offset):
           mft_record_size_offset = \
              partition_offset * device_block_size + 0x40
        mft_record_size = int.from_bytes(
            img.read(mft_record_size_offset, 1), 'little', signed=True)
        if mft_record_size < 0:
-          mft_record_size = 2 ** (mft_record_size * -1)
+          mft_record_size = 2**(mft_record_size * -1)
        else:
          mft_record_size = mft_record_size * block_size
-        real_inum = get_resident_inum(offset, fs, mft_record_size)
+        real_inum = get_resident_inum(offset, filesystem, mft_record_size)
      filename = get_filename(block_db, real_inum, part=partition)
      if filename and not filenames:
        filenames.append('{0:s} ({1:d})'.format(filename, real_inum))
@@ -343,21 +359,21 @@ def get_inums(block_db, block, part=None):
   return inums
 
 
-def get_resident_inum(offset, fs, mft_record_size):
+def get_resident_inum(offset, filesystem, mft_record_size):
   """Gets the inode number associated with NTFS $MFT resident data.
 
   Args:
     offset: Data offset within volume
-    fs: pytsk3 FS_INFO object
+    filesystem: pytsk3 FS_INFO object
     mft_record_size: Size of an $MFT entry
 
   Returns:
     inode number of resident data
   """
-  block_size = fs.info.block_size
+  block_size = filesystem.info.block_size
   offset_block = int(offset / block_size)
 
-  inode = fs.open_meta(0)
+  inode = filesystem.open_meta(0)
   mft_entry = 0
   for attr in inode:
     for run in attr:
diff --git a/dfdewey/yapf_test.py b/dfdewey/yapf_test.py
new file mode 100644
index 0000000..c238a8a
--- /dev/null
+++ b/dfdewey/yapf_test.py
@@ -0,0 +1,42 @@
+# -*- coding: utf-8 -*-
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Enforce code style with YAPF."""
+
+import os
+import subprocess
+import unittest
+
+
+class StyleTest(unittest.TestCase):
+  """Enforce code style requirements."""
+
+  def testCodeStyle(self):
+    """Check YAPF style enforcement runs cleanly."""
+    dfdewey_path = os.path.abspath(os.path.dirname(__file__))
+    config_path = os.path.join(dfdewey_path, '..', '.style.yapf')
+    try:
+      subprocess.check_output(
+          ['yapf', '--style', config_path, '--diff', '-r', dfdewey_path])
+    except subprocess.CalledProcessError as e:
+      if hasattr(e, 'output'):
+        raise Exception(
+            'Run "yapf --style {0:s} -i -r {1:s}" '
+            'to correct these problems: {2:s}'.format(
+                config_path, dfdewey_path, e.output.decode('utf-8'))) from e
+      raise
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/run_tests.py b/run_tests.py
new file mode 100755
index 0000000..860c497
--- /dev/null
+++ b/run_tests.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Script to run tests."""
+
+import subprocess
+
+if __name__ == '__main__':
+  subprocess.check_call([
+      'nosetests', '-vv', '--with-coverage', '--cover-package=dfdewey', '--exe'
+  ])
diff --git a/setup.py b/setup.py
index 805b9b0..e0964c5 100644
--- a/setup.py
+++ b/setup.py
@@ -20,9 +20,13 @@ import sys
 from setuptools import find_packages
 from setuptools import setup
 
+import dfdewey
+
 sys.path.insert(0, '.')
-import dfdewey
+DFDEWEY_DESCRIPTION = (
+    'dfDewey is a digital forensics string extraction, indexing, and searching '
+    'tool.')
 
 requirements = []
 with open('requirements.txt','r') as f:
  requirements = f.read().splitlines()
@@ -30,7 +34,7 @@ with open('requirements.txt','r') as f:
 setup(
     name='dfDewey',
     version=dfdewey.__version__,
-    description='dfDewey is a digital forensics string extraction, indexing, and searching tool.',
+    description=DFDEWEY_DESCRIPTION,
     license='Apache License, Version 2.0',
     maintainer='dfDewey development team',
     maintainer_email='dfdewey-dev@googlegroups.com',
@@ -38,6 +42,6 @@ setup(
     include_package_data=True,
     install_requires=requirements,
     extras_require={
-        'dev': []
+        'dev': ['mock', 'nose', 'yapf', 'coverage']
     }
 )