Formatting test

Commit 0ff2bed3ef (parent d277394b5c)
Author: Jason Solomon, 2020-11-16 14:30:34 +11:00
12 changed files with 560 additions and 112 deletions

.gitignore (vendored): 15 additions

@@ -3,12 +3,27 @@
# Back-up files
*~
+# Don't include build related files.
+/build/
+/dist/
# Egg files
/dfDewey.egg-info
+# Test files
+.coverage
# Generic auto-generated build files
*.pyc
*.pyo
# Specific auto-generated build files
/__pycache__
+# IDE files
+.idea/
+.vscode/
+# Pipfile
+Pipfile
+Pipfile.lock

.pylintrc (new file): 365 lines

@@ -0,0 +1,365 @@
# Original file copied from:
# https://chromium.googlesource.com/chromiumos/chromite/+/master/pylintrc
[MASTER]
# Specify a configuration file.
#rcfile=
# Python code to execute, usually for sys.path manipulation such as
# pygtk.require().
#init-hook=
# Profiled execution.
#profile=no
# Add <file or directory> to the black list. It should be a base name, not a
# path. You may set this option multiple times.
ignore=CVS
# Add files or directories matching the regex patterns to the blacklist. The
# regex matches against base names, not paths.
ignore-patterns=
.*_pb2\.py$
# Pickle collected data for later comparisons.
persistent=yes
# List of plugins (as comma separated values of python modules names) to load,
# usually to register additional checkers.
#load-plugins=
# Configure quote preferences.
string-quote = single-avoid-escape
triple-quote = double
docstring-quote = double
[MESSAGES CONTROL]
# Enable the message, report, category or checker with the given id(s). You can
# either give multiple identifier separated by comma (,) or put this option
# multiple times.
# cros-logging-import: logging is deprecated. Use "from chromite.lib import
# cros_logging as logging" to import chromite/lib/cros_logging.
# eq-without-hash: We omit this as we don't require all objects be hashable.
# We'll wait for unittest coverage to detect missing __hash__ on objects.
# no-absolute-import: We don't seem to rely on this behavior, so don't enforce
# using this future import everywhere.
# round-builtin: We omit this as all our usage of round() is OK with either
# Python 2 or 3 behavior (and probably leans towards 3 anyways).
#enable=
# Disable the message, report, category or checker with the given id(s). You
# can either give multiple identifiers separated by comma (,) or put this
# option multiple times (only on the command line, not in the configuration
# file where it should appear only once). You can also use "--disable=all" to
# disable everything first and then reenable specific checks. For example, if
# you want to run only the similarities checker, you can use "--disable=all
# --enable=similarities". If you want to run only the classes checker, but have
# no Warning level messages displayed, use "--disable=all --enable=classes
# --disable=W".
disable=
[REPORTS]
# Set the output format. Available formats are text, parseable, colorized, msvs
# (visual studio) and html
output-format=text
# Put messages in a separate file for each module / package specified on the
# command line instead of printing them on stdout. Reports (if any) will be
# written in a file name "pylint_global.[txt|html]".
files-output=no
# Tells whether to display a full report or only the messages
# CHANGE: No report.
reports=no
# Activate the evaluation score.
score=no
# Python expression which should return a note less than 10 (10 is the highest
# note). You have access to the variables errors warning, statement which
# respectively contain the number of errors / warnings messages and the total
# number of statements analyzed. This is used by the global evaluation report
# (RP0004).
evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
# Add a comment according to your evaluation note. This is used by the global
# evaluation report (RP0004).
comment=no
[MISCELLANEOUS]
# List of note tags to take in consideration, separated by a comma.
notes=FIXME,XXX,TODO
[FORMAT]
# Maximum number of characters on a single line.
max-line-length=80
# Maximum number of lines in a module
max-module-lines=1000
# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
# tab).
# CHANGE: Use " " instead.
indent-string=' '
[TYPECHECK]
# Tells whether missing members accessed in mixin class should be ignored. A
# mixin class is detected if its name ends with "mixin" (case insensitive).
ignore-mixin-members=yes
# List of classes names for which member attributes should not be checked
# (useful for classes with attributes dynamically set).
ignored-classes=pytsk3
# When zope mode is activated, add a predefined set of Zope acquired attributes
# to generated-members.
#zope=no
# List of members which are set dynamically and missed by pylint inference
# system, and so shouldn't trigger E0201 when accessed.
# CHANGE: Added 'AndRaise', 'AndReturn', 'InAnyOrder' and 'MultipleTimes' for pymox.
# CHANGE: Added tempdir for @osutils.TempDirDecorator.
#generated-members=
# List of modules for which member attributes should not be checked.
# Modules listed here will not trigger import errors even if the linter can't
# import them.
#
# pytest: Made available by our testing virtualenv and can be assumed exists.
ignored-modules=pytest
[BASIC]
# Required attributes for module, separated by a comma
#required-attributes=
# List of builtins function names that should not be used, separated by a comma.
# exit & quit are for the interactive interpreter shell only.
# https://docs.python.org/3/library/constants.html#constants-added-by-the-site-module
bad-functions=
apply,
exit,
filter,
input,
map,
quit,
raw_input,
reduce,
# Regular expression which should only match correct module names
module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
# Regular expression which should only match correct module level names
const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$
# Regular expression which should only match correct class names
class-rgx=[A-Z_][a-zA-Z0-9]+$
# Regular expression which should only match correct function names
function-rgx=[a-z_][a-z0-9_]{2,30}$
# Regular expression which should only match correct method names
method-rgx=(test[A-Za-z0-9_]{2,30})|([a-z_][a-z0-9_]{2,30})$
# Regular expression which should only match correct instance attribute names
attr-rgx=[a-z_][a-z0-9_]{2,30}$
# Regular expression which should only match correct argument names
argument-rgx=[a-z_][a-z0-9_]{2,30}$
# Regular expression which should only match correct variable names
variable-rgx=[a-z_][a-z0-9_]{2,30}$
# Regular expression which should only match correct list comprehension /
# generator expression variable names
inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$
# Good variable names which should always be accepted, separated by a comma
good-names=e,i,j,k,ex,Run,_,db,es
# Bad variable names which should always be refused, separated by a comma
bad-names=foo,bar,baz,toto,tutu,tata
# Regular expression which should only match functions or classes name which do
# not require a docstring
no-docstring-rgx=__.*__
[SIMILARITIES]
# Minimum lines number of a similarity.
min-similarity-lines=20
# Ignore comments when computing similarities.
ignore-comments=yes
# Ignore docstrings when computing similarities.
ignore-docstrings=yes
[VARIABLES]
# Tells whether we should check for unused import in __init__ files.
init-import=no
# A regular expression matching the beginning of the name of dummy variables
# (i.e. not used).
dummy-variables-rgx=_|unused_
# List of additional names supposed to be defined in builtins. Remember that
# you should avoid to define new builtins when possible.
#additional-builtins=
[CLASSES]
# List of interface methods to ignore, separated by a comma. This is used for
# instance to not check methods defines in Zope's Interface base class.
#ignore-iface-methods=
# List of method names used to declare (i.e. assign) instance attributes.
defining-attr-methods=__init__,__new__,setUp
[DESIGN]
# Maximum number of arguments for function / method
max-args=5
# Argument names that match this expression will be ignored. Default to name
# with leading underscore
ignored-argument-names=_.*
# Maximum number of locals for function / method body
max-locals=15
# Maximum number of return / yield for function / method body
max-returns=6
# Maximum number of branch for function / method body
max-branchs=12
# Maximum number of statements in function / method body
max-statements=50
# Maximum number of parents for a class (see R0901).
max-parents=10
# Maximum number of attributes for a class (see R0902).
max-attributes=7
# Minimum number of public methods for a class (see R0903).
min-public-methods=2
# Maximum number of public methods for a class (see R0904).
max-public-methods=20
[IMPORTS]
# Deprecated modules which should not be used, separated by a comma.
# __builtin__: Use the 'six.moves.builtins' module instead
# (or 'builtins' in Python 3).
# apiclient: Use the 'googleapiclient' module instead.
# Bastion: Dropped in Python 3.
# ConfigParser: Use the 'six.moves.configparser' module instead
# (or 'configparser' in Python 3).
# cookielib: Use the 'six.moves.http_cookiejar' module instead
# (or 'http.cookiejar' in Python 3).
# cPickle: Use the 'pickle' module instead.
# cStringIO: Use 'io.StringIO' or 'io.BytesIO' instead.
# exceptions: Dropped in Python 3.
# HTMLParser: Use the 'six.moves.html_parser' module instead
# (or 'html.parser' in Python 3).
# httplib: Use the 'six.moves.http_client' module instead
# (or 'http.client' in Python 3).
# md5: Use the 'hashlib' module instead.
# mox: Use the 'mock' module instead.
# optparse: Use the 'argparse' module instead.
# Queue: Use the 'six.moves.queue' module instead (or 'queue' in Python 3).
# regsub: Use the 're' module instead.
# rexec: Dropped in Python 3.
# StringIO: Use 'io.StringIO' or 'io.BytesIO' instead.
# TERMIOS: Use the 'termios' module instead.
# urllib2: Use the 'six.moves.urllib' module instead
# (or 'urllib.request' in Python 3).
# urlparse: Use the 'six.moves.urllib' module instead
# (or 'urllib.parse' in Python 3).
deprecated-modules=
__builtin__,
apiclient,
Bastion,
ConfigParser,
cookielib,
cPickle,
cStringIO,
exceptions,
HTMLParser,
httplib,
md5,
mox,
optparse,
Queue,
regsub,
rexec,
StringIO,
TERMIOS,
urllib2,
urlparse,
# Create a graph of every (i.e. internal and external) dependencies in the
# given file (report RP0402 must not be disabled)
#import-graph=
# Create a graph of external dependencies in the given file (report RP0402 must
# not be disabled)
#ext-import-graph=
# Create a graph of internal dependencies in the given file (report RP0402 must
# not be disabled)
#int-import-graph=
# Force import order to recognize a module as part of the standard
# compatibility libraries.
known-standard-library=
# Force import order to recognize a module as part of a third party library.
known-third-party=
_emerge,
apiclient,
elftools,
gcloud,
google,
googleapiclient,
httplib2,
jinja2,
jsonschema,
lddtree,
magic,
mock,
oauth2client,
portage,
pylint,
requests,
six,
sqlalchemy,
yaml,
[LOGGING]
# Apply logging string format checks to calls on these modules.
logging-modules=
logging,
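
The commit adds the rcfile but no runner for it. As a minimal sketch only, not part of this commit, the checker could be invoked against the package in the same style as run_tests.py below; the script name and location (repository root, next to .pylintrc) and an installed pylint are assumptions:

#!/usr/bin/env python
"""Hypothetical lint runner sketch; not included in this commit."""
import subprocess

if __name__ == '__main__':
  # --rcfile points pylint at the configuration added above.
  subprocess.check_call(['pylint', '--rcfile=.pylintrc', 'dfdewey'])
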

.style.yapf (new file): 11 lines

@@ -0,0 +1,11 @@
#
# To run yapf for this project, invoke as such from the base directory:
# yapf -i -r --style .style.yapf ./dfdewey/
#
[style]
based_on_style = yapf
COALESCE_BRACKETS = True
SPLIT_BEFORE_FIRST_ARGUMENT = True
SPLIT_PENALTY_AFTER_OPENING_BRACKET = 0
SPLIT_PENALTY_FOR_ADDED_LINE_SPLIT = 30
SPLIT_BEFORE_NAMED_ASSIGNS = False
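
The effect of these settings (arguments split after the opening bracket rather than aligned, brackets coalesced) is visible in the reformatted files below. As an illustrative sketch only, assuming yapf is installed and the snippet is run from the repository root, the same style can also be applied through yapf's Python API:

"""Illustrative sketch; not part of this commit."""
from yapf.yapflib.yapf_api import FormatCode

# A call shaped like the ones reformatted in this commit.
SOURCE = (
    "print('{0:s} - {1:d} hits'.format(term, "
    "results['hits']['total']['value']))\n")

# style_config points at the .style.yapf added above.
formatted_code, _ = FormatCode(SOURCE, style_config='.style.yapf')
print(formatted_code)
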

dfdewey/datastore/elastic.py

@@ -27,7 +27,7 @@ es_logger = logging.getLogger('dfdewey.elasticsearch')
es_logger.setLevel(logging.WARNING)
-class ElasticsearchDataStore(object):
+class ElasticsearchDataStore():
  """Implements the datastore."""
  # Number of events to queue up when bulk inserting events.
@@ -36,7 +36,7 @@ class ElasticsearchDataStore(object):
  def __init__(self, host='127.0.0.1', port=9200):
    """Create an Elasticsearch client."""
-    super(ElasticsearchDataStore, self).__init__()
+    super().__init__()
    self.client = Elasticsearch([{'host': host, 'port': port}], timeout=30)
    self.import_counter = collections.Counter()
    self.import_events = []
@@ -79,8 +79,8 @@ class ElasticsearchDataStore(object):
    if not self.client.indices.exists(index_name):
      try:
        self.client.indices.create(index=index_name)
-      except exceptions.ConnectionError:
-        raise RuntimeError('Unable to connect to backend datastore.')
+      except exceptions.ConnectionError as e:
+        raise RuntimeError('Unable to connect to backend datastore.') from e
    if not isinstance(index_name, six.text_type):
      index_name = codecs.decode(index_name, 'utf8')
@@ -97,12 +97,11 @@ class ElasticsearchDataStore(object):
    try:
      self.client.indices.delete(index=index_name)
    except exceptions.ConnectionError as e:
-      raise RuntimeError(
-          'Unable to connect to backend datastore: {}'.format(e))
+      raise RuntimeError('Unable to connect to backend datastore.') from e
  def import_event(
-      self, index_name, event=None,
-      event_id=None, flush_interval=DEFAULT_FLUSH_INTERVAL):
+      self, index_name, event=None, event_id=None,
+      flush_interval=DEFAULT_FLUSH_INTERVAL):
    """Add event to Elasticsearch.
    Args:
@@ -126,17 +125,8 @@ class ElasticsearchDataStore(object):
        event[k] = v
    # Header needed by Elasticsearch when bulk inserting.
-    header = {
-        'index': {
-            '_index': index_name
-        }
-    }
-    update_header = {
-        'update': {
-            '_index': index_name,
-            '_id': event_id
-        }
-    }
+    header = {'index': {'_index': index_name}}
+    update_header = {'update': {'_index': index_name, '_id': event_id}}
    if event_id:
      # Event has "lang" defined if there is a script used for import.
@@ -182,7 +172,4 @@ class ElasticsearchDataStore(object):
    search_type = 'query_then_fetch'
    return self.client.search(
-        body=query_dsl,
-        index=index_id,
-        size=size,
-        search_type=search_type)
+        body=query_dsl, index=index_id, size=size, search_type=search_type)

dfdewey/datastore/postgresql.py

@@ -24,22 +24,15 @@ postgresql_logger = logging.getLogger('dfdewey.postgresql')
postgresql_logger.setLevel(logging.WARNING)
-class PostgresqlDataStore(object):
+class PostgresqlDataStore():
  """Implements the datastore."""
  def __init__(
-      self,
-      host='127.0.0.1',
-      port=5432,
-      db_name='dfdewey',
-      autocommit=False):
+      self, host='127.0.0.1', port=5432, db_name='dfdewey', autocommit=False):
    """Create a PostgreSQL client."""
-    super(PostgresqlDataStore, self).__init__()
+    super().__init__()
    self.db = psycopg2.connect(
-        database=db_name,
-        user='dfdewey',
-        password='password',
-        host=host,
+        database=db_name, user='dfdewey', password='password', host=host,
        port=port)
    if autocommit:
      self.db.set_isolation_level(
@@ -60,9 +53,7 @@ class PostgresqlDataStore(object):
      rows: Array of value tuples to be inserted
    """
    extras.execute_values(
-        self.cursor,
-        'INSERT INTO {0:s} VALUES %s'.format(table_spec),
-        rows)
+        self.cursor, 'INSERT INTO {0:s} VALUES %s'.format(table_spec), rows)
  def execute(self, command):
    """Execute a command in the PostgreSQL database.
@@ -111,10 +102,7 @@ class PostgresqlDataStore(object):
    self.db.commit()
    self.db.close()
    self.db = psycopg2.connect(
-        database=db_name,
-        user='dfdewey',
-        password='password',
-        host=host,
+        database=db_name, user='dfdewey', password='password', host=host,
        port=port)
    if autocommit:
      self.db.set_isolation_level(
@@ -131,7 +119,8 @@ class PostgresqlDataStore(object):
    Returns:
      True if the table already exists, otherwise False
    """
-    self.cursor.execute("""
+    self.cursor.execute(
+        """
        SELECT 1 FROM information_schema.tables
        WHERE table_schema = '{0:s}' AND table_name = '{1:s}'""".format(
            table_schema, table_name))
@@ -149,9 +138,9 @@ class PostgresqlDataStore(object):
    Returns:
      True if the value exists, otherwise False
    """
-    self.cursor.execute("""
+    self.cursor.execute(
+        """
        SELECT 1 from {0:s}
-        WHERE {1:s} = '{2:s}'""".format(
-            table_name, column_name, value))
+        WHERE {1:s} = '{2:s}'""".format(table_name, column_name, value))
    return self.cursor.fetchone()

dfdewey main module

@@ -25,7 +25,6 @@ from dfdewey.datastore.elastic import ElasticsearchDataStore
from dfdewey.datastore.postgresql import PostgresqlDataStore
from dfdewey.utils import image
STRING_INDEXING_LOG_INTERVAL = 10000000
@@ -89,10 +88,7 @@ def process_image(image_file, case, base64, gunzip, unzip):
  image_path = os.path.abspath(image_file)
  output_path = tempfile.mkdtemp()
-  cmd = ['bulk_extractor',
-         '-o', output_path,
-         '-x', 'all',
-         '-e', 'wordlist']
+  cmd = ['bulk_extractor', '-o', output_path, '-x', 'all', '-e', 'wordlist']
  if base64:
    cmd.extend(['-e', 'base64'])
@@ -109,7 +105,7 @@ def process_image(image_file, case, base64, gunzip, unzip):
  print('\n*** Running bulk extractor:\n{0:s}'.format(' '.join(cmd)))
  output = subprocess.check_output(cmd)
  md5_offset = output.index(b'MD5') + 19
-  image_hash = output[md5_offset:md5_offset+32].decode('utf-8')
+  image_hash = output[md5_offset:md5_offset + 32].decode('utf-8')
  print('String extraction completed: {0!s}'.format(datetime.datetime.now()))
  print('\n*** Parsing image')
@@ -211,8 +207,9 @@ def search(query, case, image_path=None, query_list=None):
    images[image_hash[0]] = image_path
  else:
-    print('No image specified, searching all images in case \'{0:s}\''.format(
-        case))
+    print(
+        'No image specified, searching all images in case \'{0:s}\''.format(
+            case))
    image_hashes = case_db.query(
        'SELECT image_hash FROM image_case WHERE case_id = \'{0:s}\''.format(
            case))
@@ -234,29 +231,28 @@ def search(query, case, image_path=None, query_list=None):
          term = ''.join(('"', term.strip(), '"'))
          results = search_index(index, term)
          if results['hits']['total']['value'] > 0:
-            print('{0:s} - {1:d} hits'.format(
-                term, results['hits']['total']['value']))
+            print(
+                '{0:s} - {1:d} hits'.format(
+                    term, results['hits']['total']['value']))
    else:
      print('\n*** Searching for \'{0:s}\'...'.format(query))
      results = search_index(index, query)
      print('Returned {0:d} results:'.format(results['hits']['total']['value']))
      for hit in results['hits']['hits']:
        filename = image.get_filename_from_offset(
-            image_path,
-            hit['_source']['image'],
-            int(hit['_source']['offset']))
+            image_path, hit['_source']['image'], int(hit['_source']['offset']))
        if hit['_source']['file_offset']:
-          print('Offset: {0:d}\tFile: {1:s}\tFile offset:{2:s}\t'
-                'String: {3:s}'.format(
-                    hit['_source']['offset'],
-                    filename,
-                    hit['_source']['file_offset'],
-                    hit['_source']['data'].strip()))
+          print(
+              'Offset: {0:d}\tFile: {1:s}\tFile offset:{2:s}\t'
+              'String: {3:s}'.format(
+                  hit['_source']['offset'], filename,
+                  hit['_source']['file_offset'],
+                  hit['_source']['data'].strip()))
        else:
-          print('Offset: {0:d}\tFile: {1:s}\tString: {2:s}'.format(
-              hit['_source']['offset'],
-              filename,
-              hit['_source']['data'].strip()))
+          print(
+              'Offset: {0:d}\tFile: {1:s}\tString: {2:s}'.format(
+                  hit['_source']['offset'], filename,
+                  hit['_source']['data'].strip()))
def search_index(index_id, search_query):
@@ -278,8 +274,8 @@ def main():
  args = parse_args()
  if not args.search and not args.search_list:
    process_image(
-        args.image, args.case,
-        not args.no_base64, not args.no_gzip, not args.no_zip)
+        args.image, args.case, not args.no_base64, not args.no_gzip,
+        not args.no_zip)
  elif args.search:
    search(args.search, args.case, args.image)
  elif args.search_list:

dfdewey/utils/image.py

@@ -14,9 +14,10 @@
# limitations under the License.
"""Image File Access Functions."""
-from dfdewey.datastore.postgresql import PostgresqlDataStore
import pytsk3
+from dfdewey.datastore.postgresql import PostgresqlDataStore
def initialise_block_db(image_path, image_hash, case):
  """Creates a new image database.
@@ -68,7 +69,8 @@ def check_tracking_database(tracking_db, image_path, image_hash, case):
    tracking_db.execute(
        'CREATE TABLE images (image_path TEXT, image_hash TEXT PRIMARY KEY)')
-    tracking_db.execute("""
+    tracking_db.execute(
+        """
        CREATE TABLE image_case (
        case_id TEXT, image_hash TEXT REFERENCES images(image_hash),
        PRIMARY KEY (case_id, image_hash))""")
@@ -77,7 +79,8 @@ def check_tracking_database(tracking_db, image_path, image_hash, case):
  image_case_exists = False
  if image_exists:
-    image_case = tracking_db.query_single_row("""
+    image_case = tracking_db.query_single_row(
+        """
        SELECT 1 from image_case
        WHERE image_hash = '{0:s}' AND case_id = '{1:s}'""".format(
            image_hash, case))
@@ -85,11 +88,13 @@ def check_tracking_database(tracking_db, image_path, image_hash, case):
      image_case_exists = True
  if not image_exists:
-    tracking_db.execute("""
+    tracking_db.execute(
+        """
        INSERT INTO images (image_path, image_hash)
        VALUES ('{0:s}', '{1:s}')""".format(image_path, image_hash))
  if not image_case_exists:
-    tracking_db.execute("""
+    tracking_db.execute(
+        """
        INSERT INTO image_case (case_id, image_hash)
        VALUES ('{0:s}', '{1:s}')""".format(case, image_hash))
@@ -118,18 +123,25 @@ def populate_block_db(img, block_db, batch_size=1500):
    has_partition_table = True
    rows = []
    for part in volume:
-      print('Parsing partition {0:d}: {1:s}'.format(
-          part.addr, part.desc.decode('utf-8')))
+      print(
+          'Parsing partition {0:d}: {1:s}'.format(
+              part.addr, part.desc.decode('utf-8')))
      if part.flags != pytsk3.TSK_VS_PART_FLAG_ALLOC:
        continue
-      fs = pytsk3.FS_Info(img, offset=part.start * volume.info.block_size)
-      for inode in range(fs.info.first_inum, fs.info.last_inum + 1):
-        file = fs.open_meta(inode)
+      filesystem = pytsk3.FS_Info(
+          img, offset=part.start * volume.info.block_size)
+      for inode in range(filesystem.info.first_inum,
+                         filesystem.info.last_inum + 1):
+        file = filesystem.open_meta(inode)
        if file.info.meta.nlink > 0:
          for attr in file:
            for run in attr:
              for block in range(run.len):
-                rows.append((run.addr + block, inode, part.addr,))
+                rows.append((
+                    run.addr + block,
+                    inode,
+                    part.addr,
+                ))
                if len(rows) >= batch_size:
                  block_db.bulk_insert('blocks (block, inum, part)', rows)
                  rows = []
@@ -137,22 +149,26 @@ def populate_block_db(img, block_db, batch_size=1500):
        block_db.bulk_insert('blocks (block, inum, part)', rows)
      # File names
-      directory = fs.open_dir(path='/')
+      directory = filesystem.open_dir(path='/')
      list_directory(block_db, directory, part=part.addr, batch_size=batch_size)
  except IOError:
    pass
  if not has_partition_table:
-    fs = pytsk3.FS_Info(img)
+    filesystem = pytsk3.FS_Info(img)
    rows = []
-    for inode in range(fs.info.first_inum, fs.info.last_inum + 1):
+    for inode in range(filesystem.info.first_inum,
+                       filesystem.info.last_inum + 1):
      try:
-        file = fs.open_meta(inode)
+        file = filesystem.open_meta(inode)
        if file.info.meta.nlink > 0:
          for attr in file:
            for run in attr:
              for block in range(run.len):
-                rows.append((run.addr + block, inode,))
+                rows.append((
+                    run.addr + block,
+                    inode,
+                ))
                if len(rows) >= batch_size:
                  block_db.bulk_insert('blocks (block, inum)', rows)
                  rows = []
@@ -162,7 +178,7 @@ def populate_block_db(img, block_db, batch_size=1500):
        continue
    # File names
-    directory = fs.open_dir(path='/')
+    directory = filesystem.open_dir(path='/')
    list_directory(block_db, directory, batch_size=batch_size)
  block_db.execute('CREATE INDEX blocks_index ON blocks (block, part);')
@@ -205,15 +221,19 @@ def list_directory(
      print('Unable to decode: {}'.format(directory_entry.info.name.name))
      continue
    if part:
-      rows.append((directory_entry.info.meta.addr,
-                   name.replace('\'', '\'\''),
-                   part,))
+      rows.append((
+          directory_entry.info.meta.addr,
+          name.replace('\'', '\'\''),
+          part,
+      ))
      if len(rows) >= batch_size:
        block_db.bulk_insert('files (inum, filename, part)', rows)
        rows = []
    else:
-      rows.append((directory_entry.info.meta.addr,
-                   name.replace('\'', '\'\''),))
+      rows.append((
+          directory_entry.info.meta.addr,
+          name.replace('\'', '\'\''),
+      ))
      if len(rows) >= batch_size:
        block_db.bulk_insert('files (inum, filename)', rows)
        rows = []
@@ -224,11 +244,7 @@ def list_directory(
        if inode not in stack:
          rows = list_directory(
-              block_db,
-              sub_directory,
-              part=part,
-              stack=stack,
-              rows=rows,
+              block_db, sub_directory, part=part, stack=stack, rows=rows,
              batch_size=batch_size)
      except IOError:
@@ -281,14 +297,14 @@ def get_filename_from_offset(image_path, image_hash, offset):
  if not unalloc_part:
    try:
      if not partition_offset:
-        fs = pytsk3.FS_Info(img)
+        filesystem = pytsk3.FS_Info(img)
      else:
        offset -= partition_offset * device_block_size
-        fs = pytsk3.FS_Info(
+        filesystem = pytsk3.FS_Info(
            img, offset=partition_offset * device_block_size)
    except TypeError as e:
      print(e)
-    block_size = fs.info.block_size
+    block_size = filesystem.info.block_size
    inums = get_inums(block_db, offset / block_size, part=partition)
@@ -296,7 +312,7 @@ def get_filename_from_offset(image_path, image_hash, offset):
  if inums:
    for i in inums:
      real_inum = i[0]
-      if i[0] == 0 and fs.info.ftype == pytsk3.TSK_FS_TYPE_NTFS_DETECT:
+      if i[0] == 0 and filesystem.info.ftype == pytsk3.TSK_FS_TYPE_NTFS_DETECT:
        mft_record_size_offset = 0x40
        if partition_offset:
          mft_record_size_offset = \
@@ -304,10 +320,10 @@ def get_filename_from_offset(image_path, image_hash, offset):
        mft_record_size = int.from_bytes(
            img.read(mft_record_size_offset, 1), 'little', signed=True)
        if mft_record_size < 0:
-          mft_record_size = 2 ** (mft_record_size * -1)
+          mft_record_size = 2**(mft_record_size * -1)
        else:
          mft_record_size = mft_record_size * block_size
-        real_inum = get_resident_inum(offset, fs, mft_record_size)
+        real_inum = get_resident_inum(offset, filesystem, mft_record_size)
      filename = get_filename(block_db, real_inum, part=partition)
      if filename and not filenames:
        filenames.append('{0:s} ({1:d})'.format(filename, real_inum))
@@ -343,21 +359,21 @@ def get_inums(block_db, block, part=None):
  return inums
-def get_resident_inum(offset, fs, mft_record_size):
+def get_resident_inum(offset, filesystem, mft_record_size):
  """Gets the inode number associated with NTFS $MFT resident data.
  Args:
    offset: Data offset within volume
-    fs: pytsk3 FS_INFO object
+    filesystem: pytsk3 FS_INFO object
    mft_record_size: Size of an $MFT entry
  Returns:
    inode number of resident data
  """
-  block_size = fs.info.block_size
+  block_size = filesystem.info.block_size
  offset_block = int(offset / block_size)
-  inode = fs.open_meta(0)
+  inode = filesystem.open_meta(0)
  mft_entry = 0
  for attr in inode:
    for run in attr:

dfdewey/yapf_test.py (new file): 42 lines

@@ -0,0 +1,42 @@
# -*- coding: utf-8 -*-
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Enforce code style with YAPF."""
import os
import subprocess
import unittest
class StyleTest(unittest.TestCase):
"""Enforce code style requirements."""
def testCodeStyle(self):
"""Check YAPF style enforcement runs cleanly."""
dfdewey_path = os.path.abspath(os.path.dirname(__file__))
config_path = os.path.join(dfdewey_path, '..', '.style.yapf')
try:
subprocess.check_output(
['yapf', '--style', config_path, '--diff', '-r', dfdewey_path])
except subprocess.CalledProcessError as e:
if hasattr(e, 'output'):
raise Exception(
'Run "yapf --style {0:s} -i -r {1:s}" '
'to correct these problems: {2:s}'.format(
config_path, dfdewey_path, e.output.decode('utf-8'))) from e
raise
if __name__ == '__main__':
unittest.main()

run_tests.py (new executable file): 23 lines

@@ -0,0 +1,23 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Script to run tests."""
import subprocess
if __name__ == '__main__':
subprocess.check_call([
'nosetests', '-vv', '--with-coverage', '--cover-package=dfdewey', '--exe'
])

setup.py

@@ -20,9 +20,13 @@ import sys
from setuptools import find_packages
from setuptools import setup
+import dfdewey
sys.path.insert(0, '.')
-import dfdewey
+DFDEWEY_DESCRIPTION = (
+    'dfDewey is a digital forensics string extraction, indexing, and searching '
+    'tool.')
requirements = []
with open('requirements.txt','r') as f:
@@ -30,7 +34,7 @@ with open('requirements.txt','r') as f:
setup(
    name='dfDewey',
    version=dfdewey.__version__,
-    description='dfDewey is a digital forensics string extraction, indexing, and searching tool.',
+    description=DFDEWEY_DESCRIPTION,
    license='Apache License, Version 2.0',
    maintainer='dfDewey development team',
    maintainer_email='dfdewey-dev@googlegroups.com',
@@ -38,6 +42,6 @@ setup(
    include_package_data=True,
    install_requires=requirements,
    extras_require={
-        'dev': []
+        'dev': ['mock', 'nose', 'yapf', 'coverage']
    }
)