Formatting test
This commit is contained in:
parent
d277394b5c
commit
0ff2bed3ef
12 changed files with 560 additions and 112 deletions
15
.gitignore
vendored
15
.gitignore
vendored
|
@ -3,12 +3,27 @@
|
|||
# Back-up files
|
||||
*~
|
||||
|
||||
# Don't include build related files.
|
||||
/build/
|
||||
/dist/
|
||||
|
||||
# Egg files
|
||||
/dfDewey.egg-info
|
||||
|
||||
# Test files
|
||||
.coverage
|
||||
|
||||
# Generic auto-generated build files
|
||||
*.pyc
|
||||
*.pyo
|
||||
|
||||
# Specific auto-generated build files
|
||||
/__pycache__
|
||||
|
||||
# IDE files
|
||||
.idea/
|
||||
.vscode/
|
||||
|
||||
# Pipfile
|
||||
Pipfile
|
||||
Pipfile.lock
|
||||
|
|
365
.pylintrc
Normal file
365
.pylintrc
Normal file
|
@ -0,0 +1,365 @@
|
|||
# Original file copied from:
|
||||
# https://chromium.googlesource.com/chromiumos/chromite/+/master/pylintrc
|
||||
|
||||
[MASTER]
|
||||
|
||||
# Specify a configuration file.
|
||||
#rcfile=
|
||||
|
||||
# Python code to execute, usually for sys.path manipulation such as
|
||||
# pygtk.require().
|
||||
#init-hook=
|
||||
|
||||
# Profiled execution.
|
||||
#profile=no
|
||||
|
||||
# Add <file or directory> to the black list. It should be a base name, not a
|
||||
# path. You may set this option multiple times.
|
||||
ignore=CVS
|
||||
|
||||
# Add files or directories matching the regex patterns to the blacklist. The
|
||||
# regex matches against base names, not paths.
|
||||
ignore-patterns=
|
||||
.*_pb2\.py$
|
||||
|
||||
# Pickle collected data for later comparisons.
|
||||
persistent=yes
|
||||
|
||||
# List of plugins (as comma separated values of python modules names) to load,
|
||||
# usually to register additional checkers.
|
||||
#load-plugins=
|
||||
|
||||
# Configure quote preferences.
|
||||
string-quote = single-avoid-escape
|
||||
triple-quote = double
|
||||
docstring-quote = double
|
||||
|
||||
|
||||
[MESSAGES CONTROL]
|
||||
|
||||
# Enable the message, report, category or checker with the given id(s). You can
|
||||
# either give multiple identifiers separated by comma (,) or put this option
|
||||
# multiple times.
|
||||
# cros-logging-import: logging is deprecated. Use "from chromite.lib import
|
||||
# cros_logging as logging" to import chromite/lib/cros_logging.
|
||||
# eq-without-hash: We omit this as we don't require all objects be hashable.
|
||||
# We'll wait for unittest coverage to detect missing __hash__ on objects.
|
||||
# no-absolute-import: We don't seem to rely on this behavior, so don't enforce
|
||||
# using this future import everywhere.
|
||||
# round-builtin: We omit this as all our usage of round() is OK with either
|
||||
# Python 2 or 3 behavior (and probably leans towards 3 anyways).
|
||||
#enable=
|
||||
|
||||
# Disable the message, report, category or checker with the given id(s). You
|
||||
# can either give multiple identifiers separated by comma (,) or put this
|
||||
# option multiple times (only on the command line, not in the configuration
|
||||
# file where it should appear only once). You can also use "--disable=all" to
|
||||
# disable everything first and then reenable specific checks. For example, if
|
||||
# you want to run only the similarities checker, you can use "--disable=all
|
||||
# --enable=similarities". If you want to run only the classes checker, but have
|
||||
# no Warning level messages displayed, use "--disable=all --enable=classes
|
||||
# --disable=W".
|
||||
disable=
|
||||
|
||||
|
||||
[REPORTS]
|
||||
|
||||
# Set the output format. Available formats are text, parseable, colorized, msvs
|
||||
# (visual studio) and html
|
||||
output-format=text
|
||||
|
||||
# Put messages in a separate file for each module / package specified on the
|
||||
# command line instead of printing them on stdout. Reports (if any) will be
|
||||
# written in a file named "pylint_global.[txt|html]".
|
||||
files-output=no
|
||||
|
||||
# Tells whether to display a full report or only the messages
|
||||
# CHANGE: No report.
|
||||
reports=no
|
||||
|
||||
# Activate the evaluation score.
|
||||
score=no
|
||||
|
||||
# Python expression which should return a note less than 10 (10 is the highest
|
||||
# note). You have access to the variables error, warning and statement, which
|
||||
# respectively contain the number of errors / warnings messages and the total
|
||||
# number of statements analyzed. This is used by the global evaluation report
|
||||
# (RP0004).
|
||||
evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
|
||||
|
||||
# Add a comment according to your evaluation note. This is used by the global
|
||||
# evaluation report (RP0004).
|
||||
comment=no
|
||||
|
||||
|
||||
[MISCELLANEOUS]
|
||||
|
||||
# List of note tags to take in consideration, separated by a comma.
|
||||
notes=FIXME,XXX,TODO
|
||||
|
||||
|
||||
[FORMAT]
|
||||
|
||||
# Maximum number of characters on a single line.
|
||||
max-line-length=80
|
||||
|
||||
# Maximum number of lines in a module
|
||||
max-module-lines=1000
|
||||
|
||||
# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
|
||||
# tab).
|
||||
# CHANGE: Use " " instead.
|
||||
indent-string=' '
|
||||
|
||||
|
||||
[TYPECHECK]
|
||||
|
||||
# Tells whether missing members accessed in mixin class should be ignored. A
|
||||
# mixin class is detected if its name ends with "mixin" (case insensitive).
|
||||
ignore-mixin-members=yes
|
||||
|
||||
# List of classes names for which member attributes should not be checked
|
||||
# (useful for classes with attributes dynamically set).
|
||||
ignored-classes=pytsk3
|
||||
|
||||
# When zope mode is activated, add a predefined set of Zope acquired attributes
|
||||
# to generated-members.
|
||||
#zope=no
|
||||
|
||||
# List of members which are set dynamically and missed by pylint inference
|
||||
# system, and so shouldn't trigger E0201 when accessed.
|
||||
# CHANGE: Added 'AndRaise', 'AndReturn', 'InAnyOrder' and 'MultipleTimes' for pymox.
|
||||
# CHANGE: Added tempdir for @osutils.TempDirDecorator.
|
||||
#generated-members=
|
||||
|
||||
# List of modules for which member attributes should not be checked.
|
||||
# Modules listed here will not trigger import errors even if the linter can't
|
||||
# import them.
|
||||
#
|
||||
# pytest: Made available by our testing virtualenv and can be assumed exists.
|
||||
ignored-modules=pytest
|
||||
|
||||
|
||||
[BASIC]
|
||||
|
||||
# Required attributes for module, separated by a comma
|
||||
#required-attributes=
|
||||
|
||||
# List of builtins function names that should not be used, separated by a comma.
|
||||
# exit & quit are for the interactive interpreter shell only.
|
||||
# https://docs.python.org/3/library/constants.html#constants-added-by-the-site-module
|
||||
bad-functions=
|
||||
apply,
|
||||
exit,
|
||||
filter,
|
||||
input,
|
||||
map,
|
||||
quit,
|
||||
raw_input,
|
||||
reduce,
|
||||
|
||||
# Regular expression which should only match correct module names
|
||||
module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
|
||||
|
||||
# Regular expression which should only match correct module level names
|
||||
const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$
|
||||
|
||||
# Regular expression which should only match correct class names
|
||||
class-rgx=[A-Z_][a-zA-Z0-9]+$
|
||||
|
||||
# Regular expression which should only match correct function names
|
||||
function-rgx=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
# Regular expression which should only match correct method names
|
||||
method-rgx=(test[A-Za-z0-9_]{2,30})|([a-z_][a-z0-9_]{2,30})$
|
||||
|
||||
# Regular expression which should only match correct instance attribute names
|
||||
attr-rgx=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
# Regular expression which should only match correct argument names
|
||||
argument-rgx=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
# Regular expression which should only match correct variable names
|
||||
variable-rgx=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
# Regular expression which should only match correct list comprehension /
|
||||
# generator expression variable names
|
||||
inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$
|
||||
|
||||
# Good variable names which should always be accepted, separated by a comma
|
||||
good-names=e,i,j,k,ex,Run,_,db,es
|
||||
|
||||
# Bad variable names which should always be refused, separated by a comma
|
||||
bad-names=foo,bar,baz,toto,tutu,tata
|
||||
|
||||
# Regular expression which should only match functions or classes name which do
|
||||
# not require a docstring
|
||||
no-docstring-rgx=__.*__
|
||||
|
||||
|
||||
[SIMILARITIES]
|
||||
|
||||
# Minimum lines number of a similarity.
|
||||
min-similarity-lines=20
|
||||
|
||||
# Ignore comments when computing similarities.
|
||||
ignore-comments=yes
|
||||
|
||||
# Ignore docstrings when computing similarities.
|
||||
ignore-docstrings=yes
|
||||
|
||||
|
||||
[VARIABLES]
|
||||
|
||||
# Tells whether we should check for unused import in __init__ files.
|
||||
init-import=no
|
||||
|
||||
# A regular expression matching the beginning of the name of dummy variables
|
||||
# (i.e. not used).
|
||||
dummy-variables-rgx=_|unused_
|
||||
|
||||
# List of additional names supposed to be defined in builtins. Remember that
|
||||
# you should avoid defining new builtins when possible.
|
||||
#additional-builtins=
|
||||
|
||||
|
||||
[CLASSES]
|
||||
|
||||
# List of interface methods to ignore, separated by a comma. This is used for
|
||||
# instance to not check methods defined in Zope's Interface base class.
|
||||
#ignore-iface-methods=
|
||||
|
||||
# List of method names used to declare (i.e. assign) instance attributes.
|
||||
defining-attr-methods=__init__,__new__,setUp
|
||||
|
||||
|
||||
[DESIGN]
|
||||
|
||||
# Maximum number of arguments for function / method
|
||||
max-args=5
|
||||
|
||||
# Argument names that match this expression will be ignored. Default to name
|
||||
# with leading underscore
|
||||
ignored-argument-names=_.*
|
||||
|
||||
# Maximum number of locals for function / method body
|
||||
max-locals=15
|
||||
|
||||
# Maximum number of return / yield for function / method body
|
||||
max-returns=6
|
||||
|
||||
# Maximum number of branches for function / method body
|
||||
max-branchs=12
|
||||
|
||||
# Maximum number of statements in function / method body
|
||||
max-statements=50
|
||||
|
||||
# Maximum number of parents for a class (see R0901).
|
||||
max-parents=10
|
||||
|
||||
# Maximum number of attributes for a class (see R0902).
|
||||
max-attributes=7
|
||||
|
||||
# Minimum number of public methods for a class (see R0903).
|
||||
min-public-methods=2
|
||||
|
||||
# Maximum number of public methods for a class (see R0904).
|
||||
max-public-methods=20
|
||||
|
||||
|
||||
[IMPORTS]
|
||||
|
||||
# Deprecated modules which should not be used, separated by a comma.
|
||||
# __builtin__: Use the 'six.moves.builtins' module instead
|
||||
# (or 'builtins' in Python 3).
|
||||
# apiclient: Use the 'googleapiclient' module instead.
|
||||
# Bastion: Dropped in Python 3.
|
||||
# ConfigParser: Use the 'six.moves.configparser' module instead
|
||||
# (or 'configparser' in Python 3).
|
||||
# cookielib: Use the 'six.moves.http_cookiejar' module instead
|
||||
# (or 'http.cookiejar' in Python 3).
|
||||
# cPickle: Use the 'pickle' module instead.
|
||||
# cStringIO: Use 'io.StringIO' or 'io.BytesIO' instead.
|
||||
# exceptions: Dropped in Python 3.
|
||||
# HTMLParser: Use the 'six.moves.html_parser' module instead
|
||||
# (or 'html.parser' in Python 3).
|
||||
# httplib: Use the 'six.moves.http_client' module instead
|
||||
# (or 'http.client' in Python 3).
|
||||
# md5: Use the 'hashlib' module instead.
|
||||
# mox: Use the 'mock' module instead.
|
||||
# optparse: Use the 'argparse' module instead.
|
||||
# Queue: Use the 'six.moves.queue' module instead (or 'queue' in Python 3).
|
||||
# regsub: Use the 're' module instead.
|
||||
# rexec: Dropped in Python 3.
|
||||
# StringIO: Use 'io.StringIO' or 'io.BytesIO' instead.
|
||||
# TERMIOS: Use the 'termios' module instead.
|
||||
# urllib2: Use the 'six.moves.urllib' module instead
|
||||
# (or 'urllib.request' in Python 3).
|
||||
# urlparse: Use the 'six.moves.urllib' module instead
|
||||
# (or 'urllib.parse' in Python 3).
|
||||
deprecated-modules=
|
||||
__builtin__,
|
||||
apiclient,
|
||||
Bastion,
|
||||
ConfigParser,
|
||||
cookielib,
|
||||
cPickle,
|
||||
cStringIO,
|
||||
exceptions,
|
||||
HTMLParser,
|
||||
httplib,
|
||||
md5,
|
||||
mox,
|
||||
optparse,
|
||||
Queue,
|
||||
regsub,
|
||||
rexec,
|
||||
StringIO,
|
||||
TERMIOS,
|
||||
urllib2,
|
||||
urlparse,
|
||||
|
||||
# Create a graph of every (i.e. internal and external) dependencies in the
|
||||
# given file (report RP0402 must not be disabled)
|
||||
#import-graph=
|
||||
|
||||
# Create a graph of external dependencies in the given file (report RP0402 must
|
||||
# not be disabled)
|
||||
#ext-import-graph=
|
||||
|
||||
# Create a graph of internal dependencies in the given file (report RP0402 must
|
||||
# not be disabled)
|
||||
#int-import-graph=
|
||||
|
||||
# Force import order to recognize a module as part of the standard
|
||||
# compatibility libraries.
|
||||
known-standard-library=
|
||||
|
||||
# Force import order to recognize a module as part of a third party library.
|
||||
known-third-party=
|
||||
_emerge,
|
||||
apiclient,
|
||||
elftools,
|
||||
gcloud,
|
||||
google,
|
||||
googleapiclient,
|
||||
httplib2,
|
||||
jinja2,
|
||||
jsonschema,
|
||||
lddtree,
|
||||
magic,
|
||||
mock,
|
||||
oauth2client,
|
||||
portage,
|
||||
pylint,
|
||||
requests,
|
||||
six,
|
||||
sqlalchemy,
|
||||
yaml,
|
||||
|
||||
|
||||
[LOGGING]
|
||||
|
||||
# Apply logging string format checks to calls on these modules.
|
||||
logging-modules=
|
||||
logging,
|
11
.style.yapf
Normal file
11
.style.yapf
Normal file
|
@ -0,0 +1,11 @@
|
|||
#
|
||||
# To run yapf for this project, invoke as such from the base directory:
|
||||
# yapf -i -r --style .style.yapf ./dfdewey/
|
||||
#
|
||||
[style]
|
||||
based_on_style = yapf
|
||||
COALESCE_BRACKETS = True
|
||||
SPLIT_BEFORE_FIRST_ARGUMENT = True
|
||||
SPLIT_PENALTY_AFTER_OPENING_BRACKET = 0
|
||||
SPLIT_PENALTY_FOR_ADDED_LINE_SPLIT = 30
|
||||
SPLIT_BEFORE_NAMED_ASSIGNS = False
|
|
@ -12,4 +12,4 @@
|
|||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""DFDewey Datastore Package."""
|
||||
"""DFDewey Datastore Package."""
|
||||
|
|
|
@ -27,7 +27,7 @@ es_logger = logging.getLogger('dfdewey.elasticsearch')
|
|||
es_logger.setLevel(logging.WARNING)
|
||||
|
||||
|
||||
class ElasticsearchDataStore(object):
|
||||
class ElasticsearchDataStore():
|
||||
"""Implements the datastore."""
|
||||
|
||||
# Number of events to queue up when bulk inserting events.
|
||||
|
@ -36,7 +36,7 @@ class ElasticsearchDataStore(object):
|
|||
|
||||
def __init__(self, host='127.0.0.1', port=9200):
|
||||
"""Create an Elasticsearch client."""
|
||||
super(ElasticsearchDataStore, self).__init__()
|
||||
super().__init__()
|
||||
self.client = Elasticsearch([{'host': host, 'port': port}], timeout=30)
|
||||
self.import_counter = collections.Counter()
|
||||
self.import_events = []
|
||||
|
@ -79,8 +79,8 @@ class ElasticsearchDataStore(object):
|
|||
if not self.client.indices.exists(index_name):
|
||||
try:
|
||||
self.client.indices.create(index=index_name)
|
||||
except exceptions.ConnectionError:
|
||||
raise RuntimeError('Unable to connect to backend datastore.')
|
||||
except exceptions.ConnectionError as e:
|
||||
raise RuntimeError('Unable to connect to backend datastore.') from e
|
||||
|
||||
if not isinstance(index_name, six.text_type):
|
||||
index_name = codecs.decode(index_name, 'utf8')
|
||||
|
@ -97,12 +97,11 @@ class ElasticsearchDataStore(object):
|
|||
try:
|
||||
self.client.indices.delete(index=index_name)
|
||||
except exceptions.ConnectionError as e:
|
||||
raise RuntimeError(
|
||||
'Unable to connect to backend datastore: {}'.format(e))
|
||||
raise RuntimeError('Unable to connect to backend datastore.') from e
|
||||
|
||||
def import_event(
|
||||
self, index_name, event=None,
|
||||
event_id=None, flush_interval=DEFAULT_FLUSH_INTERVAL):
|
||||
self, index_name, event=None, event_id=None,
|
||||
flush_interval=DEFAULT_FLUSH_INTERVAL):
|
||||
"""Add event to Elasticsearch.
|
||||
|
||||
Args:
|
||||
|
@ -126,17 +125,8 @@ class ElasticsearchDataStore(object):
|
|||
event[k] = v
|
||||
|
||||
# Header needed by Elasticsearch when bulk inserting.
|
||||
header = {
|
||||
'index': {
|
||||
'_index': index_name
|
||||
}
|
||||
}
|
||||
update_header = {
|
||||
'update': {
|
||||
'_index': index_name,
|
||||
'_id': event_id
|
||||
}
|
||||
}
|
||||
header = {'index': {'_index': index_name}}
|
||||
update_header = {'update': {'_index': index_name, '_id': event_id}}
|
||||
|
||||
if event_id:
|
||||
# Event has "lang" defined if there is a script used for import.
|
||||
|
@ -182,7 +172,4 @@ class ElasticsearchDataStore(object):
|
|||
search_type = 'query_then_fetch'
|
||||
|
||||
return self.client.search(
|
||||
body=query_dsl,
|
||||
index=index_id,
|
||||
size=size,
|
||||
search_type=search_type)
|
||||
body=query_dsl, index=index_id, size=size, search_type=search_type)
|
||||
|
|
|
@ -24,22 +24,15 @@ postgresql_logger = logging.getLogger('dfdewey.postgresql')
|
|||
postgresql_logger.setLevel(logging.WARNING)
|
||||
|
||||
|
||||
class PostgresqlDataStore(object):
|
||||
class PostgresqlDataStore():
|
||||
"""Implements the datastore."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
host='127.0.0.1',
|
||||
port=5432,
|
||||
db_name='dfdewey',
|
||||
autocommit=False):
|
||||
self, host='127.0.0.1', port=5432, db_name='dfdewey', autocommit=False):
|
||||
"""Create a PostgreSQL client."""
|
||||
super(PostgresqlDataStore, self).__init__()
|
||||
super().__init__()
|
||||
self.db = psycopg2.connect(
|
||||
database=db_name,
|
||||
user='dfdewey',
|
||||
password='password',
|
||||
host=host,
|
||||
database=db_name, user='dfdewey', password='password', host=host,
|
||||
port=port)
|
||||
if autocommit:
|
||||
self.db.set_isolation_level(
|
||||
|
@ -60,9 +53,7 @@ class PostgresqlDataStore(object):
|
|||
rows: Array of value tuples to be inserted
|
||||
"""
|
||||
extras.execute_values(
|
||||
self.cursor,
|
||||
'INSERT INTO {0:s} VALUES %s'.format(table_spec),
|
||||
rows)
|
||||
self.cursor, 'INSERT INTO {0:s} VALUES %s'.format(table_spec), rows)
|
||||
|
||||
def execute(self, command):
|
||||
"""Execute a command in the PostgreSQL database.
|
||||
|
@ -111,10 +102,7 @@ class PostgresqlDataStore(object):
|
|||
self.db.commit()
|
||||
self.db.close()
|
||||
self.db = psycopg2.connect(
|
||||
database=db_name,
|
||||
user='dfdewey',
|
||||
password='password',
|
||||
host=host,
|
||||
database=db_name, user='dfdewey', password='password', host=host,
|
||||
port=port)
|
||||
if autocommit:
|
||||
self.db.set_isolation_level(
|
||||
|
@ -131,7 +119,8 @@ class PostgresqlDataStore(object):
|
|||
Returns:
|
||||
True if the table already exists, otherwise False
|
||||
"""
|
||||
self.cursor.execute("""
|
||||
self.cursor.execute(
|
||||
"""
|
||||
SELECT 1 FROM information_schema.tables
|
||||
WHERE table_schema = '{0:s}' AND table_name = '{1:s}'""".format(
|
||||
table_schema, table_name))
|
||||
|
@ -149,9 +138,9 @@ class PostgresqlDataStore(object):
|
|||
Returns:
|
||||
True if the value exists, otherwise False
|
||||
"""
|
||||
self.cursor.execute("""
|
||||
self.cursor.execute(
|
||||
"""
|
||||
SELECT 1 from {0:s}
|
||||
WHERE {1:s} = '{2:s}'""".format(
|
||||
table_name, column_name, value))
|
||||
WHERE {1:s} = '{2:s}'""".format(table_name, column_name, value))
|
||||
|
||||
return self.cursor.fetchone()
|
||||
|
|
|
@ -25,7 +25,6 @@ from dfdewey.datastore.elastic import ElasticsearchDataStore
|
|||
from dfdewey.datastore.postgresql import PostgresqlDataStore
|
||||
from dfdewey.utils import image
|
||||
|
||||
|
||||
STRING_INDEXING_LOG_INTERVAL = 10000000
|
||||
|
||||
|
||||
|
@ -89,10 +88,7 @@ def process_image(image_file, case, base64, gunzip, unzip):
|
|||
image_path = os.path.abspath(image_file)
|
||||
output_path = tempfile.mkdtemp()
|
||||
|
||||
cmd = ['bulk_extractor',
|
||||
'-o', output_path,
|
||||
'-x', 'all',
|
||||
'-e', 'wordlist']
|
||||
cmd = ['bulk_extractor', '-o', output_path, '-x', 'all', '-e', 'wordlist']
|
||||
|
||||
if base64:
|
||||
cmd.extend(['-e', 'base64'])
|
||||
|
@ -109,7 +105,7 @@ def process_image(image_file, case, base64, gunzip, unzip):
|
|||
print('\n*** Running bulk extractor:\n{0:s}'.format(' '.join(cmd)))
|
||||
output = subprocess.check_output(cmd)
|
||||
md5_offset = output.index(b'MD5') + 19
|
||||
image_hash = output[md5_offset:md5_offset+32].decode('utf-8')
|
||||
image_hash = output[md5_offset:md5_offset + 32].decode('utf-8')
|
||||
print('String extraction completed: {0!s}'.format(datetime.datetime.now()))
|
||||
|
||||
print('\n*** Parsing image')
|
||||
|
@ -211,8 +207,9 @@ def search(query, case, image_path=None, query_list=None):
|
|||
|
||||
images[image_hash[0]] = image_path
|
||||
else:
|
||||
print('No image specified, searching all images in case \'{0:s}\''.format(
|
||||
case))
|
||||
print(
|
||||
'No image specified, searching all images in case \'{0:s}\''.format(
|
||||
case))
|
||||
image_hashes = case_db.query(
|
||||
'SELECT image_hash FROM image_case WHERE case_id = \'{0:s}\''.format(
|
||||
case))
|
||||
|
@ -234,29 +231,28 @@ def search(query, case, image_path=None, query_list=None):
|
|||
term = ''.join(('"', term.strip(), '"'))
|
||||
results = search_index(index, term)
|
||||
if results['hits']['total']['value'] > 0:
|
||||
print('{0:s} - {1:d} hits'.format(
|
||||
term, results['hits']['total']['value']))
|
||||
print(
|
||||
'{0:s} - {1:d} hits'.format(
|
||||
term, results['hits']['total']['value']))
|
||||
else:
|
||||
print('\n*** Searching for \'{0:s}\'...'.format(query))
|
||||
results = search_index(index, query)
|
||||
print('Returned {0:d} results:'.format(results['hits']['total']['value']))
|
||||
for hit in results['hits']['hits']:
|
||||
filename = image.get_filename_from_offset(
|
||||
image_path,
|
||||
hit['_source']['image'],
|
||||
int(hit['_source']['offset']))
|
||||
image_path, hit['_source']['image'], int(hit['_source']['offset']))
|
||||
if hit['_source']['file_offset']:
|
||||
print('Offset: {0:d}\tFile: {1:s}\tFile offset:{2:s}\t'
|
||||
'String: {3:s}'.format(
|
||||
hit['_source']['offset'],
|
||||
filename,
|
||||
hit['_source']['file_offset'],
|
||||
hit['_source']['data'].strip()))
|
||||
print(
|
||||
'Offset: {0:d}\tFile: {1:s}\tFile offset:{2:s}\t'
|
||||
'String: {3:s}'.format(
|
||||
hit['_source']['offset'], filename,
|
||||
hit['_source']['file_offset'],
|
||||
hit['_source']['data'].strip()))
|
||||
else:
|
||||
print('Offset: {0:d}\tFile: {1:s}\tString: {2:s}'.format(
|
||||
hit['_source']['offset'],
|
||||
filename,
|
||||
hit['_source']['data'].strip()))
|
||||
print(
|
||||
'Offset: {0:d}\tFile: {1:s}\tString: {2:s}'.format(
|
||||
hit['_source']['offset'], filename,
|
||||
hit['_source']['data'].strip()))
|
||||
|
||||
|
||||
def search_index(index_id, search_query):
|
||||
|
@ -278,8 +274,8 @@ def main():
|
|||
args = parse_args()
|
||||
if not args.search and not args.search_list:
|
||||
process_image(
|
||||
args.image, args.case,
|
||||
not args.no_base64, not args.no_gzip, not args.no_zip)
|
||||
args.image, args.case, not args.no_base64, not args.no_gzip,
|
||||
not args.no_zip)
|
||||
elif args.search:
|
||||
search(args.search, args.case, args.image)
|
||||
elif args.search_list:
|
||||
|
|
|
@ -12,4 +12,4 @@
|
|||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""DFDewey Utils Package."""
|
||||
"""DFDewey Utils Package."""
|
||||
|
|
|
@ -14,9 +14,10 @@
|
|||
# limitations under the License.
|
||||
"""Image File Access Functions."""
|
||||
|
||||
from dfdewey.datastore.postgresql import PostgresqlDataStore
|
||||
import pytsk3
|
||||
|
||||
from dfdewey.datastore.postgresql import PostgresqlDataStore
|
||||
|
||||
|
||||
def initialise_block_db(image_path, image_hash, case):
|
||||
"""Creates a new image database.
|
||||
|
@ -68,7 +69,8 @@ def check_tracking_database(tracking_db, image_path, image_hash, case):
|
|||
tracking_db.execute(
|
||||
'CREATE TABLE images (image_path TEXT, image_hash TEXT PRIMARY KEY)')
|
||||
|
||||
tracking_db.execute("""
|
||||
tracking_db.execute(
|
||||
"""
|
||||
CREATE TABLE image_case (
|
||||
case_id TEXT, image_hash TEXT REFERENCES images(image_hash),
|
||||
PRIMARY KEY (case_id, image_hash))""")
|
||||
|
@ -77,7 +79,8 @@ def check_tracking_database(tracking_db, image_path, image_hash, case):
|
|||
|
||||
image_case_exists = False
|
||||
if image_exists:
|
||||
image_case = tracking_db.query_single_row("""
|
||||
image_case = tracking_db.query_single_row(
|
||||
"""
|
||||
SELECT 1 from image_case
|
||||
WHERE image_hash = '{0:s}' AND case_id = '{1:s}'""".format(
|
||||
image_hash, case))
|
||||
|
@ -85,11 +88,13 @@ def check_tracking_database(tracking_db, image_path, image_hash, case):
|
|||
image_case_exists = True
|
||||
|
||||
if not image_exists:
|
||||
tracking_db.execute("""
|
||||
tracking_db.execute(
|
||||
"""
|
||||
INSERT INTO images (image_path, image_hash)
|
||||
VALUES ('{0:s}', '{1:s}')""".format(image_path, image_hash))
|
||||
if not image_case_exists:
|
||||
tracking_db.execute("""
|
||||
tracking_db.execute(
|
||||
"""
|
||||
INSERT INTO image_case (case_id, image_hash)
|
||||
VALUES ('{0:s}', '{1:s}')""".format(case, image_hash))
|
||||
|
||||
|
@ -118,18 +123,25 @@ def populate_block_db(img, block_db, batch_size=1500):
|
|||
has_partition_table = True
|
||||
rows = []
|
||||
for part in volume:
|
||||
print('Parsing partition {0:d}: {1:s}'.format(
|
||||
part.addr, part.desc.decode('utf-8')))
|
||||
print(
|
||||
'Parsing partition {0:d}: {1:s}'.format(
|
||||
part.addr, part.desc.decode('utf-8')))
|
||||
if part.flags != pytsk3.TSK_VS_PART_FLAG_ALLOC:
|
||||
continue
|
||||
fs = pytsk3.FS_Info(img, offset=part.start * volume.info.block_size)
|
||||
for inode in range(fs.info.first_inum, fs.info.last_inum + 1):
|
||||
file = fs.open_meta(inode)
|
||||
filesystem = pytsk3.FS_Info(
|
||||
img, offset=part.start * volume.info.block_size)
|
||||
for inode in range(filesystem.info.first_inum,
|
||||
filesystem.info.last_inum + 1):
|
||||
file = filesystem.open_meta(inode)
|
||||
if file.info.meta.nlink > 0:
|
||||
for attr in file:
|
||||
for run in attr:
|
||||
for block in range(run.len):
|
||||
rows.append((run.addr + block, inode, part.addr,))
|
||||
rows.append((
|
||||
run.addr + block,
|
||||
inode,
|
||||
part.addr,
|
||||
))
|
||||
if len(rows) >= batch_size:
|
||||
block_db.bulk_insert('blocks (block, inum, part)', rows)
|
||||
rows = []
|
||||
|
@ -137,22 +149,26 @@ def populate_block_db(img, block_db, batch_size=1500):
|
|||
block_db.bulk_insert('blocks (block, inum, part)', rows)
|
||||
|
||||
# File names
|
||||
directory = fs.open_dir(path='/')
|
||||
directory = filesystem.open_dir(path='/')
|
||||
list_directory(block_db, directory, part=part.addr, batch_size=batch_size)
|
||||
except IOError:
|
||||
pass
|
||||
|
||||
if not has_partition_table:
|
||||
fs = pytsk3.FS_Info(img)
|
||||
filesystem = pytsk3.FS_Info(img)
|
||||
rows = []
|
||||
for inode in range(fs.info.first_inum, fs.info.last_inum + 1):
|
||||
for inode in range(filesystem.info.first_inum,
|
||||
filesystem.info.last_inum + 1):
|
||||
try:
|
||||
file = fs.open_meta(inode)
|
||||
file = filesystem.open_meta(inode)
|
||||
if file.info.meta.nlink > 0:
|
||||
for attr in file:
|
||||
for run in attr:
|
||||
for block in range(run.len):
|
||||
rows.append((run.addr + block, inode,))
|
||||
rows.append((
|
||||
run.addr + block,
|
||||
inode,
|
||||
))
|
||||
if len(rows) >= batch_size:
|
||||
block_db.bulk_insert('blocks (block, inum)', rows)
|
||||
rows = []
|
||||
|
@ -162,7 +178,7 @@ def populate_block_db(img, block_db, batch_size=1500):
|
|||
continue
|
||||
|
||||
# File names
|
||||
directory = fs.open_dir(path='/')
|
||||
directory = filesystem.open_dir(path='/')
|
||||
list_directory(block_db, directory, batch_size=batch_size)
|
||||
|
||||
block_db.execute('CREATE INDEX blocks_index ON blocks (block, part);')
|
||||
|
@ -205,15 +221,19 @@ def list_directory(
|
|||
print('Unable to decode: {}'.format(directory_entry.info.name.name))
|
||||
continue
|
||||
if part:
|
||||
rows.append((directory_entry.info.meta.addr,
|
||||
name.replace('\'', '\'\''),
|
||||
part,))
|
||||
rows.append((
|
||||
directory_entry.info.meta.addr,
|
||||
name.replace('\'', '\'\''),
|
||||
part,
|
||||
))
|
||||
if len(rows) >= batch_size:
|
||||
block_db.bulk_insert('files (inum, filename, part)', rows)
|
||||
rows = []
|
||||
else:
|
||||
rows.append((directory_entry.info.meta.addr,
|
||||
name.replace('\'', '\'\''),))
|
||||
rows.append((
|
||||
directory_entry.info.meta.addr,
|
||||
name.replace('\'', '\'\''),
|
||||
))
|
||||
if len(rows) >= batch_size:
|
||||
block_db.bulk_insert('files (inum, filename)', rows)
|
||||
rows = []
|
||||
|
@ -224,11 +244,7 @@ def list_directory(
|
|||
|
||||
if inode not in stack:
|
||||
rows = list_directory(
|
||||
block_db,
|
||||
sub_directory,
|
||||
part=part,
|
||||
stack=stack,
|
||||
rows=rows,
|
||||
block_db, sub_directory, part=part, stack=stack, rows=rows,
|
||||
batch_size=batch_size)
|
||||
|
||||
except IOError:
|
||||
|
@ -281,14 +297,14 @@ def get_filename_from_offset(image_path, image_hash, offset):
|
|||
if not unalloc_part:
|
||||
try:
|
||||
if not partition_offset:
|
||||
fs = pytsk3.FS_Info(img)
|
||||
filesystem = pytsk3.FS_Info(img)
|
||||
else:
|
||||
offset -= partition_offset * device_block_size
|
||||
fs = pytsk3.FS_Info(
|
||||
filesystem = pytsk3.FS_Info(
|
||||
img, offset=partition_offset * device_block_size)
|
||||
except TypeError as e:
|
||||
print(e)
|
||||
block_size = fs.info.block_size
|
||||
block_size = filesystem.info.block_size
|
||||
|
||||
inums = get_inums(block_db, offset / block_size, part=partition)
|
||||
|
||||
|
@ -296,7 +312,7 @@ def get_filename_from_offset(image_path, image_hash, offset):
|
|||
if inums:
|
||||
for i in inums:
|
||||
real_inum = i[0]
|
||||
if i[0] == 0 and fs.info.ftype == pytsk3.TSK_FS_TYPE_NTFS_DETECT:
|
||||
if i[0] == 0 and filesystem.info.ftype == pytsk3.TSK_FS_TYPE_NTFS_DETECT:
|
||||
mft_record_size_offset = 0x40
|
||||
if partition_offset:
|
||||
mft_record_size_offset = \
|
||||
|
@ -304,10 +320,10 @@ def get_filename_from_offset(image_path, image_hash, offset):
|
|||
mft_record_size = int.from_bytes(
|
||||
img.read(mft_record_size_offset, 1), 'little', signed=True)
|
||||
if mft_record_size < 0:
|
||||
mft_record_size = 2 ** (mft_record_size * -1)
|
||||
mft_record_size = 2**(mft_record_size * -1)
|
||||
else:
|
||||
mft_record_size = mft_record_size * block_size
|
||||
real_inum = get_resident_inum(offset, fs, mft_record_size)
|
||||
real_inum = get_resident_inum(offset, filesystem, mft_record_size)
|
||||
filename = get_filename(block_db, real_inum, part=partition)
|
||||
if filename and not filenames:
|
||||
filenames.append('{0:s} ({1:d})'.format(filename, real_inum))
|
||||
|
@ -343,21 +359,21 @@ def get_inums(block_db, block, part=None):
|
|||
return inums
|
||||
|
||||
|
||||
def get_resident_inum(offset, fs, mft_record_size):
|
||||
def get_resident_inum(offset, filesystem, mft_record_size):
|
||||
"""Gets the inode number associated with NTFS $MFT resident data.
|
||||
|
||||
Args:
|
||||
offset: Data offset within volume
|
||||
fs: pytsk3 FS_INFO object
|
||||
filesystem: pytsk3 FS_INFO object
|
||||
mft_record_size: Size of an $MFT entry
|
||||
|
||||
Returns:
|
||||
inode number of resident data
|
||||
"""
|
||||
block_size = fs.info.block_size
|
||||
block_size = filesystem.info.block_size
|
||||
offset_block = int(offset / block_size)
|
||||
|
||||
inode = fs.open_meta(0)
|
||||
inode = filesystem.open_meta(0)
|
||||
mft_entry = 0
|
||||
for attr in inode:
|
||||
for run in attr:
|
||||
|
|
42
dfdewey/yapf_test.py
Normal file
42
dfdewey/yapf_test.py
Normal file
|
@ -0,0 +1,42 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# Copyright 2020 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# https://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Enforce code style with YAPF."""
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
import unittest
|
||||
|
||||
|
||||
class StyleTest(unittest.TestCase):
  """Checks the package sources against the project's YAPF style file."""

  def testCodeStyle(self):
    """Runs YAPF in diff mode over this package.

    Raises:
      Exception: if the yapf command exits with a non-zero status; the
          message carries the captured yapf output and the command to run
          to reformat the code in place.
    """
    package_dir = os.path.abspath(os.path.dirname(__file__))
    style_file = os.path.join(package_dir, '..', '.style.yapf')
    yapf_command = ['yapf', '--style', style_file, '--diff', '-r', package_dir]

    try:
      subprocess.check_output(yapf_command)
    except subprocess.CalledProcessError as e:
      # Without captured output there is nothing to report; propagate as-is.
      if not hasattr(e, 'output'):
        raise
      yapf_output = e.output.decode('utf-8')
      message = (
          'Run "yapf --style {0:s} -i -r {1:s}" '
          'to correct these problems: {2:s}'.format(
              style_file, package_dir, yapf_output))
      raise Exception(message) from e
|
||||
|
||||
|
||||
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
  unittest.main()
|
23
run_tests.py
Executable file
23
run_tests.py
Executable file
|
@ -0,0 +1,23 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright 2020 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# https://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Script to run tests."""
|
||||
|
||||
import subprocess
|
||||
|
||||
if __name__ == '__main__':
  # Build the nosetests invocation: verbose output, with coverage limited to
  # the dfdewey package. check_call raises CalledProcessError when the
  # command exits with a non-zero status.
  nose_command = ['nosetests', '-vv']
  nose_command += ['--with-coverage', '--cover-package=dfdewey', '--exe']
  subprocess.check_call(nose_command)
|
10
setup.py
10
setup.py
|
@ -20,9 +20,13 @@ import sys
|
|||
from setuptools import find_packages
|
||||
from setuptools import setup
|
||||
|
||||
import dfdewey
|
||||
|
||||
sys.path.insert(0, '.')
|
||||
|
||||
import dfdewey
|
||||
DFDEWEY_DESCRIPTION = (
|
||||
'dfDewey is a digital forensics string extraction, indexing, and searching '
|
||||
'tool.')
|
||||
|
||||
requirements = []
|
||||
with open('requirements.txt','r') as f:
|
||||
|
@ -30,7 +34,7 @@ with open('requirements.txt','r') as f:
|
|||
setup(
|
||||
name='dfDewey',
|
||||
version=dfdewey.__version__,
|
||||
description='dfDewey is a digital forensics string extraction, indexing, and searching tool.',
|
||||
description=DFDEWEY_DESCRIPTION,
|
||||
license='Apache License, Version 2.0',
|
||||
maintainer='dfDewey development team',
|
||||
maintainer_email='dfdewey-dev@googlegroups.com',
|
||||
|
@ -38,6 +42,6 @@ setup(
|
|||
include_package_data=True,
|
||||
install_requires=requirements,
|
||||
extras_require={
|
||||
'dev': []
|
||||
'dev': ['mock', 'nose', 'yapf', 'coverage']
|
||||
}
|
||||
)
|
||||
|
|
Loading…
Reference in a new issue