Add JSON output (#32)
* Move to pytest
* Replace imp with importlib
* Add JSON output
* Update unit test actions
parent 7aadd41ee2
commit 5da497d49c

9 changed files with 102 additions and 50 deletions
.github/workflows/unit-tests.yml (vendored): 59 changes
@@ -5,36 +5,6 @@ on:
     types: [opened, synchronize, reopened]
 
 jobs:
-  build-bionic:
-    runs-on: ubuntu-18.04
-    strategy:
-      matrix:
-        include:
-          - python-version: '3.6'
-          - python-version: '3.7'
-
-    steps:
-      - uses: actions/checkout@v2
-
-      - name: Set up Python ${{ matrix.python-version }}
-        env:
-          DEBIAN_FRONTEND: noninteractive
-        run: |
-          sudo apt update -q
-          sudo apt install -y software-properties-common
-
-      - name: Install dependencies
-        env:
-          DEBIAN_FRONTEND: noninteractive
-        run: |
-          sudo add-apt-repository -y ppa:gift/stable
-          sudo apt update -q
-          sudo apt install -y python${{ matrix.python-version }} python3-dfvfs python3-pip python3-setuptools
-          python3 -m pip install .[dev]
-
-      - name: Run unit tests
-        run: python3 run_tests.py
-
   build-focal:
     runs-on: ubuntu-20.04
     strategy:
@@ -64,3 +34,32 @@ jobs:
 
       - name: Run unit tests
         run: python3 run_tests.py
+
+  build-jammy:
+    runs-on: ubuntu-22.04
+    strategy:
+      matrix:
+        include:
+          - python-version: '3.10'
+
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Set up Python ${{ matrix.python-version }}
+        env:
+          DEBIAN_FRONTEND: noninteractive
+        run: |
+          sudo apt update -q
+          sudo apt install -y software-properties-common
+
+      - name: Install dependencies
+        env:
+          DEBIAN_FRONTEND: noninteractive
+        run: |
+          sudo add-apt-repository -y ppa:gift/stable
+          sudo apt update -q
+          sudo apt install -y python${{ matrix.python-version }} python3-dfvfs python3-pip python3-setuptools
+          python3 -m pip install .[dev]
+
+      - name: Run unit tests
+        run: python3 run_tests.py
@@ -14,7 +14,8 @@
 # limitations under the License.
 """DFDewey Config."""
 
-import imp
+import importlib.machinery
+import importlib.util
 import logging
 import os
 

@@ -70,12 +71,16 @@ def load_config(config_file=None):
   if config_file:
     log.debug('Loading config from {0:s}'.format(config_file))
     try:
-      config = imp.load_source('config', config_file)
-    except IOError as e:
+      spec = importlib.util.spec_from_loader(
+          'config', importlib.machinery.SourceFileLoader('config', config_file))
+      config = importlib.util.module_from_spec(spec)
+      spec.loader.exec_module(config)
+    except FileNotFoundError as e:
       log.error(
           'Could not load config file {0:s}: {1!s}'.format(config_file, e))
+      config = None
 
   if not config:
-    log.warn('Config file not loaded. Using default datastore settings.')
+    log.warning('Config file not loaded. Using default datastore settings.')
 
   return config
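
Note: the importlib-based loading in the hunk above is the standard replacement pattern for the removed imp.load_source call. A minimal standalone sketch of that pattern (not part of the commit):

import importlib.machinery
import importlib.util


def load_source(name, path):
  """Loads a Python module from a source file path (imp.load_source equivalent)."""
  loader = importlib.machinery.SourceFileLoader(name, path)
  spec = importlib.util.spec_from_loader(name, loader)
  module = importlib.util.module_from_spec(spec)
  loader.exec_module(module)
  return module
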

@@ -107,7 +107,8 @@ def main():
         image_processor_options, args.config)
     image_processor.process_image()
   else:
-    index_searcher = IndexSearcher(args.case, image_id, args.image, args.config)
+    index_searcher = IndexSearcher(
+        args.case, image_id, args.image, args.json, args.config)
     if args.search:
       index_searcher.search(args.search, args.highlight)
     elif args.search_list:

@@ -150,6 +151,8 @@ def parse_args():
   parser.add_argument(
       '--highlight', help='highlight search term in results',
       action='store_true')
+  parser.add_argument(
+      '--json', help='output results in JSON format', action='store_true')
   parser.add_argument('-s', '--search', help='search query')
   parser.add_argument('--search_list', help='file with search queries')
 
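
Note: the new --json option is a plain store_true flag, so it arrives in the parsed namespace as a boolean and is threaded into IndexSearcher. A minimal sketch of that behaviour, using only the flag shown above (the rest of dfdewey's parser is omitted):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    '--json', help='output results in JSON format', action='store_true')

print(parser.parse_args([]).json)          # False (default)
print(parser.parse_args(['--json']).json)  # True
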

@@ -61,9 +61,8 @@ class FileEntryScanner(volume_scanner.VolumeScanner):
   """File entry scanner."""
 
   _NON_PRINTABLE_CHARACTERS = list(range(0, 0x20)) + list(range(0x7f, 0xa0))
-  _ESCAPE_CHARACTERS = str.maketrans({
-      value: '\\x{0:02x}'.format(value) for value in _NON_PRINTABLE_CHARACTERS
-  })
+  _ESCAPE_CHARACTERS = str.maketrans(
+      {value: '\\x{0:02x}'.format(value) for value in _NON_PRINTABLE_CHARACTERS})
 
   def __init__(self, mediator=None):
     """Initializes a file entry scanner.
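
Note: the reformatted str.maketrans call builds the same translation table as before, mapping each non-printable code point to a literal \xNN escape. A quick illustrative check (not project code):

_NON_PRINTABLE_CHARACTERS = list(range(0, 0x20)) + list(range(0x7f, 0xa0))
_ESCAPE_CHARACTERS = str.maketrans(
    {value: '\\x{0:02x}'.format(value) for value in _NON_PRINTABLE_CHARACTERS})

# The NUL byte is rendered as the four literal characters \x00.
print('abc\x00def'.translate(_ESCAPE_CHARACTERS))
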

@@ -14,6 +14,7 @@
 # limitations under the License.
 """Index searcher."""
 
+import json
 import logging
 import os
 import re

@@ -66,7 +67,7 @@ class _SearchHit():
 class IndexSearcher():
   """Index Searcher class."""
 
-  def __init__(self, case, image_id, image, config_file=None):
+  def __init__(self, case, image_id, image, json=False, config_file=None):
     """Create an index searcher."""
     super().__init__()
     self.case = case

@@ -75,6 +76,7 @@ class IndexSearcher():
     self.image = image
     self.image_id = image_id
     self.images = {}
+    self.json = json
     self.postgresql = None
     self.scanner = None
 

@@ -270,7 +272,11 @@ class IndexSearcher():
     Args:
       query_list (str): path to a text file containing multiple search terms.
     """
+    search_results = {}
     for image_hash, image_path in self.images.items():
+      search_results[image_hash] = {}
+      search_results[image_hash]['image'] = image_path
+      search_results[image_hash]['results'] = {}
       index = ''.join(('es', image_hash))
       with open(query_list, 'r') as search_terms:
         table_data = []

@@ -279,14 +285,18 @@ class IndexSearcher():
           results = self.opensearch.search(index, term)
           hit_count = results['hits']['total']['value']
           if hit_count > 0:
+            search_results[image_hash]['results'][term] = hit_count
             table_data.append({'Search term': term, 'Hits': hit_count})
       if table_data:
         output = tabulate(table_data, headers='keys', tablefmt='simple')
       else:
         output = 'No results.'
-      log.info(
-          'Searched %s (%s) for terms in %s\n\n%s\n', image_path, image_hash,
-          query_list, output)
+      if not self.json:
+        log.info(
+            'Searched %s (%s) for terms in %s\n\n%s\n', image_path, image_hash,
+            query_list, output)
+    if self.json:
+      log.info('%s', json.JSONEncoder().encode(search_results))
 
   def search(self, query, highlight=False):
     """Run a single query.

@@ -295,7 +305,10 @@ class IndexSearcher():
       query (str): query to run.
       highlight (bool): flag to highlight search term in results.
     """
+    search_results = {}
     for image_hash, image_path in self.images.items():
+      search_results[image_hash] = {}
+      search_results[image_hash]['image'] = image_path
       log.info('Searching %s (%s) for "%s"', image_path, image_hash, query)
       index = ''.join(('es', image_hash))
       results = self.opensearch.search(index, query)

@@ -329,7 +342,11 @@ class IndexSearcher():
         hit.data = self._highlight_hit(hit.data, hit_positions)
         hit.data = '\n'.join(hit.data)
         hits.append(hit.copy_to_dict())
-      output = tabulate(hits, headers='keys', tablefmt='simple')
-      log.info(
-          'Returned %d results in %dms.\n\n%s\n', result_count, time_taken,
-          output)
+      search_results[image_hash][query] = hits
+      if not self.json:
+        output = tabulate(hits, headers='keys', tablefmt='simple')
+        log.info(
+            'Returned %d results in %dms.\n\n%s\n', result_count, time_taken,
+            output)
+    if self.json:
+      log.info('%s', json.JSONEncoder().encode(search_results))
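
Note: when --json is set, both search() and list_search() log a single JSON document instead of a table. Based on the expected_output strings in the updated tests below, the encoded structure looks roughly like this (the hash, image path, offsets, and counts are placeholder values):

import json

# list_search(): per-image hit counts, keyed by the quoted search term.
list_search_results = {
    'imagehash': {
        'image': '/path/to/image.dd',
        'results': {'"test"': 1},
    }
}

# search(): per-image list of hit dictionaries, keyed by the query.
search_results = {
    'imagehash': {
        'image': '/path/to/image.dd',
        'test': [{'Offset': '12889600\nGZIP-0', 'Filename (inode)': '', 'String': 'test'}],
    }
}

print(json.JSONEncoder().encode(list_search_results))
print(json.JSONEncoder().encode(search_results))
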

@@ -140,17 +140,27 @@ class IndexSearcherTest(unittest.TestCase):
     index_searcher.images = {TEST_IMAGE_HASH: TEST_IMAGE}
     current_path = os.path.abspath(os.path.dirname(__file__))
     query_list = os.path.join(
-        current_path, '..', '..', 'test_data', 'wordlist.txt')
+        current_path, '..', '..', 'test_data', 'searchlist.txt')
     mock_search.return_value = {'hits': {'total': {'value': 1}}}
     index_searcher.list_search(query_list)
-    self.assertEqual(mock_search.call_count, 8)
+    self.assertEqual(mock_search.call_count, 5)
     mock_output.assert_called_once()
     self.assertEqual(mock_output.call_args.args[1], TEST_IMAGE)
     self.assertEqual(mock_output.call_args.args[2], TEST_IMAGE_HASH)
     self.assertEqual(mock_output.call_args.args[3], query_list)
 
+    # Test JSON output
+    expected_output = '{"%s": {"image": "%s", "results": {"\\"list\\"": 1, "\\"of\\"": 1, "\\"test\\"": 1, "\\"search\\"": 1, "\\"terms\\"": 1}}}' % (
+        TEST_IMAGE_HASH, TEST_IMAGE)
+    mock_output.reset_mock()
+    index_searcher.json = True
+    index_searcher.list_search(query_list)
+    mock_output.assert_called_once()
+    self.assertEqual(mock_output.call_args.args[1], expected_output)
+
     # Test no results
     mock_output.reset_mock()
+    index_searcher.json = False
     mock_search.return_value = {'hits': {'total': {'value': 0}}}
     index_searcher.list_search(query_list)
     mock_output.assert_called_once()

@@ -211,6 +221,17 @@ class IndexSearcherTest(unittest.TestCase):
     self.assertEqual(table_output[106:110], 'test')
     self.assertEqual(table_output[111:117], 'GZIP-0')
 
+    # Test JSON output
+    expected_output = '{"%s": {"image": "%s", "test": [{"Offset": "12889600\\nGZIP-0", "Filename (inode)": "", "String": "test"}]}}' % (
+        TEST_IMAGE_HASH, image_path)
+    mock_search.reset_mock()
+    mock_output.reset_mock()
+    index_searcher.json = True
+    index_searcher.search('test')
+    mock_search.assert_called_once()
+    output_calls = mock_output.mock_calls
+    self.assertEqual(output_calls[1].args[1], expected_output)
+
   def test_wrap_filenames(self):
     """Test wrap filenames method."""
     index_searcher = self._get_index_searcher()

@@ -19,5 +19,8 @@ import subprocess
 
 if __name__ == '__main__':
   subprocess.check_call([
-      'nosetests', '-vv', '--with-coverage', '--cover-package=dfdewey', '--exe'
+      'coverage', 'run', '--source=dfdewey', '-m', 'pytest'
+  ])
+  subprocess.check_call([
+      'coverage', 'report'
   ])

setup.py: 2 changes
@@ -67,7 +67,7 @@ setup(
     ],
     install_requires=requirements,
     extras_require={
-        'dev': ['mock', 'nose', 'yapf', 'coverage']
+        'dev': ['mock', 'pytest', 'yapf', 'coverage']
     },
     entry_points={'console_scripts': ['dfdewey=dfdewey.dfdcli:main']},
     python_requires='>=3.6',

test_data/searchlist.txt (new file): 5 additions
@@ -0,0 +1,5 @@
+list
+of
+test
+search
+terms