mirror of https://github.com/arsenetar/dupeguru.git

Format files with black

- Format all files with black
- Update tox.ini flake8 arguments to be compatible with black (see the sketch below)
- Add black to requirements-extra.txt
- Reduce ignored flake8 rules and fix a few violations
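For context, a minimal sketch of what a black-compatible flake8 setup in tox.ini and requirements-extra.txt typically looks like. The concrete values below (line length, ignored codes, the unpinned requirement) are illustrative assumptions; the actual hunks for those two files are not shown in this excerpt.

# tox.ini (illustrative values, not necessarily the committed ones)
[flake8]
# black wraps lines at 88 columns and produces E203/W503-style breaks,
# so relaxing these is the usual way to keep flake8 and black compatible
max-line-length = 88
extend-ignore = E203, W503

# requirements-extra.txt (illustrative): add the formatter itself
black

# reformatting the whole tree is then a single command: black .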
2019-12-31 20:16:27 -06:00
parent 359d6498f7
commit 7ba8aa3514
141 changed files with 5241 additions and 3648 deletions


@@ -20,93 +20,106 @@ from .results_test import GetTestGroups
from .. import app, fs, engine
from ..scanner import ScanType
def add_fake_files_to_directories(directories, files):
directories.get_files = lambda j=None: iter(files)
directories._dirs.append('this is just so Scan() doesnt return 3')
directories._dirs.append("this is just so Scan() doesnt return 3")
class TestCaseDupeGuru:
def test_apply_filter_calls_results_apply_filter(self, monkeypatch):
dgapp = TestApp().app
monkeypatch.setattr(dgapp.results, 'apply_filter', log_calls(dgapp.results.apply_filter))
dgapp.apply_filter('foo')
monkeypatch.setattr(
dgapp.results, "apply_filter", log_calls(dgapp.results.apply_filter)
)
dgapp.apply_filter("foo")
eq_(2, len(dgapp.results.apply_filter.calls))
call = dgapp.results.apply_filter.calls[0]
assert call['filter_str'] is None
assert call["filter_str"] is None
call = dgapp.results.apply_filter.calls[1]
eq_('foo', call['filter_str'])
eq_("foo", call["filter_str"])
def test_apply_filter_escapes_regexp(self, monkeypatch):
dgapp = TestApp().app
monkeypatch.setattr(dgapp.results, 'apply_filter', log_calls(dgapp.results.apply_filter))
dgapp.apply_filter('()[]\\.|+?^abc')
monkeypatch.setattr(
dgapp.results, "apply_filter", log_calls(dgapp.results.apply_filter)
)
dgapp.apply_filter("()[]\\.|+?^abc")
call = dgapp.results.apply_filter.calls[1]
eq_('\\(\\)\\[\\]\\\\\\.\\|\\+\\?\\^abc', call['filter_str'])
dgapp.apply_filter('(*)') # In "simple mode", we want the * to behave as a wildcard
eq_("\\(\\)\\[\\]\\\\\\.\\|\\+\\?\\^abc", call["filter_str"])
dgapp.apply_filter(
"(*)"
) # In "simple mode", we want the * to behave as a wildcard
call = dgapp.results.apply_filter.calls[3]
eq_(r'\(.*\)', call['filter_str'])
dgapp.options['escape_filter_regexp'] = False
dgapp.apply_filter('(abc)')
eq_(r"\(.*\)", call["filter_str"])
dgapp.options["escape_filter_regexp"] = False
dgapp.apply_filter("(abc)")
call = dgapp.results.apply_filter.calls[5]
eq_('(abc)', call['filter_str'])
eq_("(abc)", call["filter_str"])
def test_copy_or_move(self, tmpdir, monkeypatch):
# The goal here is just to have a test for a previous blowup I had. I know my test coverage
# for this unit is pathetic. What's done is done. My approach now is to add tests for
# every change I want to make. The blowup was caused by a missing import.
p = Path(str(tmpdir))
p['foo'].open('w').close()
monkeypatch.setattr(hscommon.conflict, 'smart_copy', log_calls(lambda source_path, dest_path: None))
p["foo"].open("w").close()
monkeypatch.setattr(
hscommon.conflict,
"smart_copy",
log_calls(lambda source_path, dest_path: None),
)
# XXX This monkeypatch is temporary. will be fixed in a better monkeypatcher.
monkeypatch.setattr(app, 'smart_copy', hscommon.conflict.smart_copy)
monkeypatch.setattr(os, 'makedirs', lambda path: None) # We don't want the test to create that fake directory
monkeypatch.setattr(app, "smart_copy", hscommon.conflict.smart_copy)
monkeypatch.setattr(
os, "makedirs", lambda path: None
) # We don't want the test to create that fake directory
dgapp = TestApp().app
dgapp.directories.add_path(p)
[f] = dgapp.directories.get_files()
dgapp.copy_or_move(f, True, 'some_destination', 0)
dgapp.copy_or_move(f, True, "some_destination", 0)
eq_(1, len(hscommon.conflict.smart_copy.calls))
call = hscommon.conflict.smart_copy.calls[0]
eq_(call['dest_path'], op.join('some_destination', 'foo'))
eq_(call['source_path'], f.path)
eq_(call["dest_path"], op.join("some_destination", "foo"))
eq_(call["source_path"], f.path)
def test_copy_or_move_clean_empty_dirs(self, tmpdir, monkeypatch):
tmppath = Path(str(tmpdir))
sourcepath = tmppath['source']
sourcepath = tmppath["source"]
sourcepath.mkdir()
sourcepath['myfile'].open('w')
sourcepath["myfile"].open("w")
app = TestApp().app
app.directories.add_path(tmppath)
[myfile] = app.directories.get_files()
monkeypatch.setattr(app, 'clean_empty_dirs', log_calls(lambda path: None))
app.copy_or_move(myfile, False, tmppath['dest'], 0)
monkeypatch.setattr(app, "clean_empty_dirs", log_calls(lambda path: None))
app.copy_or_move(myfile, False, tmppath["dest"], 0)
calls = app.clean_empty_dirs.calls
eq_(1, len(calls))
eq_(sourcepath, calls[0]['path'])
eq_(sourcepath, calls[0]["path"])
def test_Scan_with_objects_evaluating_to_false(self):
class FakeFile(fs.File):
def __bool__(self):
return False
# At some point, any() was used in a wrong way that made Scan() wrongly return 1
app = TestApp().app
f1, f2 = [FakeFile('foo') for i in range(2)]
f1, f2 = [FakeFile("foo") for i in range(2)]
f1.is_ref, f2.is_ref = (False, False)
assert not (bool(f1) and bool(f2))
add_fake_files_to_directories(app.directories, [f1, f2])
app.start_scanning() # no exception
app.start_scanning() # no exception
@mark.skipif("not hasattr(os, 'link')")
def test_ignore_hardlink_matches(self, tmpdir):
# If the ignore_hardlink_matches option is set, don't match files hardlinking to the same
# inode.
tmppath = Path(str(tmpdir))
tmppath['myfile'].open('w').write('foo')
os.link(str(tmppath['myfile']), str(tmppath['hardlink']))
tmppath["myfile"].open("w").write("foo")
os.link(str(tmppath["myfile"]), str(tmppath["hardlink"]))
app = TestApp().app
app.directories.add_path(tmppath)
app.options['scan_type'] = ScanType.Contents
app.options['ignore_hardlink_matches'] = True
app.options["scan_type"] = ScanType.Contents
app.options["ignore_hardlink_matches"] = True
app.start_scanning()
eq_(len(app.results.groups), 0)
@@ -116,27 +129,32 @@ class TestCaseDupeGuru:
# making the selected row None. Don't crash when it happens.
dgapp = TestApp().app
# selected_row is None because there's no result.
assert not dgapp.result_table.rename_selected('foo') # no crash
assert not dgapp.result_table.rename_selected("foo") # no crash
class TestCaseDupeGuru_clean_empty_dirs:
def pytest_funcarg__do_setup(self, request):
monkeypatch = request.getfuncargvalue('monkeypatch')
monkeypatch.setattr(hscommon.util, 'delete_if_empty', log_calls(lambda path, files_to_delete=[]: None))
monkeypatch = request.getfuncargvalue("monkeypatch")
monkeypatch.setattr(
hscommon.util,
"delete_if_empty",
log_calls(lambda path, files_to_delete=[]: None),
)
# XXX This monkeypatch is temporary. will be fixed in a better monkeypatcher.
monkeypatch.setattr(app, 'delete_if_empty', hscommon.util.delete_if_empty)
monkeypatch.setattr(app, "delete_if_empty", hscommon.util.delete_if_empty)
self.app = TestApp().app
def test_option_off(self, do_setup):
self.app.clean_empty_dirs(Path('/foo/bar'))
self.app.clean_empty_dirs(Path("/foo/bar"))
eq_(0, len(hscommon.util.delete_if_empty.calls))
def test_option_on(self, do_setup):
self.app.options['clean_empty_dirs'] = True
self.app.clean_empty_dirs(Path('/foo/bar'))
self.app.options["clean_empty_dirs"] = True
self.app.clean_empty_dirs(Path("/foo/bar"))
calls = hscommon.util.delete_if_empty.calls
eq_(1, len(calls))
eq_(Path('/foo/bar'), calls[0]['path'])
eq_(['.DS_Store'], calls[0]['files_to_delete'])
eq_(Path("/foo/bar"), calls[0]["path"])
eq_([".DS_Store"], calls[0]["files_to_delete"])
def test_recurse_up(self, do_setup, monkeypatch):
# delete_if_empty must be recursively called up in the path until it returns False
@@ -144,16 +162,16 @@ class TestCaseDupeGuru_clean_empty_dirs:
def mock_delete_if_empty(path, files_to_delete=[]):
return len(path) > 1
monkeypatch.setattr(hscommon.util, 'delete_if_empty', mock_delete_if_empty)
monkeypatch.setattr(hscommon.util, "delete_if_empty", mock_delete_if_empty)
# XXX This monkeypatch is temporary. will be fixed in a better monkeypatcher.
monkeypatch.setattr(app, 'delete_if_empty', mock_delete_if_empty)
self.app.options['clean_empty_dirs'] = True
self.app.clean_empty_dirs(Path('not-empty/empty/empty'))
monkeypatch.setattr(app, "delete_if_empty", mock_delete_if_empty)
self.app.options["clean_empty_dirs"] = True
self.app.clean_empty_dirs(Path("not-empty/empty/empty"))
calls = hscommon.util.delete_if_empty.calls
eq_(3, len(calls))
eq_(Path('not-empty/empty/empty'), calls[0]['path'])
eq_(Path('not-empty/empty'), calls[1]['path'])
eq_(Path('not-empty'), calls[2]['path'])
eq_(Path("not-empty/empty/empty"), calls[0]["path"])
eq_(Path("not-empty/empty"), calls[1]["path"])
eq_(Path("not-empty"), calls[2]["path"])
class TestCaseDupeGuruWithResults:
@@ -166,10 +184,10 @@ class TestCaseDupeGuruWithResults:
self.dtree = app.dtree
self.rtable = app.rtable
self.rtable.refresh()
tmpdir = request.getfuncargvalue('tmpdir')
tmpdir = request.getfuncargvalue("tmpdir")
tmppath = Path(str(tmpdir))
tmppath['foo'].mkdir()
tmppath['bar'].mkdir()
tmppath["foo"].mkdir()
tmppath["bar"].mkdir()
self.app.directories.add_path(tmppath)
def test_GetObjects(self, do_setup):
@@ -187,8 +205,8 @@ class TestCaseDupeGuruWithResults:
def test_GetObjects_after_sort(self, do_setup):
objects = self.objects
groups = self.groups[:] # we need an un-sorted reference
self.rtable.sort('name', False)
groups = self.groups[:] # we need an un-sorted reference
self.rtable.sort("name", False)
r = self.rtable[1]
assert r._group is groups[1]
assert r._dupe is objects[4]
@@ -198,7 +216,7 @@ class TestCaseDupeGuruWithResults:
self.rtable.select([1, 2, 3])
self.app.remove_selected()
# The first 2 dupes have been removed. The 3rd one is a ref. it stays there, in first pos.
eq_(self.rtable.selected_indexes, [1]) # no exception
eq_(self.rtable.selected_indexes, [1]) # no exception
def test_selectResultNodePaths(self, do_setup):
app = self.app
@@ -220,9 +238,9 @@ class TestCaseDupeGuruWithResults:
def test_selectResultNodePaths_after_sort(self, do_setup):
app = self.app
objects = self.objects
groups = self.groups[:] #To keep the old order in memory
self.rtable.sort('name', False) #0
#Now, the group order is supposed to be reversed
groups = self.groups[:] # To keep the old order in memory
self.rtable.sort("name", False) # 0
# Now, the group order is supposed to be reversed
self.rtable.select([1, 2, 3])
eq_(len(app.selected_dupes), 3)
assert app.selected_dupes[0] is objects[4]
@@ -242,13 +260,13 @@ class TestCaseDupeGuruWithResults:
self.rtable.power_marker = True
self.rtable.select([0, 1, 2])
app.remove_selected()
eq_(self.rtable.selected_indexes, []) # no exception
eq_(self.rtable.selected_indexes, []) # no exception
def test_selectPowerMarkerRows_after_sort(self, do_setup):
app = self.app
objects = self.objects
self.rtable.power_marker = True
self.rtable.sort('name', False)
self.rtable.sort("name", False)
self.rtable.select([0, 1, 2])
eq_(len(app.selected_dupes), 3)
assert app.selected_dupes[0] is objects[4]
@@ -285,11 +303,11 @@ class TestCaseDupeGuruWithResults:
def test_refreshDetailsWithSelected(self, do_setup):
self.rtable.select([1, 4])
eq_(self.dpanel.row(0), ('Filename', 'bar bleh', 'foo bar'))
self.dpanel.view.check_gui_calls(['refresh'])
eq_(self.dpanel.row(0), ("Filename", "bar bleh", "foo bar"))
self.dpanel.view.check_gui_calls(["refresh"])
self.rtable.select([])
eq_(self.dpanel.row(0), ('Filename', '---', '---'))
self.dpanel.view.check_gui_calls(['refresh'])
eq_(self.dpanel.row(0), ("Filename", "---", "---"))
self.dpanel.view.check_gui_calls(["refresh"])
def test_makeSelectedReference(self, do_setup):
app = self.app
@@ -300,12 +318,14 @@ class TestCaseDupeGuruWithResults:
assert groups[0].ref is objects[1]
assert groups[1].ref is objects[4]
def test_makeSelectedReference_by_selecting_two_dupes_in_the_same_group(self, do_setup):
def test_makeSelectedReference_by_selecting_two_dupes_in_the_same_group(
self, do_setup
):
app = self.app
objects = self.objects
groups = self.groups
self.rtable.select([1, 2, 4])
#Only [0, 0] and [1, 0] must go ref, not [0, 1] because it is a part of the same group
# Only [0, 0] and [1, 0] must go ref, not [0, 1] because it is a part of the same group
app.make_selected_reference()
assert groups[0].ref is objects[1]
assert groups[1].ref is objects[4]
@@ -314,7 +334,7 @@ class TestCaseDupeGuruWithResults:
app = self.app
self.rtable.select([1, 4])
app.remove_selected()
eq_(len(app.results.dupes), 1) # the first path is now selected
eq_(len(app.results.dupes), 1) # the first path is now selected
app.remove_selected()
eq_(len(app.results.dupes), 0)
@@ -336,27 +356,27 @@ class TestCaseDupeGuruWithResults:
def test_addDirectory_does_not_exist(self, do_setup):
app = self.app
app.add_directory('/does_not_exist')
app.add_directory("/does_not_exist")
eq_(len(app.view.messages), 1)
assert "exist" in app.view.messages[0]
def test_ignore(self, do_setup):
app = self.app
self.rtable.select([4]) #The dupe of the second, 2 sized group
self.rtable.select([4]) # The dupe of the second, 2 sized group
app.add_selected_to_ignore_list()
eq_(len(app.ignore_list), 1)
self.rtable.select([1]) #first dupe of the 3 dupes group
self.rtable.select([1]) # first dupe of the 3 dupes group
app.add_selected_to_ignore_list()
#BOTH the ref and the other dupe should have been added
# BOTH the ref and the other dupe should have been added
eq_(len(app.ignore_list), 3)
def test_purgeIgnoreList(self, do_setup, tmpdir):
app = self.app
p1 = str(tmpdir.join('file1'))
p2 = str(tmpdir.join('file2'))
open(p1, 'w').close()
open(p2, 'w').close()
dne = '/does_not_exist'
p1 = str(tmpdir.join("file1"))
p2 = str(tmpdir.join("file2"))
open(p1, "w").close()
open(p2, "w").close()
dne = "/does_not_exist"
app.ignore_list.Ignore(dne, p1)
app.ignore_list.Ignore(p2, dne)
app.ignore_list.Ignore(p1, p2)
@@ -381,9 +401,11 @@ class TestCaseDupeGuruWithResults:
# When doing a scan with results being present prior to the scan, correctly invalidate the
# results table.
app = self.app
app.JOB = Job(1, lambda *args, **kw: False) # Cancels the task
add_fake_files_to_directories(app.directories, self.objects) # We want the scan to at least start
app.start_scanning() # will be cancelled immediately
app.JOB = Job(1, lambda *args, **kw: False) # Cancels the task
add_fake_files_to_directories(
app.directories, self.objects
) # We want the scan to at least start
app.start_scanning() # will be cancelled immediately
eq_(len(app.result_table), 0)
def test_selected_dupes_after_removal(self, do_setup):
@@ -401,21 +423,21 @@ class TestCaseDupeGuruWithResults:
# Ref #238
self.rtable.delta_values = True
self.rtable.power_marker = True
self.rtable.sort('dupe_count', False)
self.rtable.sort("dupe_count", False)
# don't crash
self.rtable.sort('percentage', False)
self.rtable.sort("percentage", False)
# don't crash
class TestCaseDupeGuru_renameSelected:
def pytest_funcarg__do_setup(self, request):
tmpdir = request.getfuncargvalue('tmpdir')
tmpdir = request.getfuncargvalue("tmpdir")
p = Path(str(tmpdir))
fp = open(str(p['foo bar 1']), mode='w')
fp = open(str(p["foo bar 1"]), mode="w")
fp.close()
fp = open(str(p['foo bar 2']), mode='w')
fp = open(str(p["foo bar 2"]), mode="w")
fp.close()
fp = open(str(p['foo bar 3']), mode='w')
fp = open(str(p["foo bar 3"]), mode="w")
fp.close()
files = fs.get_files(p)
for f in files:
@@ -437,46 +459,46 @@ class TestCaseDupeGuru_renameSelected:
app = self.app
g = self.groups[0]
self.rtable.select([1])
assert app.rename_selected('renamed')
assert app.rename_selected("renamed")
names = [p.name for p in self.p.listdir()]
assert 'renamed' in names
assert 'foo bar 2' not in names
eq_(g.dupes[0].name, 'renamed')
assert "renamed" in names
assert "foo bar 2" not in names
eq_(g.dupes[0].name, "renamed")
def test_none_selected(self, do_setup, monkeypatch):
app = self.app
g = self.groups[0]
self.rtable.select([])
monkeypatch.setattr(logging, 'warning', log_calls(lambda msg: None))
assert not app.rename_selected('renamed')
msg = logging.warning.calls[0]['msg']
eq_('dupeGuru Warning: list index out of range', msg)
monkeypatch.setattr(logging, "warning", log_calls(lambda msg: None))
assert not app.rename_selected("renamed")
msg = logging.warning.calls[0]["msg"]
eq_("dupeGuru Warning: list index out of range", msg)
names = [p.name for p in self.p.listdir()]
assert 'renamed' not in names
assert 'foo bar 2' in names
eq_(g.dupes[0].name, 'foo bar 2')
assert "renamed" not in names
assert "foo bar 2" in names
eq_(g.dupes[0].name, "foo bar 2")
def test_name_already_exists(self, do_setup, monkeypatch):
app = self.app
g = self.groups[0]
self.rtable.select([1])
monkeypatch.setattr(logging, 'warning', log_calls(lambda msg: None))
assert not app.rename_selected('foo bar 1')
msg = logging.warning.calls[0]['msg']
assert msg.startswith('dupeGuru Warning: \'foo bar 1\' already exists in')
monkeypatch.setattr(logging, "warning", log_calls(lambda msg: None))
assert not app.rename_selected("foo bar 1")
msg = logging.warning.calls[0]["msg"]
assert msg.startswith("dupeGuru Warning: 'foo bar 1' already exists in")
names = [p.name for p in self.p.listdir()]
assert 'foo bar 1' in names
assert 'foo bar 2' in names
eq_(g.dupes[0].name, 'foo bar 2')
assert "foo bar 1" in names
assert "foo bar 2" in names
eq_(g.dupes[0].name, "foo bar 2")
class TestAppWithDirectoriesInTree:
def pytest_funcarg__do_setup(self, request):
tmpdir = request.getfuncargvalue('tmpdir')
tmpdir = request.getfuncargvalue("tmpdir")
p = Path(str(tmpdir))
p['sub1'].mkdir()
p['sub2'].mkdir()
p['sub3'].mkdir()
p["sub1"].mkdir()
p["sub2"].mkdir()
p["sub3"].mkdir()
app = TestApp()
self.app = app.app
self.dtree = app.dtree
@@ -487,12 +509,11 @@ class TestAppWithDirectoriesInTree:
# Setting a node state to something also affect subnodes. These subnodes must be correctly
# refreshed.
node = self.dtree[0]
eq_(len(node), 3) # a len() call is required for subnodes to be loaded
eq_(len(node), 3) # a len() call is required for subnodes to be loaded
subnode = node[0]
node.state = 1 # the state property is a state index
node.state = 1 # the state property is a state index
node = self.dtree[0]
eq_(len(node), 3)
subnode = node[0]
eq_(subnode.state, 1)
self.dtree.view.check_gui_calls(['refresh_states'])
self.dtree.view.check_gui_calls(["refresh_states"])


@@ -4,7 +4,7 @@
# which should be included with this package. The terms are also available at
# http://www.gnu.org/licenses/gpl-3.0.html
from hscommon.testutil import TestApp as TestAppBase, CallLogger, eq_, with_app # noqa
from hscommon.testutil import TestApp as TestAppBase, CallLogger, eq_, with_app # noqa
from hscommon.path import Path
from hscommon.util import get_file_ext, format_size
from hscommon.gui.column import Column
@@ -17,6 +17,7 @@ from ..app import DupeGuru as DupeGuruBase
from ..gui.result_table import ResultTable as ResultTableBase
from ..gui.prioritize_dialog import PrioritizeDialog
class DupeGuruView:
JOB = nulljob
@@ -39,28 +40,32 @@ class DupeGuruView:
self.messages.append(msg)
def ask_yes_no(self, prompt):
return True # always answer yes
return True # always answer yes
def create_results_window(self):
pass
class ResultTable(ResultTableBase):
COLUMNS = [
Column('marked', ''),
Column('name', 'Filename'),
Column('folder_path', 'Directory'),
Column('size', 'Size (KB)'),
Column('extension', 'Kind'),
Column("marked", ""),
Column("name", "Filename"),
Column("folder_path", "Directory"),
Column("size", "Size (KB)"),
Column("extension", "Kind"),
]
DELTA_COLUMNS = {'size', }
DELTA_COLUMNS = {
"size",
}
class DupeGuru(DupeGuruBase):
NAME = 'dupeGuru'
METADATA_TO_READ = ['size']
NAME = "dupeGuru"
METADATA_TO_READ = ["size"]
def __init__(self):
DupeGuruBase.__init__(self, DupeGuruView())
self.appdata = '/tmp'
self.appdata = "/tmp"
self._recreate_result_table()
def _prioritization_categories(self):
@@ -78,7 +83,7 @@ class NamedObject:
def __init__(self, name="foobar", with_words=False, size=1, folder=None):
self.name = name
if folder is None:
folder = 'basepath'
folder = "basepath"
self._folder = Path(folder)
self.size = size
self.md5partial = name
@@ -88,7 +93,7 @@ class NamedObject:
self.is_ref = False
def __bool__(self):
return False #Make sure that operations are made correctly when the bool value of files is false.
return False # Make sure that operations are made correctly when the bool value of files is false.
def get_display_info(self, group, delta):
size = self.size
@@ -97,10 +102,10 @@ class NamedObject:
r = group.ref
size -= r.size
return {
'name': self.name,
'folder_path': str(self.folder_path),
'size': format_size(size, 0, 1, False),
'extension': self.extension if hasattr(self, 'extension') else '---',
"name": self.name,
"folder_path": str(self.folder_path),
"size": format_size(size, 0, 1, False),
"extension": self.extension if hasattr(self, "extension") else "---",
}
@property
@@ -115,6 +120,7 @@ class NamedObject:
def extension(self):
return get_file_ext(self.name)
# Returns a group set that looks like that:
# "foo bar" (1)
# "bar bleh" (1024)
@@ -127,21 +133,24 @@ def GetTestGroups():
NamedObject("bar bleh"),
NamedObject("foo bleh"),
NamedObject("ibabtu"),
NamedObject("ibabtu")
NamedObject("ibabtu"),
]
objects[1].size = 1024
matches = engine.getmatches(objects) #we should have 5 matches
groups = engine.get_groups(matches) #We should have 2 groups
matches = engine.getmatches(objects) # we should have 5 matches
groups = engine.get_groups(matches) # We should have 2 groups
for g in groups:
g.prioritize(lambda x: objects.index(x)) #We want the dupes to be in the same order as the list is
groups.sort(key=len, reverse=True) # We want the group with 3 members to be first.
g.prioritize(
lambda x: objects.index(x)
) # We want the dupes to be in the same order as the list is
groups.sort(key=len, reverse=True) # We want the group with 3 members to be first.
return (objects, matches, groups)
class TestApp(TestAppBase):
def __init__(self):
def link_gui(gui):
gui.view = self.make_logger()
if hasattr(gui, 'columns'): # tables
if hasattr(gui, "columns"): # tables
gui.columns.view = self.make_logger()
return gui
@@ -166,7 +175,7 @@ class TestApp(TestAppBase):
# rtable is a property because its instance can be replaced during execution
return self.app.result_table
#--- Helpers
# --- Helpers
def select_pri_criterion(self, name):
# Select a main prioritize criterion by name instead of by index. Makes tests more
# maintainable.


@@ -13,13 +13,18 @@ try:
except ImportError:
skip("Can't import the block module, probably hasn't been compiled.")
def my_avgdiff(first, second, limit=768, min_iter=3): # this is so I don't have to re-write every call
def my_avgdiff(
first, second, limit=768, min_iter=3
): # this is so I don't have to re-write every call
return avgdiff(first, second, limit, min_iter)
BLACK = (0, 0, 0)
RED = (0xff, 0, 0)
GREEN = (0, 0xff, 0)
BLUE = (0, 0, 0xff)
RED = (0xFF, 0, 0)
GREEN = (0, 0xFF, 0)
BLUE = (0, 0, 0xFF)
class FakeImage:
def __init__(self, size, data):
@@ -37,16 +42,20 @@ class FakeImage:
pixels.append(pixel)
return FakeImage((box[2] - box[0], box[3] - box[1]), pixels)
def empty():
return FakeImage((0, 0), [])
def single_pixel(): #one red pixel
return FakeImage((1, 1), [(0xff, 0, 0)])
def single_pixel(): # one red pixel
return FakeImage((1, 1), [(0xFF, 0, 0)])
def four_pixels():
pixels = [RED, (0, 0x80, 0xff), (0x80, 0, 0), (0, 0x40, 0x80)]
pixels = [RED, (0, 0x80, 0xFF), (0x80, 0, 0), (0, 0x40, 0x80)]
return FakeImage((2, 2), pixels)
class TestCasegetblock:
def test_single_pixel(self):
im = single_pixel()
@@ -60,9 +69,9 @@ class TestCasegetblock:
def test_four_pixels(self):
im = four_pixels()
[b] = getblocks2(im, 1)
meanred = (0xff + 0x80) // 4
meanred = (0xFF + 0x80) // 4
meangreen = (0x80 + 0x40) // 4
meanblue = (0xff + 0x80) // 4
meanblue = (0xFF + 0x80) // 4
eq_((meanred, meangreen, meanblue), b)
@@ -158,6 +167,7 @@ class TestCasegetblock:
# eq_(BLACK, blocks[3])
#
class TestCasegetblocks2:
def test_empty_image(self):
im = empty()
@@ -169,9 +179,9 @@ class TestCasegetblocks2:
blocks = getblocks2(im, 1)
eq_(1, len(blocks))
block = blocks[0]
meanred = (0xff + 0x80) // 4
meanred = (0xFF + 0x80) // 4
meangreen = (0x80 + 0x40) // 4
meanblue = (0xff + 0x80) // 4
meanblue = (0xFF + 0x80) // 4
eq_((meanred, meangreen, meanblue), block)
def test_four_blocks_all_black(self):
@@ -225,25 +235,25 @@ class TestCaseavgdiff:
my_avgdiff([b, b], [b])
def test_first_arg_is_empty_but_not_second(self):
#Don't return 0 (as when the 2 lists are empty), raise!
# Don't return 0 (as when the 2 lists are empty), raise!
b = (0, 0, 0)
with raises(DifferentBlockCountError):
my_avgdiff([], [b])
def test_limit(self):
ref = (0, 0, 0)
b1 = (10, 10, 10) #avg 30
b2 = (20, 20, 20) #avg 45
b3 = (30, 30, 30) #avg 60
b1 = (10, 10, 10) # avg 30
b2 = (20, 20, 20) # avg 45
b3 = (30, 30, 30) # avg 60
blocks1 = [ref, ref, ref]
blocks2 = [b1, b2, b3]
eq_(45, my_avgdiff(blocks1, blocks2, 44))
def test_min_iterations(self):
ref = (0, 0, 0)
b1 = (10, 10, 10) #avg 30
b2 = (20, 20, 20) #avg 45
b3 = (10, 10, 10) #avg 40
b1 = (10, 10, 10) # avg 30
b2 = (20, 20, 20) # avg 45
b3 = (10, 10, 10) # avg 40
blocks1 = [ref, ref, ref]
blocks2 = [b1, b2, b3]
eq_(40, my_avgdiff(blocks1, blocks2, 45 - 1, 3))


@@ -16,34 +16,35 @@ try:
except ImportError:
skip("Can't import the cache module, probably hasn't been compiled.")
class TestCasecolors_to_string:
def test_no_color(self):
eq_('', colors_to_string([]))
eq_("", colors_to_string([]))
def test_single_color(self):
eq_('000000', colors_to_string([(0, 0, 0)]))
eq_('010101', colors_to_string([(1, 1, 1)]))
eq_('0a141e', colors_to_string([(10, 20, 30)]))
eq_("000000", colors_to_string([(0, 0, 0)]))
eq_("010101", colors_to_string([(1, 1, 1)]))
eq_("0a141e", colors_to_string([(10, 20, 30)]))
def test_two_colors(self):
eq_('000102030405', colors_to_string([(0, 1, 2), (3, 4, 5)]))
eq_("000102030405", colors_to_string([(0, 1, 2), (3, 4, 5)]))
class TestCasestring_to_colors:
def test_empty(self):
eq_([], string_to_colors(''))
eq_([], string_to_colors(""))
def test_single_color(self):
eq_([(0, 0, 0)], string_to_colors('000000'))
eq_([(2, 3, 4)], string_to_colors('020304'))
eq_([(10, 20, 30)], string_to_colors('0a141e'))
eq_([(0, 0, 0)], string_to_colors("000000"))
eq_([(2, 3, 4)], string_to_colors("020304"))
eq_([(10, 20, 30)], string_to_colors("0a141e"))
def test_two_colors(self):
eq_([(10, 20, 30), (40, 50, 60)], string_to_colors('0a141e28323c'))
eq_([(10, 20, 30), (40, 50, 60)], string_to_colors("0a141e28323c"))
def test_incomplete_color(self):
# don't return anything if it's not a complete color
eq_([], string_to_colors('102'))
eq_([], string_to_colors("102"))
class BaseTestCaseCache:
@@ -54,58 +55,58 @@ class BaseTestCaseCache:
c = self.get_cache()
eq_(0, len(c))
with raises(KeyError):
c['foo']
c["foo"]
def test_set_then_retrieve_blocks(self):
c = self.get_cache()
b = [(0, 0, 0), (1, 2, 3)]
c['foo'] = b
eq_(b, c['foo'])
c["foo"] = b
eq_(b, c["foo"])
def test_delitem(self):
c = self.get_cache()
c['foo'] = ''
del c['foo']
assert 'foo' not in c
c["foo"] = ""
del c["foo"]
assert "foo" not in c
with raises(KeyError):
del c['foo']
del c["foo"]
def test_persistance(self, tmpdir):
DBNAME = tmpdir.join('hstest.db')
DBNAME = tmpdir.join("hstest.db")
c = self.get_cache(str(DBNAME))
c['foo'] = [(1, 2, 3)]
c["foo"] = [(1, 2, 3)]
del c
c = self.get_cache(str(DBNAME))
eq_([(1, 2, 3)], c['foo'])
eq_([(1, 2, 3)], c["foo"])
def test_filter(self):
c = self.get_cache()
c['foo'] = ''
c['bar'] = ''
c['baz'] = ''
c.filter(lambda p: p != 'bar') #only 'bar' is removed
c["foo"] = ""
c["bar"] = ""
c["baz"] = ""
c.filter(lambda p: p != "bar") # only 'bar' is removed
eq_(2, len(c))
assert 'foo' in c
assert 'baz' in c
assert 'bar' not in c
assert "foo" in c
assert "baz" in c
assert "bar" not in c
def test_clear(self):
c = self.get_cache()
c['foo'] = ''
c['bar'] = ''
c['baz'] = ''
c["foo"] = ""
c["bar"] = ""
c["baz"] = ""
c.clear()
eq_(0, len(c))
assert 'foo' not in c
assert 'baz' not in c
assert 'bar' not in c
assert "foo" not in c
assert "baz" not in c
assert "bar" not in c
def test_by_id(self):
# it's possible to use the cache by referring to the files by their row_id
c = self.get_cache()
b = [(0, 0, 0), (1, 2, 3)]
c['foo'] = b
foo_id = c.get_id('foo')
c["foo"] = b
foo_id = c.get_id("foo")
eq_(c[foo_id], b)
@@ -120,16 +121,16 @@ class TestCaseSqliteCache(BaseTestCaseCache):
# If we don't do this monkeypatching, we get a weird exception about trying to flush a
# closed file. I've tried setting logging level and stuff, but nothing worked. So, there we
# go, a dirty monkeypatch.
monkeypatch.setattr(logging, 'warning', lambda *args, **kw: None)
dbname = str(tmpdir.join('foo.db'))
fp = open(dbname, 'w')
fp.write('invalid sqlite content')
monkeypatch.setattr(logging, "warning", lambda *args, **kw: None)
dbname = str(tmpdir.join("foo.db"))
fp = open(dbname, "w")
fp.write("invalid sqlite content")
fp.close()
c = self.get_cache(dbname) # should not raise a DatabaseError
c['foo'] = [(1, 2, 3)]
c = self.get_cache(dbname) # should not raise a DatabaseError
c["foo"] = [(1, 2, 3)]
del c
c = self.get_cache(dbname)
eq_(c['foo'], [(1, 2, 3)])
eq_(c["foo"], [(1, 2, 3)])
class TestCaseShelveCache(BaseTestCaseCache):
@@ -161,4 +162,3 @@ class TestCaseCacheSQLEscape:
del c["foo'bar"]
except KeyError:
assert False


@@ -1 +1 @@
from hscommon.testutil import pytest_funcarg__app # noqa
from hscommon.testutil import pytest_funcarg__app # noqa


@@ -14,91 +14,105 @@ from hscommon.path import Path
from hscommon.testutil import eq_
from ..fs import File
from ..directories import Directories, DirectoryState, AlreadyThereError, InvalidPathError
from ..directories import (
Directories,
DirectoryState,
AlreadyThereError,
InvalidPathError,
)
def create_fake_fs(rootpath):
# We have it as a separate function because other units are using it.
rootpath = rootpath['fs']
rootpath = rootpath["fs"]
rootpath.mkdir()
rootpath['dir1'].mkdir()
rootpath['dir2'].mkdir()
rootpath['dir3'].mkdir()
fp = rootpath['file1.test'].open('w')
fp.write('1')
rootpath["dir1"].mkdir()
rootpath["dir2"].mkdir()
rootpath["dir3"].mkdir()
fp = rootpath["file1.test"].open("w")
fp.write("1")
fp.close()
fp = rootpath['file2.test'].open('w')
fp.write('12')
fp = rootpath["file2.test"].open("w")
fp.write("12")
fp.close()
fp = rootpath['file3.test'].open('w')
fp.write('123')
fp = rootpath["file3.test"].open("w")
fp.write("123")
fp.close()
fp = rootpath['dir1']['file1.test'].open('w')
fp.write('1')
fp = rootpath["dir1"]["file1.test"].open("w")
fp.write("1")
fp.close()
fp = rootpath['dir2']['file2.test'].open('w')
fp.write('12')
fp = rootpath["dir2"]["file2.test"].open("w")
fp.write("12")
fp.close()
fp = rootpath['dir3']['file3.test'].open('w')
fp.write('123')
fp = rootpath["dir3"]["file3.test"].open("w")
fp.write("123")
fp.close()
return rootpath
testpath = None
def setup_module(module):
# In this unit, we have tests depending on two directory structures: one with only one file in it
# and another with a more complex structure.
testpath = Path(tempfile.mkdtemp())
module.testpath = testpath
rootpath = testpath['onefile']
rootpath = testpath["onefile"]
rootpath.mkdir()
fp = rootpath['test.txt'].open('w')
fp.write('test_data')
fp = rootpath["test.txt"].open("w")
fp.write("test_data")
fp.close()
create_fake_fs(testpath)
def teardown_module(module):
shutil.rmtree(str(module.testpath))
def test_empty():
d = Directories()
eq_(len(d), 0)
assert 'foobar' not in d
assert "foobar" not in d
def test_add_path():
d = Directories()
p = testpath['onefile']
p = testpath["onefile"]
d.add_path(p)
eq_(1, len(d))
assert p in d
assert (p['foobar']) in d
assert (p["foobar"]) in d
assert p.parent() not in d
p = testpath['fs']
p = testpath["fs"]
d.add_path(p)
eq_(2, len(d))
assert p in d
def test_AddPath_when_path_is_already_there():
d = Directories()
p = testpath['onefile']
p = testpath["onefile"]
d.add_path(p)
with raises(AlreadyThereError):
d.add_path(p)
with raises(AlreadyThereError):
d.add_path(p['foobar'])
d.add_path(p["foobar"])
eq_(1, len(d))
def test_add_path_containing_paths_already_there():
d = Directories()
d.add_path(testpath['onefile'])
d.add_path(testpath["onefile"])
eq_(1, len(d))
d.add_path(testpath)
eq_(len(d), 1)
eq_(d[0], testpath)
def test_AddPath_non_latin(tmpdir):
p = Path(str(tmpdir))
to_add = p['unicode\u201a']
to_add = p["unicode\u201a"]
os.mkdir(str(to_add))
d = Directories()
try:
@@ -106,63 +120,69 @@ def test_AddPath_non_latin(tmpdir):
except UnicodeDecodeError:
assert False
def test_del():
d = Directories()
d.add_path(testpath['onefile'])
d.add_path(testpath["onefile"])
try:
del d[1]
assert False
except IndexError:
pass
d.add_path(testpath['fs'])
d.add_path(testpath["fs"])
del d[1]
eq_(1, len(d))
def test_states():
d = Directories()
p = testpath['onefile']
p = testpath["onefile"]
d.add_path(p)
eq_(DirectoryState.Normal, d.get_state(p))
d.set_state(p, DirectoryState.Reference)
eq_(DirectoryState.Reference, d.get_state(p))
eq_(DirectoryState.Reference, d.get_state(p['dir1']))
eq_(DirectoryState.Reference, d.get_state(p["dir1"]))
eq_(1, len(d.states))
eq_(p, list(d.states.keys())[0])
eq_(DirectoryState.Reference, d.states[p])
def test_get_state_with_path_not_there():
# When the path's not there, just return DirectoryState.Normal
d = Directories()
d.add_path(testpath['onefile'])
d.add_path(testpath["onefile"])
eq_(d.get_state(testpath), DirectoryState.Normal)
def test_states_overwritten_when_larger_directory_eat_smaller_ones():
# ref #248
# When setting the state of a folder, we overwrite previously set states for subfolders.
d = Directories()
p = testpath['onefile']
p = testpath["onefile"]
d.add_path(p)
d.set_state(p, DirectoryState.Excluded)
d.add_path(testpath)
d.set_state(testpath, DirectoryState.Reference)
eq_(d.get_state(p), DirectoryState.Reference)
eq_(d.get_state(p['dir1']), DirectoryState.Reference)
eq_(d.get_state(p["dir1"]), DirectoryState.Reference)
eq_(d.get_state(testpath), DirectoryState.Reference)
def test_get_files():
d = Directories()
p = testpath['fs']
p = testpath["fs"]
d.add_path(p)
d.set_state(p['dir1'], DirectoryState.Reference)
d.set_state(p['dir2'], DirectoryState.Excluded)
d.set_state(p["dir1"], DirectoryState.Reference)
d.set_state(p["dir2"], DirectoryState.Excluded)
files = list(d.get_files())
eq_(5, len(files))
for f in files:
if f.path.parent() == p['dir1']:
if f.path.parent() == p["dir1"]:
assert f.is_ref
else:
assert not f.is_ref
def test_get_files_with_folders():
# When fileclasses handle folders, return them and stop recursing!
class FakeFile(File):
@@ -171,106 +191,115 @@ def test_get_files_with_folders():
return True
d = Directories()
p = testpath['fs']
p = testpath["fs"]
d.add_path(p)
files = list(d.get_files(fileclasses=[FakeFile]))
# We have the 3 root files and the 3 root dirs
eq_(6, len(files))
def test_get_folders():
d = Directories()
p = testpath['fs']
p = testpath["fs"]
d.add_path(p)
d.set_state(p['dir1'], DirectoryState.Reference)
d.set_state(p['dir2'], DirectoryState.Excluded)
d.set_state(p["dir1"], DirectoryState.Reference)
d.set_state(p["dir2"], DirectoryState.Excluded)
folders = list(d.get_folders())
eq_(len(folders), 3)
ref = [f for f in folders if f.is_ref]
not_ref = [f for f in folders if not f.is_ref]
eq_(len(ref), 1)
eq_(ref[0].path, p['dir1'])
eq_(ref[0].path, p["dir1"])
eq_(len(not_ref), 2)
eq_(ref[0].size, 1)
def test_get_files_with_inherited_exclusion():
d = Directories()
p = testpath['onefile']
p = testpath["onefile"]
d.add_path(p)
d.set_state(p, DirectoryState.Excluded)
eq_([], list(d.get_files()))
def test_save_and_load(tmpdir):
d1 = Directories()
d2 = Directories()
p1 = Path(str(tmpdir.join('p1')))
p1 = Path(str(tmpdir.join("p1")))
p1.mkdir()
p2 = Path(str(tmpdir.join('p2')))
p2 = Path(str(tmpdir.join("p2")))
p2.mkdir()
d1.add_path(p1)
d1.add_path(p2)
d1.set_state(p1, DirectoryState.Reference)
d1.set_state(p1['dir1'], DirectoryState.Excluded)
tmpxml = str(tmpdir.join('directories_testunit.xml'))
d1.set_state(p1["dir1"], DirectoryState.Excluded)
tmpxml = str(tmpdir.join("directories_testunit.xml"))
d1.save_to_file(tmpxml)
d2.load_from_file(tmpxml)
eq_(2, len(d2))
eq_(DirectoryState.Reference, d2.get_state(p1))
eq_(DirectoryState.Excluded, d2.get_state(p1['dir1']))
eq_(DirectoryState.Excluded, d2.get_state(p1["dir1"]))
def test_invalid_path():
d = Directories()
p = Path('does_not_exist')
p = Path("does_not_exist")
with raises(InvalidPathError):
d.add_path(p)
eq_(0, len(d))
def test_set_state_on_invalid_path():
d = Directories()
try:
d.set_state(Path('foobar',), DirectoryState.Normal)
d.set_state(Path("foobar",), DirectoryState.Normal)
except LookupError:
assert False
def test_load_from_file_with_invalid_path(tmpdir):
#This test simulates a load from file resulting in an
#InvalidPath raise. Other directories must be loaded.
# This test simulates a load from file resulting in an
# InvalidPath raise. Other directories must be loaded.
d1 = Directories()
d1.add_path(testpath['onefile'])
#Will raise InvalidPath upon loading
p = Path(str(tmpdir.join('toremove')))
d1.add_path(testpath["onefile"])
# Will raise InvalidPath upon loading
p = Path(str(tmpdir.join("toremove")))
p.mkdir()
d1.add_path(p)
p.rmdir()
tmpxml = str(tmpdir.join('directories_testunit.xml'))
tmpxml = str(tmpdir.join("directories_testunit.xml"))
d1.save_to_file(tmpxml)
d2 = Directories()
d2.load_from_file(tmpxml)
eq_(1, len(d2))
def test_unicode_save(tmpdir):
d = Directories()
p1 = Path(str(tmpdir))['hello\xe9']
p1 = Path(str(tmpdir))["hello\xe9"]
p1.mkdir()
p1['foo\xe9'].mkdir()
p1["foo\xe9"].mkdir()
d.add_path(p1)
d.set_state(p1['foo\xe9'], DirectoryState.Excluded)
tmpxml = str(tmpdir.join('directories_testunit.xml'))
d.set_state(p1["foo\xe9"], DirectoryState.Excluded)
tmpxml = str(tmpdir.join("directories_testunit.xml"))
try:
d.save_to_file(tmpxml)
except UnicodeDecodeError:
assert False
def test_get_files_refreshes_its_directories():
d = Directories()
p = testpath['fs']
p = testpath["fs"]
d.add_path(p)
files = d.get_files()
eq_(6, len(list(files)))
time.sleep(1)
os.remove(str(p['dir1']['file1.test']))
os.remove(str(p["dir1"]["file1.test"]))
files = d.get_files()
eq_(5, len(list(files)))
def test_get_files_does_not_choke_on_non_existing_directories(tmpdir):
d = Directories()
p = Path(str(tmpdir))
@@ -278,36 +307,37 @@ def test_get_files_does_not_choke_on_non_existing_directories(tmpdir):
p.rmtree()
eq_([], list(d.get_files()))
def test_get_state_returns_excluded_by_default_for_hidden_directories(tmpdir):
d = Directories()
p = Path(str(tmpdir))
hidden_dir_path = p['.foo']
p['.foo'].mkdir()
hidden_dir_path = p[".foo"]
p[".foo"].mkdir()
d.add_path(p)
eq_(d.get_state(hidden_dir_path), DirectoryState.Excluded)
# But it can be overridden
d.set_state(hidden_dir_path, DirectoryState.Normal)
eq_(d.get_state(hidden_dir_path), DirectoryState.Normal)
def test_default_path_state_override(tmpdir):
# It's possible for a subclass to override the default state of a path
class MyDirectories(Directories):
def _default_state_for_path(self, path):
if 'foobar' in path:
if "foobar" in path:
return DirectoryState.Excluded
d = MyDirectories()
p1 = Path(str(tmpdir))
p1['foobar'].mkdir()
p1['foobar/somefile'].open('w').close()
p1['foobaz'].mkdir()
p1['foobaz/somefile'].open('w').close()
p1["foobar"].mkdir()
p1["foobar/somefile"].open("w").close()
p1["foobaz"].mkdir()
p1["foobaz/somefile"].open("w").close()
d.add_path(p1)
eq_(d.get_state(p1['foobaz']), DirectoryState.Normal)
eq_(d.get_state(p1['foobar']), DirectoryState.Excluded)
eq_(len(list(d.get_files())), 1) # only the 'foobaz' file is there
eq_(d.get_state(p1["foobaz"]), DirectoryState.Normal)
eq_(d.get_state(p1["foobar"]), DirectoryState.Excluded)
eq_(len(list(d.get_files())), 1) # only the 'foobaz' file is there
# However, the default state can be changed
d.set_state(p1['foobar'], DirectoryState.Normal)
eq_(d.get_state(p1['foobar']), DirectoryState.Normal)
d.set_state(p1["foobar"], DirectoryState.Normal)
eq_(d.get_state(p1["foobar"]), DirectoryState.Normal)
eq_(len(list(d.get_files())), 2)


@@ -13,13 +13,28 @@ from hscommon.testutil import eq_, log_calls
from .base import NamedObject
from .. import engine
from ..engine import (
get_match, getwords, Group, getfields, unpack_fields, compare_fields, compare, WEIGHT_WORDS,
MATCH_SIMILAR_WORDS, NO_FIELD_ORDER, build_word_dict, get_groups, getmatches, Match,
getmatches_by_contents, merge_similar_words, reduce_common_words
get_match,
getwords,
Group,
getfields,
unpack_fields,
compare_fields,
compare,
WEIGHT_WORDS,
MATCH_SIMILAR_WORDS,
NO_FIELD_ORDER,
build_word_dict,
get_groups,
getmatches,
Match,
getmatches_by_contents,
merge_similar_words,
reduce_common_words,
)
no = NamedObject
def get_match_triangle():
o1 = NamedObject(with_words=True)
o2 = NamedObject(with_words=True)
@@ -29,6 +44,7 @@ def get_match_triangle():
m3 = get_match(o2, o3)
return [m1, m2, m3]
def get_test_group():
m1, m2, m3 = get_match_triangle()
result = Group()
@@ -37,6 +53,7 @@ def get_test_group():
result.add_match(m3)
return result
def assert_match(m, name1, name2):
# When testing matches, whether objects are in first or second position very often doesn't
# matter. This function makes this test more convenient.
@@ -46,53 +63,54 @@ def assert_match(m, name1, name2):
eq_(m.first.name, name2)
eq_(m.second.name, name1)
class TestCasegetwords:
def test_spaces(self):
eq_(['a', 'b', 'c', 'd'], getwords("a b c d"))
eq_(['a', 'b', 'c', 'd'], getwords(" a b c d "))
eq_(["a", "b", "c", "d"], getwords("a b c d"))
eq_(["a", "b", "c", "d"], getwords(" a b c d "))
def test_splitter_chars(self):
eq_(
[chr(i) for i in range(ord('a'), ord('z')+1)],
getwords("a-b_c&d+e(f)g;h\\i[j]k{l}m:n.o,p<q>r/s?t~u!v@w#x$y*z")
[chr(i) for i in range(ord("a"), ord("z") + 1)],
getwords("a-b_c&d+e(f)g;h\\i[j]k{l}m:n.o,p<q>r/s?t~u!v@w#x$y*z"),
)
def test_joiner_chars(self):
eq_(["aec"], getwords("a'e\u0301c"))
def test_empty(self):
eq_([], getwords(''))
eq_([], getwords(""))
def test_returns_lowercase(self):
eq_(['foo', 'bar'], getwords('FOO BAR'))
eq_(["foo", "bar"], getwords("FOO BAR"))
def test_decompose_unicode(self):
eq_(getwords('foo\xe9bar'), ['fooebar'])
eq_(getwords("foo\xe9bar"), ["fooebar"])
class TestCasegetfields:
def test_simple(self):
eq_([['a', 'b'], ['c', 'd', 'e']], getfields('a b - c d e'))
eq_([["a", "b"], ["c", "d", "e"]], getfields("a b - c d e"))
def test_empty(self):
eq_([], getfields(''))
eq_([], getfields(""))
def test_cleans_empty_fields(self):
expected = [['a', 'bc', 'def']]
actual = getfields(' - a bc def')
expected = [["a", "bc", "def"]]
actual = getfields(" - a bc def")
eq_(expected, actual)
expected = [['bc', 'def']]
expected = [["bc", "def"]]
class TestCaseunpack_fields:
def test_with_fields(self):
expected = ['a', 'b', 'c', 'd', 'e', 'f']
actual = unpack_fields([['a'], ['b', 'c'], ['d', 'e', 'f']])
expected = ["a", "b", "c", "d", "e", "f"]
actual = unpack_fields([["a"], ["b", "c"], ["d", "e", "f"]])
eq_(expected, actual)
def test_without_fields(self):
expected = ['a', 'b', 'c', 'd', 'e', 'f']
actual = unpack_fields(['a', 'b', 'c', 'd', 'e', 'f'])
expected = ["a", "b", "c", "d", "e", "f"]
actual = unpack_fields(["a", "b", "c", "d", "e", "f"])
eq_(expected, actual)
def test_empty(self):
@@ -101,134 +119,151 @@ class TestCaseunpack_fields:
class TestCaseWordCompare:
def test_list(self):
eq_(100, compare(['a', 'b', 'c', 'd'], ['a', 'b', 'c', 'd']))
eq_(86, compare(['a', 'b', 'c', 'd'], ['a', 'b', 'c']))
eq_(100, compare(["a", "b", "c", "d"], ["a", "b", "c", "d"]))
eq_(86, compare(["a", "b", "c", "d"], ["a", "b", "c"]))
def test_unordered(self):
#Sometimes, users don't want fuzzy matching too much. When they set the slider
#to 100, they don't expect a filename with the same words, but not the same order, to match.
#Thus, we want to return 99 in that case.
eq_(99, compare(['a', 'b', 'c', 'd'], ['d', 'b', 'c', 'a']))
# Sometimes, users don't want fuzzy matching too much. When they set the slider
# to 100, they don't expect a filename with the same words, but not the same order, to match.
# Thus, we want to return 99 in that case.
eq_(99, compare(["a", "b", "c", "d"], ["d", "b", "c", "a"]))
def test_word_occurs_twice(self):
#if a word occurs twice in first, but once in second, we want the word to be only counted once
eq_(89, compare(['a', 'b', 'c', 'd', 'a'], ['d', 'b', 'c', 'a']))
# if a word occurs twice in first, but once in second, we want the word to be only counted once
eq_(89, compare(["a", "b", "c", "d", "a"], ["d", "b", "c", "a"]))
def test_uses_copy_of_lists(self):
first = ['foo', 'bar']
second = ['bar', 'bleh']
first = ["foo", "bar"]
second = ["bar", "bleh"]
compare(first, second)
eq_(['foo', 'bar'], first)
eq_(['bar', 'bleh'], second)
eq_(["foo", "bar"], first)
eq_(["bar", "bleh"], second)
def test_word_weight(self):
eq_(int((6.0 / 13.0) * 100), compare(['foo', 'bar'], ['bar', 'bleh'], (WEIGHT_WORDS, )))
eq_(
int((6.0 / 13.0) * 100),
compare(["foo", "bar"], ["bar", "bleh"], (WEIGHT_WORDS,)),
)
def test_similar_words(self):
eq_(100, compare(['the', 'white', 'stripes'], ['the', 'whites', 'stripe'], (MATCH_SIMILAR_WORDS, )))
eq_(
100,
compare(
["the", "white", "stripes"],
["the", "whites", "stripe"],
(MATCH_SIMILAR_WORDS,),
),
)
def test_empty(self):
eq_(0, compare([], []))
def test_with_fields(self):
eq_(67, compare([['a', 'b'], ['c', 'd', 'e']], [['a', 'b'], ['c', 'd', 'f']]))
eq_(67, compare([["a", "b"], ["c", "d", "e"]], [["a", "b"], ["c", "d", "f"]]))
def test_propagate_flags_with_fields(self, monkeypatch):
def mock_compare(first, second, flags):
eq_((0, 1, 2, 3, 5), flags)
monkeypatch.setattr(engine, 'compare_fields', mock_compare)
compare([['a']], [['a']], (0, 1, 2, 3, 5))
monkeypatch.setattr(engine, "compare_fields", mock_compare)
compare([["a"]], [["a"]], (0, 1, 2, 3, 5))
class TestCaseWordCompareWithFields:
def test_simple(self):
eq_(67, compare_fields([['a', 'b'], ['c', 'd', 'e']], [['a', 'b'], ['c', 'd', 'f']]))
eq_(
67,
compare_fields(
[["a", "b"], ["c", "d", "e"]], [["a", "b"], ["c", "d", "f"]]
),
)
def test_empty(self):
eq_(0, compare_fields([], []))
def test_different_length(self):
eq_(0, compare_fields([['a'], ['b']], [['a'], ['b'], ['c']]))
eq_(0, compare_fields([["a"], ["b"]], [["a"], ["b"], ["c"]]))
def test_propagates_flags(self, monkeypatch):
def mock_compare(first, second, flags):
eq_((0, 1, 2, 3, 5), flags)
monkeypatch.setattr(engine, 'compare_fields', mock_compare)
compare_fields([['a']], [['a']], (0, 1, 2, 3, 5))
monkeypatch.setattr(engine, "compare_fields", mock_compare)
compare_fields([["a"]], [["a"]], (0, 1, 2, 3, 5))
def test_order(self):
first = [['a', 'b'], ['c', 'd', 'e']]
second = [['c', 'd', 'f'], ['a', 'b']]
first = [["a", "b"], ["c", "d", "e"]]
second = [["c", "d", "f"], ["a", "b"]]
eq_(0, compare_fields(first, second))
def test_no_order(self):
first = [['a', 'b'], ['c', 'd', 'e']]
second = [['c', 'd', 'f'], ['a', 'b']]
eq_(67, compare_fields(first, second, (NO_FIELD_ORDER, )))
first = [['a', 'b'], ['a', 'b']] #a field can only be matched once.
second = [['c', 'd', 'f'], ['a', 'b']]
eq_(0, compare_fields(first, second, (NO_FIELD_ORDER, )))
first = [['a', 'b'], ['a', 'b', 'c']]
second = [['c', 'd', 'f'], ['a', 'b']]
eq_(33, compare_fields(first, second, (NO_FIELD_ORDER, )))
first = [["a", "b"], ["c", "d", "e"]]
second = [["c", "d", "f"], ["a", "b"]]
eq_(67, compare_fields(first, second, (NO_FIELD_ORDER,)))
first = [["a", "b"], ["a", "b"]] # a field can only be matched once.
second = [["c", "d", "f"], ["a", "b"]]
eq_(0, compare_fields(first, second, (NO_FIELD_ORDER,)))
first = [["a", "b"], ["a", "b", "c"]]
second = [["c", "d", "f"], ["a", "b"]]
eq_(33, compare_fields(first, second, (NO_FIELD_ORDER,)))
def test_compare_fields_without_order_doesnt_alter_fields(self):
#The NO_ORDER comp type altered the fields!
first = [['a', 'b'], ['c', 'd', 'e']]
second = [['c', 'd', 'f'], ['a', 'b']]
eq_(67, compare_fields(first, second, (NO_FIELD_ORDER, )))
eq_([['a', 'b'], ['c', 'd', 'e']], first)
eq_([['c', 'd', 'f'], ['a', 'b']], second)
# The NO_ORDER comp type altered the fields!
first = [["a", "b"], ["c", "d", "e"]]
second = [["c", "d", "f"], ["a", "b"]]
eq_(67, compare_fields(first, second, (NO_FIELD_ORDER,)))
eq_([["a", "b"], ["c", "d", "e"]], first)
eq_([["c", "d", "f"], ["a", "b"]], second)
class TestCasebuild_word_dict:
def test_with_standard_words(self):
l = [NamedObject('foo bar', True)]
l.append(NamedObject('bar baz', True))
l.append(NamedObject('baz bleh foo', True))
d = build_word_dict(l)
itemList = [NamedObject("foo bar", True)]
itemList.append(NamedObject("bar baz", True))
itemList.append(NamedObject("baz bleh foo", True))
d = build_word_dict(itemList)
eq_(4, len(d))
eq_(2, len(d['foo']))
assert l[0] in d['foo']
assert l[2] in d['foo']
eq_(2, len(d['bar']))
assert l[0] in d['bar']
assert l[1] in d['bar']
eq_(2, len(d['baz']))
assert l[1] in d['baz']
assert l[2] in d['baz']
eq_(1, len(d['bleh']))
assert l[2] in d['bleh']
eq_(2, len(d["foo"]))
assert itemList[0] in d["foo"]
assert itemList[2] in d["foo"]
eq_(2, len(d["bar"]))
assert itemList[0] in d["bar"]
assert itemList[1] in d["bar"]
eq_(2, len(d["baz"]))
assert itemList[1] in d["baz"]
assert itemList[2] in d["baz"]
eq_(1, len(d["bleh"]))
assert itemList[2] in d["bleh"]
def test_unpack_fields(self):
o = NamedObject('')
o.words = [['foo', 'bar'], ['baz']]
o = NamedObject("")
o.words = [["foo", "bar"], ["baz"]]
d = build_word_dict([o])
eq_(3, len(d))
eq_(1, len(d['foo']))
eq_(1, len(d["foo"]))
def test_words_are_unaltered(self):
o = NamedObject('')
o.words = [['foo', 'bar'], ['baz']]
o = NamedObject("")
o.words = [["foo", "bar"], ["baz"]]
build_word_dict([o])
eq_([['foo', 'bar'], ['baz']], o.words)
eq_([["foo", "bar"], ["baz"]], o.words)
def test_object_instances_can_only_be_once_in_words_object_list(self):
o = NamedObject('foo foo', True)
o = NamedObject("foo foo", True)
d = build_word_dict([o])
eq_(1, len(d['foo']))
eq_(1, len(d["foo"]))
def test_job(self):
def do_progress(p, d=''):
def do_progress(p, d=""):
self.log.append(p)
return True
j = job.Job(1, do_progress)
self.log = []
s = "foo bar"
build_word_dict([NamedObject(s, True), NamedObject(s, True), NamedObject(s, True)], j)
build_word_dict(
[NamedObject(s, True), NamedObject(s, True), NamedObject(s, True)], j
)
# We don't have intermediate log because iter_with_progress is called with every > 1
eq_(0, self.log[0])
eq_(100, self.log[1])
@@ -237,51 +272,56 @@ class TestCasebuild_word_dict:
class TestCasemerge_similar_words:
def test_some_similar_words(self):
d = {
'foobar': set([1]),
'foobar1': set([2]),
'foobar2': set([3]),
"foobar": set([1]),
"foobar1": set([2]),
"foobar2": set([3]),
}
merge_similar_words(d)
eq_(1, len(d))
eq_(3, len(d['foobar']))
eq_(3, len(d["foobar"]))
class TestCasereduce_common_words:
def test_typical(self):
d = {
'foo': set([NamedObject('foo bar', True) for i in range(50)]),
'bar': set([NamedObject('foo bar', True) for i in range(49)])
"foo": set([NamedObject("foo bar", True) for i in range(50)]),
"bar": set([NamedObject("foo bar", True) for i in range(49)]),
}
reduce_common_words(d, 50)
assert 'foo' not in d
eq_(49, len(d['bar']))
assert "foo" not in d
eq_(49, len(d["bar"]))
def test_dont_remove_objects_with_only_common_words(self):
d = {
'common': set([NamedObject("common uncommon", True) for i in range(50)] + [NamedObject("common", True)]),
'uncommon': set([NamedObject("common uncommon", True)])
"common": set(
[NamedObject("common uncommon", True) for i in range(50)]
+ [NamedObject("common", True)]
),
"uncommon": set([NamedObject("common uncommon", True)]),
}
reduce_common_words(d, 50)
eq_(1, len(d['common']))
eq_(1, len(d['uncommon']))
eq_(1, len(d["common"]))
eq_(1, len(d["uncommon"]))
def test_values_still_are_set_instances(self):
d = {
'common': set([NamedObject("common uncommon", True) for i in range(50)] + [NamedObject("common", True)]),
'uncommon': set([NamedObject("common uncommon", True)])
"common": set(
[NamedObject("common uncommon", True) for i in range(50)]
+ [NamedObject("common", True)]
),
"uncommon": set([NamedObject("common uncommon", True)]),
}
reduce_common_words(d, 50)
assert isinstance(d['common'], set)
assert isinstance(d['uncommon'], set)
assert isinstance(d["common"], set)
assert isinstance(d["uncommon"], set)
def test_dont_raise_KeyError_when_a_word_has_been_removed(self):
#If a word has been removed by the reduce, an object in a subsequent common word that
#contains the word that has been removed would cause a KeyError.
# If a word has been removed by the reduce, an object in a subsequent common word that
# contains the word that has been removed would cause a KeyError.
d = {
'foo': set([NamedObject('foo bar baz', True) for i in range(50)]),
'bar': set([NamedObject('foo bar baz', True) for i in range(50)]),
'baz': set([NamedObject('foo bar baz', True) for i in range(49)])
"foo": set([NamedObject("foo bar baz", True) for i in range(50)]),
"bar": set([NamedObject("foo bar baz", True) for i in range(50)]),
"baz": set([NamedObject("foo bar baz", True) for i in range(49)]),
}
try:
reduce_common_words(d, 50)
@@ -289,35 +329,37 @@ class TestCasereduce_common_words:
self.fail()
def test_unpack_fields(self):
#object.words may be fields.
# object.words may be fields.
def create_it():
o = NamedObject('')
o.words = [['foo', 'bar'], ['baz']]
o = NamedObject("")
o.words = [["foo", "bar"], ["baz"]]
return o
d = {
'foo': set([create_it() for i in range(50)])
}
d = {"foo": set([create_it() for i in range(50)])}
try:
reduce_common_words(d, 50)
except TypeError:
self.fail("must support fields.")
def test_consider_a_reduced_common_word_common_even_after_reduction(self):
#There was a bug in the code that caused a word that has already been reduced not to
#be counted as a common word for subsequent words. For example, if 'foo' is processed
#as a common word, keeping a "foo bar" file in it, and the 'bar' is processed, "foo bar"
#would not stay in 'bar' because 'foo' is not a common word anymore.
only_common = NamedObject('foo bar', True)
# There was a bug in the code that caused a word that has already been reduced not to
# be counted as a common word for subsequent words. For example, if 'foo' is processed
# as a common word, keeping a "foo bar" file in it, and the 'bar' is processed, "foo bar"
# would not stay in 'bar' because 'foo' is not a common word anymore.
only_common = NamedObject("foo bar", True)
d = {
'foo': set([NamedObject('foo bar baz', True) for i in range(49)] + [only_common]),
'bar': set([NamedObject('foo bar baz', True) for i in range(49)] + [only_common]),
'baz': set([NamedObject('foo bar baz', True) for i in range(49)])
"foo": set(
[NamedObject("foo bar baz", True) for i in range(49)] + [only_common]
),
"bar": set(
[NamedObject("foo bar baz", True) for i in range(49)] + [only_common]
),
"baz": set([NamedObject("foo bar baz", True) for i in range(49)]),
}
reduce_common_words(d, 50)
eq_(1, len(d['foo']))
eq_(1, len(d['bar']))
eq_(49, len(d['baz']))
eq_(1, len(d["foo"]))
eq_(1, len(d["bar"]))
eq_(49, len(d["baz"]))
class TestCaseget_match:
@@ -326,8 +368,8 @@ class TestCaseget_match:
o2 = NamedObject("bar bleh", True)
m = get_match(o1, o2)
eq_(50, m.percentage)
eq_(['foo', 'bar'], m.first.words)
eq_(['bar', 'bleh'], m.second.words)
eq_(["foo", "bar"], m.first.words)
eq_(["bar", "bleh"], m.second.words)
assert m.first is o1
assert m.second is o2
@@ -340,7 +382,9 @@ class TestCaseget_match:
assert object() not in m
def test_word_weight(self):
m = get_match(NamedObject("foo bar", True), NamedObject("bar bleh", True), (WEIGHT_WORDS, ))
m = get_match(
NamedObject("foo bar", True), NamedObject("bar bleh", True), (WEIGHT_WORDS,)
)
eq_(m.percentage, int((6.0 / 13.0) * 100))
@@ -349,54 +393,59 @@ class TestCaseGetMatches:
eq_(getmatches([]), [])
def test_simple(self):
l = [NamedObject("foo bar"), NamedObject("bar bleh"), NamedObject("a b c foo")]
r = getmatches(l)
itemList = [NamedObject("foo bar"), NamedObject("bar bleh"), NamedObject("a b c foo")]
r = getmatches(itemList)
eq_(2, len(r))
m = first(m for m in r if m.percentage == 50) #"foo bar" and "bar bleh"
assert_match(m, 'foo bar', 'bar bleh')
m = first(m for m in r if m.percentage == 33) #"foo bar" and "a b c foo"
assert_match(m, 'foo bar', 'a b c foo')
m = first(m for m in r if m.percentage == 50) # "foo bar" and "bar bleh"
assert_match(m, "foo bar", "bar bleh")
m = first(m for m in r if m.percentage == 33) # "foo bar" and "a b c foo"
assert_match(m, "foo bar", "a b c foo")
def test_null_and_unrelated_objects(self):
l = [NamedObject("foo bar"), NamedObject("bar bleh"), NamedObject(""), NamedObject("unrelated object")]
r = getmatches(l)
itemList = [
NamedObject("foo bar"),
NamedObject("bar bleh"),
NamedObject(""),
NamedObject("unrelated object"),
]
r = getmatches(itemList)
eq_(len(r), 1)
m = r[0]
eq_(m.percentage, 50)
assert_match(m, 'foo bar', 'bar bleh')
assert_match(m, "foo bar", "bar bleh")
def test_twice_the_same_word(self):
l = [NamedObject("foo foo bar"), NamedObject("bar bleh")]
r = getmatches(l)
itemList = [NamedObject("foo foo bar"), NamedObject("bar bleh")]
r = getmatches(itemList)
eq_(1, len(r))
def test_twice_the_same_word_when_preworded(self):
l = [NamedObject("foo foo bar", True), NamedObject("bar bleh", True)]
r = getmatches(l)
itemList = [NamedObject("foo foo bar", True), NamedObject("bar bleh", True)]
r = getmatches(itemList)
eq_(1, len(r))
def test_two_words_match(self):
l = [NamedObject("foo bar"), NamedObject("foo bar bleh")]
r = getmatches(l)
itemList = [NamedObject("foo bar"), NamedObject("foo bar bleh")]
r = getmatches(itemList)
eq_(1, len(r))
def test_match_files_with_only_common_words(self):
#If a word occurs more than 50 times, it is excluded from the matching process
#The problem with the common_word_threshold is that the files containing only common
#words will never be matched together. We *should* match them.
# If a word occurs more than 50 times, it is excluded from the matching process
# The problem with the common_word_threshold is that the files containing only common
# words will never be matched together. We *should* match them.
# This test assumes that the common word threshold const is 50
l = [NamedObject("foo") for i in range(50)]
r = getmatches(l)
itemList = [NamedObject("foo") for i in range(50)]
r = getmatches(itemList)
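# 50 files all named "foo" match pairwise: 50 * 49 / 2 = 1225 matches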
eq_(1225, len(r))
def test_use_words_already_there_if_there(self):
o1 = NamedObject('foo')
o2 = NamedObject('bar')
o2.words = ['foo']
o1 = NamedObject("foo")
o2 = NamedObject("bar")
o2.words = ["foo"]
eq_(1, len(getmatches([o1, o2])))
def test_job(self):
def do_progress(p, d=''):
def do_progress(p, d=""):
self.log.append(p)
return True
@@ -409,28 +458,28 @@ class TestCaseGetMatches:
eq_(100, self.log[-1])
def test_weight_words(self):
l = [NamedObject("foo bar"), NamedObject("bar bleh")]
m = getmatches(l, weight_words=True)[0]
itemList = [NamedObject("foo bar"), NamedObject("bar bleh")]
m = getmatches(itemList, weight_words=True)[0]
eq_(int((6.0 / 13.0) * 100), m.percentage)
def test_similar_word(self):
l = [NamedObject("foobar"), NamedObject("foobars")]
eq_(len(getmatches(l, match_similar_words=True)), 1)
eq_(getmatches(l, match_similar_words=True)[0].percentage, 100)
l = [NamedObject("foobar"), NamedObject("foo")]
eq_(len(getmatches(l, match_similar_words=True)), 0) #too far
l = [NamedObject("bizkit"), NamedObject("bizket")]
eq_(len(getmatches(l, match_similar_words=True)), 1)
l = [NamedObject("foobar"), NamedObject("foosbar")]
eq_(len(getmatches(l, match_similar_words=True)), 1)
itemList = [NamedObject("foobar"), NamedObject("foobars")]
eq_(len(getmatches(itemList, match_similar_words=True)), 1)
eq_(getmatches(itemList, match_similar_words=True)[0].percentage, 100)
itemList = [NamedObject("foobar"), NamedObject("foo")]
eq_(len(getmatches(itemList, match_similar_words=True)), 0) # too far
itemList = [NamedObject("bizkit"), NamedObject("bizket")]
eq_(len(getmatches(itemList, match_similar_words=True)), 1)
itemList = [NamedObject("foobar"), NamedObject("foosbar")]
eq_(len(getmatches(itemList, match_similar_words=True)), 1)
def test_single_object_with_similar_words(self):
l = [NamedObject("foo foos")]
eq_(len(getmatches(l, match_similar_words=True)), 0)
itemList = [NamedObject("foo foos")]
eq_(len(getmatches(itemList, match_similar_words=True)), 0)
def test_double_words_get_counted_only_once(self):
l = [NamedObject("foo bar foo bleh"), NamedObject("foo bar bleh bar")]
m = getmatches(l)[0]
itemList = [NamedObject("foo bar foo bleh"), NamedObject("foo bar bleh bar")]
m = getmatches(itemList)[0]
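# repeated words only count once as matches: 3 shared words per side, 6 of 8 words -> 75%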
eq_(75, m.percentage)
def test_with_fields(self):
@@ -450,13 +499,13 @@ class TestCaseGetMatches:
eq_(m.percentage, 50)
def test_only_match_similar_when_the_option_is_set(self):
l = [NamedObject("foobar"), NamedObject("foobars")]
eq_(len(getmatches(l, match_similar_words=False)), 0)
itemList = [NamedObject("foobar"), NamedObject("foobars")]
eq_(len(getmatches(itemList, match_similar_words=False)), 0)
def test_dont_recurse_do_match(self):
# with nosetests, the stack is increased. The number has to be high enough to avoid false failures
sys.setrecursionlimit(200)
files = [NamedObject('foo bar') for i in range(201)]
files = [NamedObject("foo bar") for i in range(201)]
try:
getmatches(files)
except RuntimeError:
@@ -465,9 +514,9 @@ class TestCaseGetMatches:
sys.setrecursionlimit(1000)
def test_min_match_percentage(self):
l = [NamedObject("foo bar"), NamedObject("bar bleh"), NamedObject("a b c foo")]
r = getmatches(l, min_match_percentage=50)
eq_(1, len(r)) #Only "foo bar" / "bar bleh" should match
itemList = [NamedObject("foo bar"), NamedObject("bar bleh"), NamedObject("a b c foo")]
r = getmatches(itemList, min_match_percentage=50)
eq_(1, len(r)) # Only "foo bar" / "bar bleh" should match
def test_MemoryError(self, monkeypatch):
@log_calls
@@ -476,12 +525,12 @@ class TestCaseGetMatches:
raise MemoryError()
return Match(first, second, 0)
objects = [NamedObject() for i in range(10)] # results in 45 matches
monkeypatch.setattr(engine, 'get_match', mocked_match)
objects = [NamedObject() for i in range(10)] # results in 45 matches
monkeypatch.setattr(engine, "get_match", mocked_match)
try:
r = getmatches(objects)
except MemoryError:
self.fail('MemoryError must be handled')
self.fail("MemorryError must be handled")
eq_(42, len(r))
@@ -599,7 +648,7 @@ class TestCaseGroup:
eq_([o1], g.dupes)
g.switch_ref(o2)
assert o2 is g.ref
g.switch_ref(NamedObject('', True))
g.switch_ref(NamedObject("", True))
assert o2 is g.ref
def test_switch_ref_from_ref_dir(self):
@@ -620,11 +669,11 @@ class TestCaseGroup:
m = g.get_match_of(o)
assert g.ref in m
assert o in m
assert g.get_match_of(NamedObject('', True)) is None
assert g.get_match_of(NamedObject("", True)) is None
assert g.get_match_of(g.ref) is None
def test_percentage(self):
#percentage should return the avg percentage in relation to the ref
# percentage should return the avg percentage in relation to the ref
m1, m2, m3 = get_match_triangle()
m1 = Match(m1[0], m1[1], 100)
m2 = Match(m2[0], m2[1], 50)
@@ -651,9 +700,9 @@ class TestCaseGroup:
o1 = m1.first
o2 = m1.second
o3 = m2.second
o1.name = 'c'
o2.name = 'b'
o3.name = 'a'
o1.name = "c"
o2.name = "b"
o3.name = "a"
g = Group()
g.add_match(m1)
g.add_match(m2)
@@ -709,9 +758,9 @@ class TestCaseGroup:
def test_prioritize_nothing_changes(self):
# prioritize() returns False when nothing changes in the group.
g = get_test_group()
g[0].name = 'a'
g[1].name = 'b'
g[2].name = 'c'
g[0].name = "a"
g[1].name = "b"
g[2].name = "c"
assert not g.prioritize(lambda x: x.name)
def test_list_like(self):
@@ -723,7 +772,11 @@ class TestCaseGroup:
def test_discard_matches(self):
g = Group()
o1, o2, o3 = (NamedObject("foo", True), NamedObject("bar", True), NamedObject("baz", True))
o1, o2, o3 = (
NamedObject("foo", True),
NamedObject("bar", True),
NamedObject("baz", True),
)
g.add_match(get_match(o1, o2))
g.add_match(get_match(o1, o3))
g.discard_matches()
@@ -737,8 +790,8 @@ class TestCaseget_groups:
eq_([], r)
def test_simple(self):
l = [NamedObject("foo bar"), NamedObject("bar bleh")]
matches = getmatches(l)
itemList = [NamedObject("foo bar"), NamedObject("bar bleh")]
matches = getmatches(itemList)
m = matches[0]
r = get_groups(matches)
eq_(1, len(r))
@@ -747,28 +800,39 @@ class TestCaseget_groups:
eq_([m.second], g.dupes)
def test_group_with_multiple_matches(self):
#This results in 3 matches
l = [NamedObject("foo"), NamedObject("foo"), NamedObject("foo")]
matches = getmatches(l)
# This results in 3 matches
itemList = [NamedObject("foo"), NamedObject("foo"), NamedObject("foo")]
matches = getmatches(itemList)
r = get_groups(matches)
eq_(1, len(r))
g = r[0]
eq_(3, len(g))
def test_must_choose_a_group(self):
l = [NamedObject("a b"), NamedObject("a b"), NamedObject("b c"), NamedObject("c d"), NamedObject("c d")]
#There will be 2 groups here: group "a b" and group "c d"
#"b c" can go either of them, but not both.
matches = getmatches(l)
itemList = [
NamedObject("a b"),
NamedObject("a b"),
NamedObject("b c"),
NamedObject("c d"),
NamedObject("c d"),
]
# There will be 2 groups here: group "a b" and group "c d"
# "b c" can go either of them, but not both.
matches = getmatches(itemList)
r = get_groups(matches)
eq_(2, len(r))
eq_(5, len(r[0])+len(r[1]))
eq_(5, len(r[0]) + len(r[1]))
def test_should_all_go_in_the_same_group(self):
l = [NamedObject("a b"), NamedObject("a b"), NamedObject("a b"), NamedObject("a b")]
#All four "a b" objects match each other, so they must all end up in the same group
matches = getmatches(l)
itemList = [
NamedObject("a b"),
NamedObject("a b"),
NamedObject("a b"),
NamedObject("a b"),
]
# All four "a b" objects match each other, so they must all end up in the same group
matches = getmatches(itemList)
r = get_groups(matches)
eq_(1, len(r))
@@ -787,8 +851,8 @@ class TestCaseget_groups:
assert o3 in g
def test_four_sized_group(self):
l = [NamedObject("foobar") for i in range(4)]
m = getmatches(l)
itemList = [NamedObject("foobar") for i in range(4)]
m = getmatches(itemList)
r = get_groups(m)
eq_(1, len(r))
eq_(4, len(r[0]))
@@ -808,10 +872,12 @@ class TestCaseget_groups:
# (A, B) match is the highest (thus resulting in an (A, B) group), still match C and D
# in a separate group instead of discarding them.
A, B, C, D = [NamedObject() for _ in range(4)]
m1 = Match(A, B, 90) # This is the strongest "A" match
m2 = Match(A, C, 80) # Because C doesn't match with B, it won't be in the group
m3 = Match(A, D, 80) # Same thing for D
m4 = Match(C, D, 70) # However, because C and D match, they should have their own group.
m1 = Match(A, B, 90) # This is the strongest "A" match
m2 = Match(A, C, 80) # Because C doesn't match with B, it won't be in the group
m3 = Match(A, D, 80) # Same thing for D
m4 = Match(
C, D, 70
) # However, because C and D match, they should have their own group.
groups = get_groups([m1, m2, m3, m4])
eq_(len(groups), 2)
g1, g2 = groups
@@ -819,4 +885,3 @@ class TestCaseget_groups:
assert B in g1
assert C in g2
assert D in g2

View File

@@ -1,9 +1,9 @@
# Created By: Virgil Dupras
# Created On: 2009-10-23
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.gnu.org/licenses/gpl-3.0.html
import hashlib
@@ -14,32 +14,35 @@ from core.tests.directories_test import create_fake_fs
from .. import fs
def test_size_aggregates_subfiles(tmpdir):
p = create_fake_fs(Path(str(tmpdir)))
b = fs.Folder(p)
eq_(b.size, 12)
def test_md5_aggregate_subfiles_sorted(tmpdir):
#dir.allfiles can return children in any order. Thus, bundle.md5 must aggregate
#the md5s of all the files it contains, but it must make sure that it does so in the
#same order every time.
# dir.allfiles can return children in any order. Thus, bundle.md5 must aggregate
# the md5s of all the files it contains, but it must make sure that it does so in the
# same order every time.
p = create_fake_fs(Path(str(tmpdir)))
b = fs.Folder(p)
md51 = fs.File(p['dir1']['file1.test']).md5
md52 = fs.File(p['dir2']['file2.test']).md5
md53 = fs.File(p['dir3']['file3.test']).md5
md54 = fs.File(p['file1.test']).md5
md55 = fs.File(p['file2.test']).md5
md56 = fs.File(p['file3.test']).md5
md51 = fs.File(p["dir1"]["file1.test"]).md5
md52 = fs.File(p["dir2"]["file2.test"]).md5
md53 = fs.File(p["dir3"]["file3.test"]).md5
md54 = fs.File(p["file1.test"]).md5
md55 = fs.File(p["file2.test"]).md5
md56 = fs.File(p["file3.test"]).md5
# The expected md5 is the md5 of md5s for folders and the direct md5 for files
folder_md51 = hashlib.md5(md51).digest()
folder_md52 = hashlib.md5(md52).digest()
folder_md53 = hashlib.md5(md53).digest()
md5 = hashlib.md5(folder_md51+folder_md52+folder_md53+md54+md55+md56)
md5 = hashlib.md5(folder_md51 + folder_md52 + folder_md53 + md54 + md55 + md56)
eq_(b.md5, md5.digest())
def test_has_file_attrs(tmpdir):
#a Folder must behave like a file, so it must have mtime attributes
# a Folder must behave like a file, so it must have mtime attributes
b = fs.Folder(Path(str(tmpdir)))
assert b.mtime > 0
eq_(b.extension, '')
eq_(b.extension, "")

View File

@@ -12,152 +12,172 @@ from hscommon.testutil import eq_
from ..ignore import IgnoreList
def test_empty():
il = IgnoreList()
eq_(0, len(il))
assert not il.AreIgnored('foo', 'bar')
assert not il.AreIgnored("foo", "bar")
def test_simple():
il = IgnoreList()
il.Ignore('foo', 'bar')
assert il.AreIgnored('foo', 'bar')
assert il.AreIgnored('bar', 'foo')
assert not il.AreIgnored('foo', 'bleh')
assert not il.AreIgnored('bleh', 'bar')
il.Ignore("foo", "bar")
assert il.AreIgnored("foo", "bar")
assert il.AreIgnored("bar", "foo")
assert not il.AreIgnored("foo", "bleh")
assert not il.AreIgnored("bleh", "bar")
eq_(1, len(il))
def test_multiple():
il = IgnoreList()
il.Ignore('foo', 'bar')
il.Ignore('foo', 'bleh')
il.Ignore('bleh', 'bar')
il.Ignore('aybabtu', 'bleh')
assert il.AreIgnored('foo', 'bar')
assert il.AreIgnored('bar', 'foo')
assert il.AreIgnored('foo', 'bleh')
assert il.AreIgnored('bleh', 'bar')
assert not il.AreIgnored('aybabtu', 'bar')
il.Ignore("foo", "bar")
il.Ignore("foo", "bleh")
il.Ignore("bleh", "bar")
il.Ignore("aybabtu", "bleh")
assert il.AreIgnored("foo", "bar")
assert il.AreIgnored("bar", "foo")
assert il.AreIgnored("foo", "bleh")
assert il.AreIgnored("bleh", "bar")
assert not il.AreIgnored("aybabtu", "bar")
eq_(4, len(il))
def test_clear():
il = IgnoreList()
il.Ignore('foo', 'bar')
il.Ignore("foo", "bar")
il.Clear()
assert not il.AreIgnored('foo', 'bar')
assert not il.AreIgnored('bar', 'foo')
assert not il.AreIgnored("foo", "bar")
assert not il.AreIgnored("bar", "foo")
eq_(0, len(il))
def test_add_same_twice():
il = IgnoreList()
il.Ignore('foo', 'bar')
il.Ignore('bar', 'foo')
il.Ignore("foo", "bar")
il.Ignore("bar", "foo")
eq_(1, len(il))
def test_save_to_xml():
il = IgnoreList()
il.Ignore('foo', 'bar')
il.Ignore('foo', 'bleh')
il.Ignore('bleh', 'bar')
il.Ignore("foo", "bar")
il.Ignore("foo", "bleh")
il.Ignore("bleh", "bar")
f = io.BytesIO()
il.save_to_xml(f)
f.seek(0)
doc = ET.parse(f)
root = doc.getroot()
eq_(root.tag, 'ignore_list')
eq_(root.tag, "ignore_list")
eq_(len(root), 2)
eq_(len([c for c in root if c.tag == 'file']), 2)
eq_(len([c for c in root if c.tag == "file"]), 2)
f1, f2 = root[:]
subchildren = [c for c in f1 if c.tag == 'file'] + [c for c in f2 if c.tag == 'file']
subchildren = [c for c in f1 if c.tag == "file"] + [
c for c in f2 if c.tag == "file"
]
eq_(len(subchildren), 3)
def test_SaveThenLoad():
il = IgnoreList()
il.Ignore('foo', 'bar')
il.Ignore('foo', 'bleh')
il.Ignore('bleh', 'bar')
il.Ignore('\u00e9', 'bar')
il.Ignore("foo", "bar")
il.Ignore("foo", "bleh")
il.Ignore("bleh", "bar")
il.Ignore("\u00e9", "bar")
f = io.BytesIO()
il.save_to_xml(f)
f.seek(0)
il = IgnoreList()
il.load_from_xml(f)
eq_(4, len(il))
assert il.AreIgnored('\u00e9', 'bar')
assert il.AreIgnored("\u00e9", "bar")
def test_LoadXML_with_empty_file_tags():
f = io.BytesIO()
f.write(b'<?xml version="1.0" encoding="utf-8"?><ignore_list><file><file/></file></ignore_list>')
f.write(
b'<?xml version="1.0" encoding="utf-8"?><ignore_list><file><file/></file></ignore_list>'
)
f.seek(0)
il = IgnoreList()
il.load_from_xml(f)
eq_(0, len(il))
def test_AreIgnore_works_when_a_child_is_a_key_somewhere_else():
il = IgnoreList()
il.Ignore('foo', 'bar')
il.Ignore('bar', 'baz')
assert il.AreIgnored('bar', 'foo')
il.Ignore("foo", "bar")
il.Ignore("bar", "baz")
assert il.AreIgnored("bar", "foo")
def test_no_dupes_when_a_child_is_a_key_somewhere_else():
il = IgnoreList()
il.Ignore('foo', 'bar')
il.Ignore('bar', 'baz')
il.Ignore('bar', 'foo')
il.Ignore("foo", "bar")
il.Ignore("bar", "baz")
il.Ignore("bar", "foo")
eq_(2, len(il))
def test_iterate():
#It must be possible to iterate through the ignore list
# It must be possible to iterate through the ignore list
il = IgnoreList()
expected = [('foo', 'bar'), ('bar', 'baz'), ('foo', 'baz')]
expected = [("foo", "bar"), ("bar", "baz"), ("foo", "baz")]
for i in expected:
il.Ignore(i[0], i[1])
for i in il:
expected.remove(i) #No exception should be raised
assert not expected #expected should be empty
expected.remove(i) # No exception should be raised
assert not expected # expected should be empty
def test_filter():
il = IgnoreList()
il.Ignore('foo', 'bar')
il.Ignore('bar', 'baz')
il.Ignore('foo', 'baz')
il.Filter(lambda f, s: f == 'bar')
il.Ignore("foo", "bar")
il.Ignore("bar", "baz")
il.Ignore("foo", "baz")
il.Filter(lambda f, s: f == "bar")
eq_(1, len(il))
assert not il.AreIgnored('foo', 'bar')
assert il.AreIgnored('bar', 'baz')
assert not il.AreIgnored("foo", "bar")
assert il.AreIgnored("bar", "baz")
def test_save_with_non_ascii_items():
il = IgnoreList()
il.Ignore('\xac', '\xbf')
il.Ignore("\xac", "\xbf")
f = io.BytesIO()
try:
il.save_to_xml(f)
except Exception as e:
raise AssertionError(str(e))
def test_len():
il = IgnoreList()
eq_(0, len(il))
il.Ignore('foo', 'bar')
il.Ignore("foo", "bar")
eq_(1, len(il))
def test_nonzero():
il = IgnoreList()
assert not il
il.Ignore('foo', 'bar')
il.Ignore("foo", "bar")
assert il
def test_remove():
il = IgnoreList()
il.Ignore('foo', 'bar')
il.Ignore('foo', 'baz')
il.remove('bar', 'foo')
il.Ignore("foo", "bar")
il.Ignore("foo", "baz")
il.remove("bar", "foo")
eq_(len(il), 1)
assert not il.AreIgnored('foo', 'bar')
assert not il.AreIgnored("foo", "bar")
def test_remove_non_existant():
il = IgnoreList()
il.Ignore('foo', 'bar')
il.Ignore('foo', 'baz')
il.Ignore("foo", "bar")
il.Ignore("foo", "baz")
with raises(ValueError):
il.remove('foo', 'bleh')
il.remove("foo", "bleh")

View File

@@ -8,33 +8,39 @@ from hscommon.testutil import eq_
from ..markable import MarkableList, Markable
def gen():
ml = MarkableList()
ml.extend(list(range(10)))
return ml
def test_unmarked():
ml = gen()
for i in ml:
assert not ml.is_marked(i)
def test_mark():
ml = gen()
assert ml.mark(3)
assert ml.is_marked(3)
assert not ml.is_marked(2)
def test_unmark():
ml = gen()
ml.mark(4)
assert ml.unmark(4)
assert not ml.is_marked(4)
def test_unmark_unmarked():
ml = gen()
assert not ml.unmark(4)
assert not ml.is_marked(4)
def test_mark_twice_and_unmark():
ml = gen()
assert ml.mark(5)
@@ -42,6 +48,7 @@ def test_mark_twice_and_unmark():
ml.unmark(5)
assert not ml.is_marked(5)
def test_mark_toggle():
ml = gen()
ml.mark_toggle(6)
@@ -51,22 +58,25 @@ def test_mark_toggle():
ml.mark_toggle(6)
assert ml.is_marked(6)
def test_is_markable():
class Foobar(Markable):
def _is_markable(self, o):
return o == 'foobar'
return o == "foobar"
f = Foobar()
assert not f.is_marked('foobar')
assert not f.mark('foo')
assert not f.is_marked('foo')
f.mark_toggle('foo')
assert not f.is_marked('foo')
f.mark('foobar')
assert f.is_marked('foobar')
assert not f.is_marked("foobar")
assert not f.mark("foo")
assert not f.is_marked("foo")
f.mark_toggle("foo")
assert not f.is_marked("foo")
f.mark("foobar")
assert f.is_marked("foobar")
ml = gen()
ml.mark(11)
assert not ml.is_marked(11)
def test_change_notifications():
class Foobar(Markable):
def _did_mark(self, o):
@@ -77,13 +87,14 @@ def test_change_notifications():
f = Foobar()
f.log = []
f.mark('foo')
f.mark('foo')
f.mark_toggle('bar')
f.unmark('foo')
f.unmark('foo')
f.mark_toggle('bar')
eq_([(True, 'foo'), (True, 'bar'), (False, 'foo'), (False, 'bar')], f.log)
f.mark("foo")
f.mark("foo")
f.mark_toggle("bar")
f.unmark("foo")
f.unmark("foo")
f.mark_toggle("bar")
eq_([(True, "foo"), (True, "bar"), (False, "foo"), (False, "bar")], f.log)
def test_mark_count():
ml = gen()
@@ -93,6 +104,7 @@ def test_mark_count():
ml.mark(11)
eq_(1, ml.mark_count)
def test_mark_none():
log = []
ml = gen()
@@ -104,6 +116,7 @@ def test_mark_none():
eq_(0, ml.mark_count)
eq_([1, 2], log)
def test_mark_all():
ml = gen()
eq_(0, ml.mark_count)
@@ -111,6 +124,7 @@ def test_mark_all():
eq_(10, ml.mark_count)
assert ml.is_marked(1)
def test_mark_invert():
ml = gen()
ml.mark(1)
@@ -118,6 +132,7 @@ def test_mark_invert():
assert not ml.is_marked(1)
assert ml.is_marked(2)
def test_mark_while_inverted():
log = []
ml = gen()
@@ -134,6 +149,7 @@ def test_mark_while_inverted():
eq_(7, ml.mark_count)
eq_([(True, 1), (False, 1), (True, 2), (True, 1), (True, 3)], log)
def test_remove_mark_flag():
ml = gen()
ml.mark(1)
@@ -145,10 +161,12 @@ def test_remove_mark_flag():
ml._remove_mark_flag(1)
assert ml.is_marked(1)
def test_is_marked_returns_false_if_object_not_markable():
class MyMarkableList(MarkableList):
def _is_markable(self, o):
return o != 4
ml = MyMarkableList()
ml.extend(list(range(10)))
ml.mark_invert()

View File

@@ -1,9 +1,9 @@
# Created By: Virgil Dupras
# Created On: 2011/09/07
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.gnu.org/licenses/gpl-3.0.html
import os.path as op
@@ -14,6 +14,7 @@ from ..engine import Group, Match
no = NamedObject
def app_with_dupes(dupes):
# Creates an app with specified dupes. dupes is a list of lists, each list in the list being
# a dupe group. We cheat a little bit by creating dupe groups manually instead of running a
@@ -29,57 +30,63 @@ def app_with_dupes(dupes):
app.app._results_changed()
return app
#---
# ---
def app_normal_results():
# Just some results, with different extensions and size, for good measure.
dupes = [
[
no('foo1.ext1', size=1, folder='folder1'),
no('foo2.ext2', size=2, folder='folder2')
no("foo1.ext1", size=1, folder="folder1"),
no("foo2.ext2", size=2, folder="folder2"),
],
]
return app_with_dupes(dupes)
@with_app(app_normal_results)
def test_kind_subcrit(app):
# The subcriteria of the "Kind" criterion are the extensions contained in the dupes.
app.select_pri_criterion("Kind")
eq_(app.pdialog.criteria_list[:], ['ext1', 'ext2'])
eq_(app.pdialog.criteria_list[:], ["ext1", "ext2"])
@with_app(app_normal_results)
def test_kind_reprioritization(app):
# Just a simple test of the system as a whole.
# select a criterion, and perform re-prioritization and see if it worked.
app.select_pri_criterion("Kind")
app.pdialog.criteria_list.select([1]) # ext2
app.pdialog.criteria_list.select([1]) # ext2
app.pdialog.add_selected()
app.pdialog.perform_reprioritization()
eq_(app.rtable[0].data['name'], 'foo2.ext2')
eq_(app.rtable[0].data["name"], "foo2.ext2")
@with_app(app_normal_results)
def test_folder_subcrit(app):
app.select_pri_criterion("Folder")
eq_(app.pdialog.criteria_list[:], ['folder1', 'folder2'])
eq_(app.pdialog.criteria_list[:], ["folder1", "folder2"])
@with_app(app_normal_results)
def test_folder_reprioritization(app):
app.select_pri_criterion("Folder")
app.pdialog.criteria_list.select([1]) # folder2
app.pdialog.criteria_list.select([1]) # folder2
app.pdialog.add_selected()
app.pdialog.perform_reprioritization()
eq_(app.rtable[0].data['name'], 'foo2.ext2')
eq_(app.rtable[0].data["name"], "foo2.ext2")
@with_app(app_normal_results)
def test_prilist_display(app):
# The prioritization list displays selected criteria correctly.
app.select_pri_criterion("Kind")
app.pdialog.criteria_list.select([1]) # ext2
app.pdialog.criteria_list.select([1]) # ext2
app.pdialog.add_selected()
app.select_pri_criterion("Folder")
app.pdialog.criteria_list.select([1]) # folder2
app.pdialog.criteria_list.select([1]) # folder2
app.pdialog.add_selected()
app.select_pri_criterion("Size")
app.pdialog.criteria_list.select([1]) # Lowest
app.pdialog.criteria_list.select([1]) # Lowest
app.pdialog.add_selected()
expected = [
"Kind (ext2)",
@@ -88,23 +95,26 @@ def test_prilist_display(app):
]
eq_(app.pdialog.prioritization_list[:], expected)
@with_app(app_normal_results)
def test_size_subcrit(app):
app.select_pri_criterion("Size")
eq_(app.pdialog.criteria_list[:], ['Highest', 'Lowest'])
eq_(app.pdialog.criteria_list[:], ["Highest", "Lowest"])
@with_app(app_normal_results)
def test_size_reprioritization(app):
app.select_pri_criterion("Size")
app.pdialog.criteria_list.select([0]) # highest
app.pdialog.criteria_list.select([0]) # highest
app.pdialog.add_selected()
app.pdialog.perform_reprioritization()
eq_(app.rtable[0].data['name'], 'foo2.ext2')
eq_(app.rtable[0].data["name"], "foo2.ext2")
@with_app(app_normal_results)
def test_reorder_prioritizations(app):
app.add_pri_criterion("Kind", 0) # ext1
app.add_pri_criterion("Kind", 1) # ext2
app.add_pri_criterion("Kind", 0) # ext1
app.add_pri_criterion("Kind", 1) # ext2
app.pdialog.prioritization_list.move_indexes([1], 0)
expected = [
"Kind (ext2)",
@@ -112,6 +122,7 @@ def test_reorder_prioritizations(app):
]
eq_(app.pdialog.prioritization_list[:], expected)
@with_app(app_normal_results)
def test_remove_crit_from_list(app):
app.add_pri_criterion("Kind", 0)
@@ -123,75 +134,72 @@ def test_remove_crit_from_list(app):
]
eq_(app.pdialog.prioritization_list[:], expected)
@with_app(app_normal_results)
def test_add_crit_without_selection(app):
# Adding a criterion without having made a selection doesn't cause a crash.
app.pdialog.add_selected() # no crash
app.pdialog.add_selected() # no crash
#---
# ---
def app_one_name_ends_with_number():
dupes = [
[
no('foo.ext'),
no('foo1.ext'),
],
[no("foo.ext"), no("foo1.ext")],
]
return app_with_dupes(dupes)
@with_app(app_one_name_ends_with_number)
def test_filename_reprioritization(app):
app.add_pri_criterion("Filename", 0) # Ends with a number
app.add_pri_criterion("Filename", 0) # Ends with a number
app.pdialog.perform_reprioritization()
eq_(app.rtable[0].data['name'], 'foo1.ext')
eq_(app.rtable[0].data["name"], "foo1.ext")
#---
# ---
def app_with_subfolders():
dupes = [
[
no('foo1', folder='baz'),
no('foo2', folder='foo/bar'),
],
[
no('foo3', folder='baz'),
no('foo4', folder='foo'),
],
[no("foo1", folder="baz"), no("foo2", folder="foo/bar")],
[no("foo3", folder="baz"), no("foo4", folder="foo")],
]
return app_with_dupes(dupes)
@with_app(app_with_subfolders)
def test_folder_crit_is_sorted(app):
# Folder subcriteria are sorted.
app.select_pri_criterion("Folder")
eq_(app.pdialog.criteria_list[:], ['baz', 'foo', op.join('foo', 'bar')])
eq_(app.pdialog.criteria_list[:], ["baz", "foo", op.join("foo", "bar")])
@with_app(app_with_subfolders)
def test_folder_crit_includes_subfolders(app):
# When selecting a folder crit, dupes in a subfolder are also considered as affected by that
# crit.
app.add_pri_criterion("Folder", 1) # foo
app.add_pri_criterion("Folder", 1) # foo
app.pdialog.perform_reprioritization()
# Both foo and foo/bar dupes will be prioritized
eq_(app.rtable[0].data['name'], 'foo2')
eq_(app.rtable[2].data['name'], 'foo4')
eq_(app.rtable[0].data["name"], "foo2")
eq_(app.rtable[2].data["name"], "foo4")
@with_app(app_with_subfolders)
def test_display_something_on_empty_extensions(app):
# When there's no extension, display "None" instead of nothing at all.
app.select_pri_criterion("Kind")
eq_(app.pdialog.criteria_list[:], ['None'])
eq_(app.pdialog.criteria_list[:], ["None"])
#---
# ---
def app_one_name_longer_than_the_other():
dupes = [
[
no('shortest.ext'),
no('loooongest.ext'),
],
[no("shortest.ext"), no("loooongest.ext")],
]
return app_with_dupes(dupes)
@with_app(app_one_name_longer_than_the_other)
def test_longest_filename_prioritization(app):
app.add_pri_criterion("Filename", 2) # Longest
app.add_pri_criterion("Filename", 2) # Longest
app.pdialog.perform_reprioritization()
eq_(app.rtable[0].data['name'], 'loooongest.ext')
eq_(app.rtable[0].data["name"], "loooongest.ext")

View File

@@ -1,13 +1,14 @@
# Created By: Virgil Dupras
# Created On: 2013-07-28
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.gnu.org/licenses/gpl-3.0.html
from .base import TestApp, GetTestGroups
def app_with_results():
app = TestApp()
objects, matches, groups = GetTestGroups()
@@ -15,23 +16,26 @@ def app_with_results():
app.rtable.refresh()
return app
def test_delta_flags_delta_mode_off():
app = app_with_results()
# When the delta mode is off, we never have delta value flags
app.rtable.delta_values = False
# Ref file, always false anyway
assert not app.rtable[0].is_cell_delta('size')
assert not app.rtable[0].is_cell_delta("size")
# False because delta mode is off
assert not app.rtable[1].is_cell_delta('size')
assert not app.rtable[1].is_cell_delta("size")
def test_delta_flags_delta_mode_on_delta_columns():
# When the delta mode is on, delta columns always have a delta flag, except for ref rows
app = app_with_results()
app.rtable.delta_values = True
# Ref file, always false anyway
assert not app.rtable[0].is_cell_delta('size')
assert not app.rtable[0].is_cell_delta("size")
# But for a dupe, the flag is on
assert app.rtable[1].is_cell_delta('size')
assert app.rtable[1].is_cell_delta("size")
def test_delta_flags_delta_mode_on_non_delta_columns():
# When the delta mode is on, non-delta columns have a delta flag if their value differs from
@@ -39,11 +43,12 @@ def test_delta_flags_delta_mode_on_non_delta_columns():
app = app_with_results()
app.rtable.delta_values = True
# "bar bleh" != "foo bar", flag on
assert app.rtable[1].is_cell_delta('name')
assert app.rtable[1].is_cell_delta("name")
# "ibabtu" row, but it's a ref, flag off
assert not app.rtable[3].is_cell_delta('name')
assert not app.rtable[3].is_cell_delta("name")
# "ibabtu" == "ibabtu", flag off
assert not app.rtable[4].is_cell_delta('name')
assert not app.rtable[4].is_cell_delta("name")
def test_delta_flags_delta_mode_on_non_delta_columns_case_insensitive():
# Comparison that occurs for non-numeric columns to check whether they're delta is case
@@ -53,4 +58,4 @@ def test_delta_flags_delta_mode_on_non_delta_columns_case_insensitive():
app.app.results.groups[1].dupes[0].name = "IBaBTU"
app.rtable.delta_values = True
# "ibAbtu" == "IBaBTU", flag off
assert not app.rtable[4].is_cell_delta('name')
assert not app.rtable[4].is_cell_delta("name")

View File

@@ -17,6 +17,7 @@ from .. import engine
from .base import NamedObject, GetTestGroups, DupeGuru
from ..results import Results
class TestCaseResultsEmpty:
def setup_method(self, method):
self.app = DupeGuru()
@@ -24,8 +25,8 @@ class TestCaseResultsEmpty:
def test_apply_invalid_filter(self):
# If the applied filter is an invalid regexp, just ignore the filter.
self.results.apply_filter('[') # invalid
self.test_stat_line() # make sure that the stats line isn't saying we applied a '[' filter
self.results.apply_filter("[") # invalid
self.test_stat_line() # make sure that the stats line isn't saying we applied a '[' filter
def test_stat_line(self):
eq_("0 / 0 (0.00 B / 0.00 B) duplicates marked.", self.results.stat_line)
@@ -34,7 +35,7 @@ class TestCaseResultsEmpty:
eq_(0, len(self.results.groups))
def test_get_group_of_duplicate(self):
assert self.results.get_group_of_duplicate('foo') is None
assert self.results.get_group_of_duplicate("foo") is None
def test_save_to_xml(self):
f = io.BytesIO()
@@ -42,7 +43,7 @@ class TestCaseResultsEmpty:
f.seek(0)
doc = ET.parse(f)
root = doc.getroot()
eq_('results', root.tag)
eq_("results", root.tag)
def test_is_modified(self):
assert not self.results.is_modified
@@ -59,10 +60,10 @@ class TestCaseResultsEmpty:
# would have been some kind of feedback to the user, but the work involved for something
# that simply never happens (I never received a report of this crash, I experienced it
# while fooling around) is too much. Instead, use standard name conflict resolution.
folderpath = tmpdir.join('foo')
folderpath = tmpdir.join("foo")
folderpath.mkdir()
self.results.save_to_xml(str(folderpath)) # no crash
assert tmpdir.join('[000] foo').check()
self.results.save_to_xml(str(folderpath)) # no crash
assert tmpdir.join("[000] foo").check()
class TestCaseResultsWithSomeGroups:
@@ -116,18 +117,22 @@ class TestCaseResultsWithSomeGroups:
assert d is g.ref
def test_sort_groups(self):
self.results.make_ref(self.objects[1]) #We want the 1024-byte object to become the ref.
self.results.make_ref(
self.objects[1]
) # We want the 1024-byte object to become the ref.
g1, g2 = self.groups
self.results.sort_groups('size')
self.results.sort_groups("size")
assert self.results.groups[0] is g2
assert self.results.groups[1] is g1
self.results.sort_groups('size', False)
self.results.sort_groups("size", False)
assert self.results.groups[0] is g1
assert self.results.groups[1] is g2
def test_set_groups_when_sorted(self):
self.results.make_ref(self.objects[1]) #We want the 1024-byte object to become the ref.
self.results.sort_groups('size')
self.results.make_ref(
self.objects[1]
) # We want the 1024-byte object to become the ref.
self.results.sort_groups("size")
objects, matches, groups = GetTestGroups()
g1, g2 = groups
g1.switch_ref(objects[1])
@@ -158,9 +163,9 @@ class TestCaseResultsWithSomeGroups:
o3.size = 3
o4.size = 2
o5.size = 1
self.results.sort_dupes('size')
self.results.sort_dupes("size")
eq_([o5, o3, o2], self.results.dupes)
self.results.sort_dupes('size', False)
self.results.sort_dupes("size", False)
eq_([o2, o3, o5], self.results.dupes)
def test_dupe_list_remember_sort(self):
@@ -170,25 +175,25 @@ class TestCaseResultsWithSomeGroups:
o3.size = 3
o4.size = 2
o5.size = 1
self.results.sort_dupes('size')
self.results.sort_dupes("size")
self.results.make_ref(o2)
eq_([o5, o3, o1], self.results.dupes)
def test_dupe_list_sort_delta_values(self):
o1, o2, o3, o4, o5 = self.objects
o1.size = 10
o2.size = 2 #-8
o3.size = 3 #-7
o2.size = 2 # -8
o3.size = 3 # -7
o4.size = 20
o5.size = 1 #-19
self.results.sort_dupes('size', delta=True)
o5.size = 1 # -19
self.results.sort_dupes("size", delta=True)
eq_([o5, o2, o3], self.results.dupes)
def test_sort_empty_list(self):
#There was an infinite loop when sorting an empty list.
# There was an infinite loop when sorting an empty list.
app = DupeGuru()
r = app.results
r.sort_dupes('name')
r.sort_dupes("name")
eq_([], r.dupes)
def test_dupe_list_update_on_remove_duplicates(self):
@@ -209,7 +214,7 @@ class TestCaseResultsWithSomeGroups:
f = io.BytesIO()
self.results.save_to_xml(f)
assert not self.results.is_modified
self.results.groups = self.groups # sets the flag back
self.results.groups = self.groups # sets the flag back
f.seek(0)
self.results.load_from_xml(f, get_file)
assert not self.results.is_modified
@@ -236,7 +241,7 @@ class TestCaseResultsWithSomeGroups:
# "aaa" makes our dupe go first in alphabetical order, but since we have the same value as
# ref, we're going last.
g2r.name = g2d1.name = "aaa"
self.results.sort_dupes('name', delta=True)
self.results.sort_dupes("name", delta=True)
eq_("aaa", self.results.dupes[2].name)
def test_dupe_list_sort_delta_values_nonnumeric_case_insensitive(self):
@@ -244,9 +249,10 @@ class TestCaseResultsWithSomeGroups:
g1r, g1d1, g1d2, g2r, g2d1 = self.objects
g2r.name = "AaA"
g2d1.name = "aAa"
self.results.sort_dupes('name', delta=True)
self.results.sort_dupes("name", delta=True)
eq_("aAa", self.results.dupes[2].name)
class TestCaseResultsWithSavedResults:
def setup_method(self, method):
self.app = DupeGuru()
@@ -266,7 +272,7 @@ class TestCaseResultsWithSavedResults:
def get_file(path):
return [f for f in self.objects if str(f.path) == path][0]
self.results.groups = self.groups # sets the flag back
self.results.groups = self.groups # sets the flag back
self.results.load_from_xml(self.f, get_file)
assert not self.results.is_modified
@@ -299,7 +305,7 @@ class TestCaseResultsMarkings:
self.results.mark(self.objects[2])
self.results.mark(self.objects[4])
eq_("2 / 3 (2.00 B / 1.01 KB) duplicates marked.", self.results.stat_line)
self.results.mark(self.objects[0]) #this is a ref, it can't be counted
self.results.mark(self.objects[0]) # this is a ref, it can't be counted
eq_("2 / 3 (2.00 B / 1.01 KB) duplicates marked.", self.results.stat_line)
self.results.groups = self.groups
eq_("0 / 3 (0.00 B / 1.01 KB) duplicates marked.", self.results.stat_line)
@@ -335,7 +341,7 @@ class TestCaseResultsMarkings:
def log_object(o):
log.append(o)
if o is self.objects[1]:
raise EnvironmentError('foobar')
raise EnvironmentError("foobar")
log = []
self.results.mark_all()
@@ -350,7 +356,7 @@ class TestCaseResultsMarkings:
eq_(len(self.results.problems), 1)
dupe, msg = self.results.problems[0]
assert dupe is self.objects[1]
eq_(msg, 'foobar')
eq_(msg, "foobar")
def test_perform_on_marked_with_ref(self):
def log_object(o):
@@ -408,20 +414,20 @@ class TestCaseResultsMarkings:
f.seek(0)
doc = ET.parse(f)
root = doc.getroot()
g1, g2 = root.getiterator('group')
d1, d2, d3 = g1.getiterator('file')
eq_('n', d1.get('marked'))
eq_('n', d2.get('marked'))
eq_('y', d3.get('marked'))
d1, d2 = g2.getiterator('file')
eq_('n', d1.get('marked'))
eq_('y', d2.get('marked'))
g1, g2 = root.getiterator("group")
d1, d2, d3 = g1.getiterator("file")
eq_("n", d1.get("marked"))
eq_("n", d2.get("marked"))
eq_("y", d3.get("marked"))
d1, d2 = g2.getiterator("file")
eq_("n", d1.get("marked"))
eq_("y", d2.get("marked"))
def test_LoadXML(self):
def get_file(path):
return [f for f in self.objects if str(f.path) == path][0]
self.objects[4].name = 'ibabtu 2' #we can't have 2 files with the same path
self.objects[4].name = "ibabtu 2" # we can't have 2 files with the same path
self.results.mark(self.objects[1])
self.results.mark_invert()
f = io.BytesIO()
@@ -444,51 +450,51 @@ class TestCaseResultsXML:
self.objects, self.matches, self.groups = GetTestGroups()
self.results.groups = self.groups
def get_file(self, path): # use this as a callback for load_from_xml
def get_file(self, path): # use this as a callback for load_from_xml
return [o for o in self.objects if o.path == path][0]
def test_save_to_xml(self):
self.objects[0].is_ref = True
self.objects[0].words = [['foo', 'bar']]
self.objects[0].words = [["foo", "bar"]]
f = io.BytesIO()
self.results.save_to_xml(f)
f.seek(0)
doc = ET.parse(f)
root = doc.getroot()
eq_('results', root.tag)
eq_("results", root.tag)
eq_(2, len(root))
eq_(2, len([c for c in root if c.tag == 'group']))
eq_(2, len([c for c in root if c.tag == "group"]))
g1, g2 = root
eq_(6, len(g1))
eq_(3, len([c for c in g1 if c.tag == 'file']))
eq_(3, len([c for c in g1 if c.tag == 'match']))
d1, d2, d3 = [c for c in g1 if c.tag == 'file']
eq_(op.join('basepath', 'foo bar'), d1.get('path'))
eq_(op.join('basepath', 'bar bleh'), d2.get('path'))
eq_(op.join('basepath', 'foo bleh'), d3.get('path'))
eq_('y', d1.get('is_ref'))
eq_('n', d2.get('is_ref'))
eq_('n', d3.get('is_ref'))
eq_('foo,bar', d1.get('words'))
eq_('bar,bleh', d2.get('words'))
eq_('foo,bleh', d3.get('words'))
eq_(3, len([c for c in g1 if c.tag == "file"]))
eq_(3, len([c for c in g1 if c.tag == "match"]))
d1, d2, d3 = [c for c in g1 if c.tag == "file"]
eq_(op.join("basepath", "foo bar"), d1.get("path"))
eq_(op.join("basepath", "bar bleh"), d2.get("path"))
eq_(op.join("basepath", "foo bleh"), d3.get("path"))
eq_("y", d1.get("is_ref"))
eq_("n", d2.get("is_ref"))
eq_("n", d3.get("is_ref"))
eq_("foo,bar", d1.get("words"))
eq_("bar,bleh", d2.get("words"))
eq_("foo,bleh", d3.get("words"))
eq_(3, len(g2))
eq_(2, len([c for c in g2 if c.tag == 'file']))
eq_(1, len([c for c in g2 if c.tag == 'match']))
d1, d2 = [c for c in g2 if c.tag == 'file']
eq_(op.join('basepath', 'ibabtu'), d1.get('path'))
eq_(op.join('basepath', 'ibabtu'), d2.get('path'))
eq_('n', d1.get('is_ref'))
eq_('n', d2.get('is_ref'))
eq_('ibabtu', d1.get('words'))
eq_('ibabtu', d2.get('words'))
eq_(2, len([c for c in g2 if c.tag == "file"]))
eq_(1, len([c for c in g2 if c.tag == "match"]))
d1, d2 = [c for c in g2 if c.tag == "file"]
eq_(op.join("basepath", "ibabtu"), d1.get("path"))
eq_(op.join("basepath", "ibabtu"), d2.get("path"))
eq_("n", d1.get("is_ref"))
eq_("n", d2.get("is_ref"))
eq_("ibabtu", d1.get("words"))
eq_("ibabtu", d2.get("words"))
def test_LoadXML(self):
def get_file(path):
return [f for f in self.objects if str(f.path) == path][0]
self.objects[0].is_ref = True
self.objects[4].name = 'ibabtu 2' #we can't have 2 files with the same path
self.objects[4].name = "ibabtu 2" # we can't have 2 files with the same path
f = io.BytesIO()
self.results.save_to_xml(f)
f.seek(0)
@@ -504,23 +510,23 @@ class TestCaseResultsXML:
assert g1[0] is self.objects[0]
assert g1[1] is self.objects[1]
assert g1[2] is self.objects[2]
eq_(['foo', 'bar'], g1[0].words)
eq_(['bar', 'bleh'], g1[1].words)
eq_(['foo', 'bleh'], g1[2].words)
eq_(["foo", "bar"], g1[0].words)
eq_(["bar", "bleh"], g1[1].words)
eq_(["foo", "bleh"], g1[2].words)
eq_(2, len(g2))
assert not g2[0].is_ref
assert not g2[1].is_ref
assert g2[0] is self.objects[3]
assert g2[1] is self.objects[4]
eq_(['ibabtu'], g2[0].words)
eq_(['ibabtu'], g2[1].words)
eq_(["ibabtu"], g2[0].words)
eq_(["ibabtu"], g2[1].words)
def test_LoadXML_with_filename(self, tmpdir):
def get_file(path):
return [f for f in self.objects if str(f.path) == path][0]
filename = str(tmpdir.join('dupeguru_results.xml'))
self.objects[4].name = 'ibabtu 2' #we can't have 2 files with the same path
filename = str(tmpdir.join("dupeguru_results.xml"))
self.objects[4].name = "ibabtu 2" # we can't have 2 files with the same path
self.results.save_to_xml(filename)
app = DupeGuru()
r = Results(app)
@@ -529,11 +535,11 @@ class TestCaseResultsXML:
def test_LoadXML_with_some_files_that_dont_exist_anymore(self):
def get_file(path):
if path.endswith('ibabtu 2'):
if path.endswith("ibabtu 2"):
return None
return [f for f in self.objects if str(f.path) == path][0]
self.objects[4].name = 'ibabtu 2' #we can't have 2 files with the same path
self.objects[4].name = "ibabtu 2" # we can't have 2 files with the same path
f = io.BytesIO()
self.results.save_to_xml(f)
f.seek(0)
@@ -547,36 +553,36 @@ class TestCaseResultsXML:
def get_file(path):
return [f for f in self.objects if str(f.path) == path][0]
root = ET.Element('foobar') #The root element shouldn't matter, really.
group_node = ET.SubElement(root, 'group')
dupe_node = ET.SubElement(group_node, 'file') #Perfectly correct file
dupe_node.set('path', op.join('basepath', 'foo bar'))
dupe_node.set('is_ref', 'y')
dupe_node.set('words', 'foo, bar')
dupe_node = ET.SubElement(group_node, 'file') #is_ref missing, default to 'n'
dupe_node.set('path', op.join('basepath', 'foo bleh'))
dupe_node.set('words', 'foo, bleh')
dupe_node = ET.SubElement(group_node, 'file') #words are missing, valid.
dupe_node.set('path', op.join('basepath', 'bar bleh'))
dupe_node = ET.SubElement(group_node, 'file') #path is missing, invalid.
dupe_node.set('words', 'foo, bleh')
dupe_node = ET.SubElement(group_node, 'foobar') #Invalid element name
dupe_node.set('path', op.join('basepath', 'bar bleh'))
dupe_node.set('is_ref', 'y')
dupe_node.set('words', 'bar, bleh')
match_node = ET.SubElement(group_node, 'match') # match pointing to a bad index
match_node.set('first', '42')
match_node.set('second', '45')
match_node = ET.SubElement(group_node, 'match') # match with missing attrs
match_node = ET.SubElement(group_node, 'match') # match with non-int values
match_node.set('first', 'foo')
match_node.set('second', 'bar')
match_node.set('percentage', 'baz')
group_node = ET.SubElement(root, 'foobar') #invalid group
group_node = ET.SubElement(root, 'group') #empty group
root = ET.Element("foobar") # The root element shouldn't matter, really.
group_node = ET.SubElement(root, "group")
dupe_node = ET.SubElement(group_node, "file") # Perfectly correct file
dupe_node.set("path", op.join("basepath", "foo bar"))
dupe_node.set("is_ref", "y")
dupe_node.set("words", "foo, bar")
dupe_node = ET.SubElement(group_node, "file") # is_ref missing, default to 'n'
dupe_node.set("path", op.join("basepath", "foo bleh"))
dupe_node.set("words", "foo, bleh")
dupe_node = ET.SubElement(group_node, "file") # words are missing, valid.
dupe_node.set("path", op.join("basepath", "bar bleh"))
dupe_node = ET.SubElement(group_node, "file") # path is missing, invalid.
dupe_node.set("words", "foo, bleh")
dupe_node = ET.SubElement(group_node, "foobar") # Invalid element name
dupe_node.set("path", op.join("basepath", "bar bleh"))
dupe_node.set("is_ref", "y")
dupe_node.set("words", "bar, bleh")
match_node = ET.SubElement(group_node, "match") # match pointing to a bad index
match_node.set("first", "42")
match_node.set("second", "45")
match_node = ET.SubElement(group_node, "match") # match with missing attrs
match_node = ET.SubElement(group_node, "match") # match with non-int values
match_node.set("first", "foo")
match_node.set("second", "bar")
match_node.set("percentage", "baz")
group_node = ET.SubElement(root, "foobar") # invalid group
group_node = ET.SubElement(root, "group") # empty group
f = io.BytesIO()
tree = ET.ElementTree(root)
tree.write(f, encoding='utf-8')
tree.write(f, encoding="utf-8")
f.seek(0)
app = DupeGuru()
r = Results(app)
@@ -586,16 +592,18 @@ class TestCaseResultsXML:
def test_xml_non_ascii(self):
def get_file(path):
if path == op.join('basepath', '\xe9foo bar'):
if path == op.join("basepath", "\xe9foo bar"):
return objects[0]
if path == op.join('basepath', 'bar bleh'):
if path == op.join("basepath", "bar bleh"):
return objects[1]
objects = [NamedObject("\xe9foo bar", True), NamedObject("bar bleh", True)]
matches = engine.getmatches(objects) #the two objects share "bar", so we get one match
groups = engine.get_groups(matches) #and one group
matches = engine.getmatches(objects) # the two objects share "bar", so we get one match
groups = engine.get_groups(matches) # and one group
for g in groups:
g.prioritize(lambda x: objects.index(x)) #We want the dupes to be in the same order as the list
g.prioritize(
lambda x: objects.index(x)
) # We want the dupes to be in the same order as the list
app = DupeGuru()
results = Results(app)
results.groups = groups
@@ -607,11 +615,11 @@ class TestCaseResultsXML:
r.load_from_xml(f, get_file)
g = r.groups[0]
eq_("\xe9foo bar", g[0].name)
eq_(['efoo', 'bar'], g[0].words)
eq_(["efoo", "bar"], g[0].words)
def test_load_invalid_xml(self):
f = io.BytesIO()
f.write(b'<this is invalid')
f.write(b"<this is invalid")
f.seek(0)
app = DupeGuru()
r = Results(app)
@@ -623,7 +631,7 @@ class TestCaseResultsXML:
app = DupeGuru()
r = Results(app)
with raises(IOError):
r.load_from_xml('does_not_exist.xml', None)
r.load_from_xml("does_not_exist.xml", None)
eq_(0, len(r.groups))
def test_remember_match_percentage(self):
@@ -643,12 +651,12 @@ class TestCaseResultsXML:
results.load_from_xml(f, self.get_file)
group = results.groups[0]
d1, d2, d3 = group
match = group.get_match_of(d2) #d1 - d2
match = group.get_match_of(d2) # d1 - d2
eq_(42, match[2])
match = group.get_match_of(d3) #d1 - d3
match = group.get_match_of(d3) # d1 - d3
eq_(43, match[2])
group.switch_ref(d2)
match = group.get_match_of(d3) #d2 - d3
match = group.get_match_of(d3) # d2 - d3
eq_(46, match[2])
def test_save_and_load(self):
@@ -661,13 +669,13 @@ class TestCaseResultsXML:
def test_apply_filter_works_on_paths(self):
# apply_filter() searches on the whole path, not just on the filename.
self.results.apply_filter('basepath')
self.results.apply_filter("basepath")
eq_(len(self.results.groups), 2)
def test_save_xml_with_invalid_characters(self):
# Don't crash when saving files that have invalid xml characters in their path
self.objects[0].name = 'foo\x19'
self.results.save_to_xml(io.BytesIO()) # don't crash
self.objects[0].name = "foo\x19"
self.results.save_to_xml(io.BytesIO()) # don't crash
class TestCaseResultsFilter:
@@ -676,7 +684,7 @@ class TestCaseResultsFilter:
self.results = self.app.results
self.objects, self.matches, self.groups = GetTestGroups()
self.results.groups = self.groups
self.results.apply_filter(r'foo')
self.results.apply_filter(r"foo")
def test_groups(self):
eq_(1, len(self.results.groups))
@@ -694,7 +702,7 @@ class TestCaseResultsFilter:
def test_dupes_reconstructed_filtered(self):
# make_ref resets self.__dupes to None. When it's reconstructed, we want it filtered
dupe = self.results.dupes[0] #3rd object
dupe = self.results.dupes[0] # 3rd object
self.results.make_ref(dupe)
eq_(1, len(self.results.dupes))
assert self.results.dupes[0] is self.objects[0]
@@ -702,23 +710,23 @@ class TestCaseResultsFilter:
def test_include_ref_dupes_in_filter(self):
# When only the ref of a group matches the filter, include it in the group
self.results.apply_filter(None)
self.results.apply_filter(r'foo bar')
self.results.apply_filter(r"foo bar")
eq_(1, len(self.results.groups))
eq_(0, len(self.results.dupes))
def test_filters_build_on_one_another(self):
self.results.apply_filter(r'bar')
self.results.apply_filter(r"bar")
eq_(1, len(self.results.groups))
eq_(0, len(self.results.dupes))
def test_stat_line(self):
expected = '0 / 1 (0.00 B / 1.00 B) duplicates marked. filter: foo'
expected = "0 / 1 (0.00 B / 1.00 B) duplicates marked. filter: foo"
eq_(expected, self.results.stat_line)
self.results.apply_filter(r'bar')
expected = '0 / 0 (0.00 B / 0.00 B) duplicates marked. filter: foo --> bar'
self.results.apply_filter(r"bar")
expected = "0 / 0 (0.00 B / 0.00 B) duplicates marked. filter: foo --> bar"
eq_(expected, self.results.stat_line)
self.results.apply_filter(None)
expected = '0 / 3 (0.00 B / 1.01 KB) duplicates marked.'
expected = "0 / 3 (0.00 B / 1.01 KB) duplicates marked."
eq_(expected, self.results.stat_line)
def test_mark_count_is_filtered_as_well(self):
@@ -726,8 +734,8 @@ class TestCaseResultsFilter:
# We don't want to perform mark_all() because we want the mark list to contain objects
for dupe in self.results.dupes:
self.results.mark(dupe)
self.results.apply_filter(r'foo')
expected = '1 / 1 (1.00 B / 1.00 B) duplicates marked. filter: foo'
self.results.apply_filter(r"foo")
expected = "1 / 1 (1.00 B / 1.00 B) duplicates marked. filter: foo"
eq_(expected, self.results.stat_line)
def test_mark_all_only_affects_filtered_items(self):
@@ -739,22 +747,22 @@ class TestCaseResultsFilter:
def test_sort_groups(self):
self.results.apply_filter(None)
self.results.make_ref(self.objects[1]) # to have the 1024 B object as ref
self.results.make_ref(self.objects[1]) # to have the 1024 B object as ref
g1, g2 = self.groups
self.results.apply_filter('a') # Matches both groups
self.results.sort_groups('size')
self.results.apply_filter("a") # Matches both group
self.results.sort_groups("size")
assert self.results.groups[0] is g2
assert self.results.groups[1] is g1
self.results.apply_filter(None)
assert self.results.groups[0] is g2
assert self.results.groups[1] is g1
self.results.sort_groups('size', False)
self.results.apply_filter('a')
self.results.sort_groups("size", False)
self.results.apply_filter("a")
assert self.results.groups[1] is g2
assert self.results.groups[0] is g1
def test_set_group(self):
#We want the new group to be filtered
# We want the new group to be filtered
self.objects, self.matches, self.groups = GetTestGroups()
self.results.groups = self.groups
eq_(1, len(self.results.groups))
@@ -764,12 +772,12 @@ class TestCaseResultsFilter:
def get_file(path):
return [f for f in self.objects if str(f.path) == path][0]
filename = str(tmpdir.join('dupeguru_results.xml'))
self.objects[4].name = 'ibabtu 2' #we can't have 2 files with the same path
filename = str(tmpdir.join("dupeguru_results.xml"))
self.objects[4].name = "ibabtu 2" # we can't have 2 files with the same path
self.results.save_to_xml(filename)
app = DupeGuru()
r = Results(app)
r.apply_filter('foo')
r.apply_filter("foo")
r.load_from_xml(filename, get_file)
eq_(2, len(r.groups))
@@ -778,7 +786,7 @@ class TestCaseResultsFilter:
self.results.apply_filter(None)
eq_(2, len(self.results.groups))
eq_(2, len(self.results.dupes))
self.results.apply_filter('ibabtu')
self.results.apply_filter("ibabtu")
self.results.remove_duplicates([self.results.dupes[0]])
self.results.apply_filter(None)
eq_(1, len(self.results.groups))
@@ -786,7 +794,7 @@ class TestCaseResultsFilter:
def test_filter_is_case_insensitive(self):
self.results.apply_filter(None)
self.results.apply_filter('FOO')
self.results.apply_filter("FOO")
eq_(1, len(self.results.dupes))
def test_make_ref_on_filtered_out_doesnt_mess_stats(self):
@@ -794,13 +802,15 @@ class TestCaseResultsFilter:
# When calling make_ref on such a dupe, the total size and dupecount stats get messed up
# because they are *not* counted in the stats in the first place.
g1, g2 = self.groups
bar_bleh = g1[1] # The "bar bleh" dupe is filtered out
bar_bleh = g1[1] # The "bar bleh" dupe is filtered out
self.results.make_ref(bar_bleh)
# Now the stats should display *2* markable dupes (instead of 1)
expected = '0 / 2 (0.00 B / 2.00 B) duplicates marked. filter: foo'
expected = "0 / 2 (0.00 B / 2.00 B) duplicates marked. filter: foo"
eq_(expected, self.results.stat_line)
self.results.apply_filter(None) # Now let's make sure our unfiltered results aren't fucked up
expected = '0 / 3 (0.00 B / 3.00 B) duplicates marked.'
self.results.apply_filter(
None
) # Now let's make sure our unfiltered results aren't fucked up
expected = "0 / 3 (0.00 B / 3.00 B) duplicates marked."
eq_(expected, self.results.stat_line)
@@ -814,6 +824,5 @@ class TestCaseResultsRefFile:
self.results.groups = self.groups
def test_stat_line(self):
expected = '0 / 2 (0.00 B / 2.00 B) duplicates marked.'
expected = "0 / 2 (0.00 B / 2.00 B) duplicates marked."
eq_(expected, self.results.stat_line)

View File

@@ -14,6 +14,7 @@ from ..ignore import IgnoreList
from ..scanner import Scanner, ScanType
from ..me.scanner import ScannerME
class NamedObject:
def __init__(self, name="foobar", size=1, path=None):
if path is None:
@@ -26,22 +27,25 @@ class NamedObject:
self.words = getwords(name)
def __repr__(self):
return '<NamedObject %r %r>' % (self.name, self.path)
return "<NamedObject %r %r>" % (self.name, self.path)
no = NamedObject
def pytest_funcarg__fake_fileexists(request):
# This is a hack to avoid invalidating all previous tests since the scanner started to test
# for file existence before doing the match grouping.
monkeypatch = request.getfuncargvalue('monkeypatch')
monkeypatch.setattr(Path, 'exists', lambda _: True)
monkeypatch = request.getfuncargvalue("monkeypatch")
monkeypatch.setattr(Path, "exists", lambda _: True)
def test_empty(fake_fileexists):
s = Scanner()
r = s.get_dupe_groups([])
eq_(r, [])
def test_default_settings(fake_fileexists):
s = Scanner()
eq_(s.min_match_percentage, 80)
@@ -50,40 +54,54 @@ def test_default_settings(fake_fileexists):
eq_(s.word_weighting, False)
eq_(s.match_similar_words, False)
def test_simple_with_default_settings(fake_fileexists):
s = Scanner()
f = [no('foo bar', path='p1'), no('foo bar', path='p2'), no('foo bleh')]
f = [no("foo bar", path="p1"), no("foo bar", path="p2"), no("foo bleh")]
r = s.get_dupe_groups(f)
eq_(len(r), 1)
g = r[0]
#'foo bleh' cannot be in the group because the default min match % is 80
# 'foo bleh' cannot be in the group because the default min match % is 80
eq_(len(g), 2)
assert g.ref in f[:2]
assert g.dupes[0] in f[:2]
def test_simple_with_lower_min_match(fake_fileexists):
s = Scanner()
s.min_match_percentage = 50
f = [no('foo bar', path='p1'), no('foo bar', path='p2'), no('foo bleh')]
f = [no("foo bar", path="p1"), no("foo bar", path="p2"), no("foo bleh")]
r = s.get_dupe_groups(f)
eq_(len(r), 1)
g = r[0]
eq_(len(g), 3)
def test_trim_all_ref_groups(fake_fileexists):
# When all files of a group are ref, don't include that group in the results, but also don't
# count the files from that group as discarded.
s = Scanner()
f = [no('foo', path='p1'), no('foo', path='p2'), no('bar', path='p1'), no('bar', path='p2')]
f = [
no("foo", path="p1"),
no("foo", path="p2"),
no("bar", path="p1"),
no("bar", path="p2"),
]
f[2].is_ref = True
f[3].is_ref = True
r = s.get_dupe_groups(f)
eq_(len(r), 1)
eq_(s.discarded_file_count, 0)
def test_priorize(fake_fileexists):
s = Scanner()
f = [no('foo', path='p1'), no('foo', path='p2'), no('bar', path='p1'), no('bar', path='p2')]
f = [
no("foo", path="p1"),
no("foo", path="p2"),
no("bar", path="p1"),
no("bar", path="p2"),
]
f[1].size = 2
f[2].size = 3
f[3].is_ref = True
@@ -94,17 +112,19 @@ def test_priorize(fake_fileexists):
assert f[3] in (g1.ref, g2.ref)
assert f[2] in (g1.dupes[0], g2.dupes[0])
def test_content_scan(fake_fileexists):
s = Scanner()
s.scan_type = ScanType.Contents
f = [no('foo'), no('bar'), no('bleh')]
f[0].md5 = f[0].md5partial = 'foobar'
f[1].md5 = f[1].md5partial = 'foobar'
f[2].md5 = f[2].md5partial = 'bleh'
f = [no("foo"), no("bar"), no("bleh")]
f[0].md5 = f[0].md5partial = "foobar"
f[1].md5 = f[1].md5partial = "foobar"
f[2].md5 = f[2].md5partial = "bleh"
r = s.get_dupe_groups(f)
eq_(len(r), 1)
eq_(len(r[0]), 2)
eq_(s.discarded_file_count, 0) # don't count the different md5 as discarded!
eq_(s.discarded_file_count, 0) # don't count the different md5 as discarded!
def test_content_scan_compare_sizes_first(fake_fileexists):
class MyFile(no):
@@ -114,16 +134,17 @@ def test_content_scan_compare_sizes_first(fake_fileexists):
s = Scanner()
s.scan_type = ScanType.Contents
f = [MyFile('foo', 1), MyFile('bar', 2)]
f = [MyFile("foo", 1), MyFile("bar", 2)]
eq_(len(s.get_dupe_groups(f)), 0)
def test_min_match_perc_doesnt_matter_for_content_scan(fake_fileexists):
s = Scanner()
s.scan_type = ScanType.Contents
f = [no('foo'), no('bar'), no('bleh')]
f[0].md5 = f[0].md5partial = 'foobar'
f[1].md5 = f[1].md5partial = 'foobar'
f[2].md5 = f[2].md5partial = 'bleh'
f = [no("foo"), no("bar"), no("bleh")]
f[0].md5 = f[0].md5partial = "foobar"
f[1].md5 = f[1].md5partial = "foobar"
f[2].md5 = f[2].md5partial = "bleh"
s.min_match_percentage = 101
r = s.get_dupe_groups(f)
eq_(len(r), 1)
@@ -133,157 +154,180 @@ def test_min_match_perc_doesnt_matter_for_content_scan(fake_fileexists):
eq_(len(r), 1)
eq_(len(r[0]), 2)
def test_content_scan_doesnt_put_md5_in_words_at_the_end(fake_fileexists):
s = Scanner()
s.scan_type = ScanType.Contents
f = [no('foo'), no('bar')]
f[0].md5 = f[0].md5partial = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f'
f[1].md5 = f[1].md5partial = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f'
f = [no("foo"), no("bar")]
f[0].md5 = f[
0
].md5partial = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
f[1].md5 = f[
1
].md5partial = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
r = s.get_dupe_groups(f)
r[0]
def test_extension_is_not_counted_in_filename_scan(fake_fileexists):
s = Scanner()
s.min_match_percentage = 100
f = [no('foo.bar'), no('foo.bleh')]
f = [no("foo.bar"), no("foo.bleh")]
r = s.get_dupe_groups(f)
eq_(len(r), 1)
eq_(len(r[0]), 2)
def test_job(fake_fileexists):
def do_progress(progress, desc=''):
def do_progress(progress, desc=""):
log.append(progress)
return True
s = Scanner()
log = []
f = [no('foo bar'), no('foo bar'), no('foo bleh')]
f = [no("foo bar"), no("foo bar"), no("foo bleh")]
s.get_dupe_groups(f, j=job.Job(1, do_progress))
eq_(log[0], 0)
eq_(log[-1], 100)
def test_mix_file_kind(fake_fileexists):
s = Scanner()
s.mix_file_kind = False
f = [no('foo.1'), no('foo.2')]
f = [no("foo.1"), no("foo.2")]
r = s.get_dupe_groups(f)
eq_(len(r), 0)
def test_word_weighting(fake_fileexists):
s = Scanner()
s.min_match_percentage = 75
s.word_weighting = True
f = [no('foo bar'), no('foo bar bleh')]
f = [no("foo bar"), no("foo bar bleh")]
r = s.get_dupe_groups(f)
eq_(len(r), 1)
g = r[0]
m = g.get_match_of(g.dupes[0])
eq_(m.percentage, 75) # 16 letters, 12 matching
eq_(m.percentage, 75) # 16 letters, 12 matching
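The 75 above is a letter count rather than a word count. A minimal sketch of that arithmetic, assuming the weighted percentage is simply matched letters over total letters across both names (the actual computation lives in the engine module and is not part of this diff):

name1, name2 = "foo bar", "foo bar bleh"
matched = set(name1.split()) & set(name2.split())  # {"foo", "bar"}
matched_letters = 2 * sum(len(w) for w in matched)  # 6 letters on each side -> 12
total_letters = sum(len(w) for w in name1.split() + name2.split())  # 6 + 10 -> 16
assert round(100 * matched_letters / total_letters) == 75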
def test_similar_words(fake_fileexists):
s = Scanner()
s.match_similar_words = True
f = [no('The White Stripes'), no('The Whites Stripe'), no('Limp Bizkit'), no('Limp Bizkitt')]
f = [
no("The White Stripes"),
no("The Whites Stripe"),
no("Limp Bizkit"),
no("Limp Bizkitt"),
]
r = s.get_dupe_groups(f)
eq_(len(r), 2)
def test_fields(fake_fileexists):
s = Scanner()
s.scan_type = ScanType.Fields
f = [no('The White Stripes - Little Ghost'), no('The White Stripes - Little Acorn')]
f = [no("The White Stripes - Little Ghost"), no("The White Stripes - Little Acorn")]
r = s.get_dupe_groups(f)
eq_(len(r), 0)
def test_fields_no_order(fake_fileexists):
s = Scanner()
s.scan_type = ScanType.FieldsNoOrder
f = [no('The White Stripes - Little Ghost'), no('Little Ghost - The White Stripes')]
f = [no("The White Stripes - Little Ghost"), no("Little Ghost - The White Stripes")]
r = s.get_dupe_groups(f)
eq_(len(r), 1)
def test_tag_scan(fake_fileexists):
s = Scanner()
s.scan_type = ScanType.Tag
o1 = no('foo')
o2 = no('bar')
o1.artist = 'The White Stripes'
o1.title = 'The Air Near My Fingers'
o2.artist = 'The White Stripes'
o2.title = 'The Air Near My Fingers'
o1 = no("foo")
o2 = no("bar")
o1.artist = "The White Stripes"
o1.title = "The Air Near My Fingers"
o2.artist = "The White Stripes"
o2.title = "The Air Near My Fingers"
r = s.get_dupe_groups([o1, o2])
eq_(len(r), 1)
def test_tag_with_album_scan(fake_fileexists):
s = Scanner()
s.scan_type = ScanType.Tag
s.scanned_tags = set(['artist', 'album', 'title'])
o1 = no('foo')
o2 = no('bar')
o3 = no('bleh')
o1.artist = 'The White Stripes'
o1.title = 'The Air Near My Fingers'
o1.album = 'Elephant'
o2.artist = 'The White Stripes'
o2.title = 'The Air Near My Fingers'
o2.album = 'Elephant'
o3.artist = 'The White Stripes'
o3.title = 'The Air Near My Fingers'
o3.album = 'foobar'
s.scanned_tags = set(["artist", "album", "title"])
o1 = no("foo")
o2 = no("bar")
o3 = no("bleh")
o1.artist = "The White Stripes"
o1.title = "The Air Near My Fingers"
o1.album = "Elephant"
o2.artist = "The White Stripes"
o2.title = "The Air Near My Fingers"
o2.album = "Elephant"
o3.artist = "The White Stripes"
o3.title = "The Air Near My Fingers"
o3.album = "foobar"
r = s.get_dupe_groups([o1, o2, o3])
eq_(len(r), 1)
def test_that_dash_in_tags_dont_create_new_fields(fake_fileexists):
s = Scanner()
s.scan_type = ScanType.Tag
s.scanned_tags = set(['artist', 'album', 'title'])
s.scanned_tags = set(["artist", "album", "title"])
s.min_match_percentage = 50
o1 = no('foo')
o2 = no('bar')
o1.artist = 'The White Stripes - a'
o1.title = 'The Air Near My Fingers - a'
o1.album = 'Elephant - a'
o2.artist = 'The White Stripes - b'
o2.title = 'The Air Near My Fingers - b'
o2.album = 'Elephant - b'
o1 = no("foo")
o2 = no("bar")
o1.artist = "The White Stripes - a"
o1.title = "The Air Near My Fingers - a"
o1.album = "Elephant - a"
o2.artist = "The White Stripes - b"
o2.title = "The Air Near My Fingers - b"
o2.album = "Elephant - b"
r = s.get_dupe_groups([o1, o2])
eq_(len(r), 1)
def test_tag_scan_with_different_scanned(fake_fileexists):
s = Scanner()
s.scan_type = ScanType.Tag
s.scanned_tags = set(['track', 'year'])
o1 = no('foo')
o2 = no('bar')
o1.artist = 'The White Stripes'
o1.title = 'some title'
o1.track = 'foo'
o1.year = 'bar'
o2.artist = 'The White Stripes'
o2.title = 'another title'
o2.track = 'foo'
o2.year = 'bar'
s.scanned_tags = set(["track", "year"])
o1 = no("foo")
o2 = no("bar")
o1.artist = "The White Stripes"
o1.title = "some title"
o1.track = "foo"
o1.year = "bar"
o2.artist = "The White Stripes"
o2.title = "another title"
o2.track = "foo"
o2.year = "bar"
r = s.get_dupe_groups([o1, o2])
eq_(len(r), 1)
def test_tag_scan_only_scans_existing_tags(fake_fileexists):
s = Scanner()
s.scan_type = ScanType.Tag
s.scanned_tags = set(['artist', 'foo'])
o1 = no('foo')
o2 = no('bar')
o1.artist = 'The White Stripes'
o1.foo = 'foo'
o2.artist = 'The White Stripes'
o2.foo = 'bar'
s.scanned_tags = set(["artist", "foo"])
o1 = no("foo")
o2 = no("bar")
o1.artist = "The White Stripes"
o1.foo = "foo"
o2.artist = "The White Stripes"
o2.foo = "bar"
r = s.get_dupe_groups([o1, o2])
eq_(len(r), 1) # Because 'foo' is not scanned, they match
eq_(len(r), 1) # Because 'foo' is not scanned, they match
def test_tag_scan_converts_to_str(fake_fileexists):
s = Scanner()
s.scan_type = ScanType.Tag
s.scanned_tags = set(['track'])
o1 = no('foo')
o2 = no('bar')
s.scanned_tags = set(["track"])
o1 = no("foo")
o2 = no("bar")
o1.track = 42
o2.track = 42
try:
@@ -292,28 +336,30 @@ def test_tag_scan_converts_to_str(fake_fileexists):
raise AssertionError()
eq_(len(r), 1)
def test_tag_scan_non_ascii(fake_fileexists):
s = Scanner()
s.scan_type = ScanType.Tag
s.scanned_tags = set(['title'])
o1 = no('foo')
o2 = no('bar')
o1.title = 'foobar\u00e9'
o2.title = 'foobar\u00e9'
s.scanned_tags = set(["title"])
o1 = no("foo")
o2 = no("bar")
o1.title = "foobar\u00e9"
o2.title = "foobar\u00e9"
try:
r = s.get_dupe_groups([o1, o2])
except UnicodeEncodeError:
raise AssertionError()
eq_(len(r), 1)
def test_ignore_list(fake_fileexists):
s = Scanner()
f1 = no('foobar')
f2 = no('foobar')
f3 = no('foobar')
f1.path = Path('dir1/foobar')
f2.path = Path('dir2/foobar')
f3.path = Path('dir3/foobar')
f1 = no("foobar")
f2 = no("foobar")
f3 = no("foobar")
f1.path = Path("dir1/foobar")
f2.path = Path("dir2/foobar")
f3.path = Path("dir3/foobar")
ignore_list = IgnoreList()
ignore_list.Ignore(str(f1.path), str(f2.path))
ignore_list.Ignore(str(f1.path), str(f3.path))
@@ -327,16 +373,17 @@ def test_ignore_list(fake_fileexists):
# Ignored matches are not counted as discarded
eq_(s.discarded_file_count, 0)
def test_ignore_list_checks_for_unicode(fake_fileexists):
#scanner was calling path_str for ignore list checks. Since the Path changes, it must
#be unicode(path)
# scanner was calling path_str for ignore list checks. Since the Path changes, it must
# be unicode(path)
s = Scanner()
f1 = no('foobar')
f2 = no('foobar')
f3 = no('foobar')
f1.path = Path('foo1\u00e9')
f2.path = Path('foo2\u00e9')
f3.path = Path('foo3\u00e9')
f1 = no("foobar")
f2 = no("foobar")
f3 = no("foobar")
f1.path = Path("foo1\u00e9")
f2.path = Path("foo2\u00e9")
f3.path = Path("foo3\u00e9")
ignore_list = IgnoreList()
ignore_list.Ignore(str(f1.path), str(f2.path))
ignore_list.Ignore(str(f1.path), str(f3.path))
@@ -348,6 +395,7 @@ def test_ignore_list_checks_for_unicode(fake_fileexists):
assert f2 in g
assert f3 in g
def test_file_evaluates_to_false(fake_fileexists):
# A very wrong way to use any() was added at some point, causing the resulting group
# list to be empty.
@@ -355,19 +403,19 @@ def test_file_evaluates_to_false(fake_fileexists):
def __bool__(self):
return False
s = Scanner()
f1 = FalseNamedObject('foobar', path='p1')
f2 = FalseNamedObject('foobar', path='p2')
f1 = FalseNamedObject("foobar", path="p1")
f2 = FalseNamedObject("foobar", path="p2")
r = s.get_dupe_groups([f1, f2])
eq_(len(r), 1)
def test_size_threshold(fake_fileexists):
# Only files whose size is equal to or higher than size_threshold are scanned
s = Scanner()
f1 = no('foo', 1, path='p1')
f2 = no('foo', 2, path='p2')
f3 = no('foo', 3, path='p3')
f1 = no("foo", 1, path="p1")
f2 = no("foo", 2, path="p2")
f3 = no("foo", 3, path="p3")
s.size_threshold = 2
groups = s.get_dupe_groups([f1, f2, f3])
eq_(len(groups), 1)
@@ -377,48 +425,52 @@ def test_size_threshold(fake_fileexists):
assert f2 in group
assert f3 in group
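A minimal sketch of what the threshold is expected to do, assuming it acts as a plain size filter applied before matching (hypothetical stand-in, not dupeGuru's actual code):

files = [("f1", 1), ("f2", 2), ("f3", 3)]
size_threshold = 2
kept = [name for name, size in files if size >= size_threshold]
assert kept == ["f2", "f3"]  # f1 is below the threshold and never enters a group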
def test_tie_breaker_path_deepness(fake_fileexists):
# If there is a tie in prioritization, path deepness is used as a tie breaker
s = Scanner()
o1, o2 = no('foo'), no('foo')
o1.path = Path('foo')
o2.path = Path('foo/bar')
o1, o2 = no("foo"), no("foo")
o1.path = Path("foo")
o2.path = Path("foo/bar")
[group] = s.get_dupe_groups([o1, o2])
assert group.ref is o2
def test_tie_breaker_copy(fake_fileexists):
# if "Copy" is among the words used (even if it has a deeper path), it becomes a dupe
s = Scanner()
o1, o2 = no('foo bar Copy'), no('foo bar')
o1.path = Path('deeper/path')
o2.path = Path('foo')
o1, o2 = no("foo bar Copy"), no("foo bar")
o1.path = Path("deeper/path")
o2.path = Path("foo")
[group] = s.get_dupe_groups([o1, o2])
assert group.ref is o2
def test_tie_breaker_same_name_plus_digit(fake_fileexists):
# if ref has the same words as dupe, but has just one extra word which is a digit, it
# becomes a dupe
s = Scanner()
o1 = no('foo bar 42')
o2 = no('foo bar [42]')
o3 = no('foo bar (42)')
o4 = no('foo bar {42}')
o5 = no('foo bar')
o1 = no("foo bar 42")
o2 = no("foo bar [42]")
o3 = no("foo bar (42)")
o4 = no("foo bar {42}")
o5 = no("foo bar")
# all numbered names have deeper paths, so they'll end up ref if the digits aren't correctly
# used as tie breakers
o1.path = Path('deeper/path')
o2.path = Path('deeper/path')
o3.path = Path('deeper/path')
o4.path = Path('deeper/path')
o5.path = Path('foo')
o1.path = Path("deeper/path")
o2.path = Path("deeper/path")
o3.path = Path("deeper/path")
o4.path = Path("deeper/path")
o5.path = Path("foo")
[group] = s.get_dupe_groups([o1, o2, o3, o4, o5])
assert group.ref is o5
def test_partial_group_match(fake_fileexists):
# Count the number of discarded matches (when a file doesn't match all other dupes of the
# group) in Scanner.discarded_file_count
s = Scanner()
o1, o2, o3 = no('a b'), no('a'), no('b')
o1, o2, o3 = no("a b"), no("a"), no("b")
s.min_match_percentage = 50
[group] = s.get_dupe_groups([o1, o2, o3])
eq_(len(group), 2)
@@ -431,6 +483,7 @@ def test_partial_group_match(fake_fileexists):
assert o3 in group
eq_(s.discarded_file_count, 1)
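A rough illustration of why exactly one file is discarded here, assuming the unweighted percentage is roughly the share of words the two names have in common (simplified stand-in for the engine's matcher):

def rough_percentage(a, b):
    wa, wb = a.split(), b.split()
    common = len(set(wa) & set(wb))
    return round(100 * 2 * common / (len(wa) + len(wb)))

assert rough_percentage("a b", "a") >= 50  # matches
assert rough_percentage("a b", "b") >= 50  # matches
assert rough_percentage("a", "b") < 50  # no match, so "a" and "b" can't both stay in the group

Whichever of the two single-word files loses that tie is the one counted in discarded_file_count.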
def test_dont_group_files_that_dont_exist(tmpdir):
# when creating groups, check that files exist first. It's possible that these files have
# been moved during the scan by the user.
@@ -439,8 +492,8 @@ def test_dont_group_files_that_dont_exist(tmpdir):
s = Scanner()
s.scan_type = ScanType.Contents
p = Path(str(tmpdir))
p['file1'].open('w').write('foo')
p['file2'].open('w').write('foo')
p["file1"].open("w").write("foo")
p["file2"].open("w").write("foo")
file1, file2 = fs.get_files(p)
def getmatches(*args, **kw):
@@ -451,6 +504,7 @@ def test_dont_group_files_that_dont_exist(tmpdir):
assert not s.get_dupe_groups([file1, file2])
def test_folder_scan_exclude_subfolder_matches(fake_fileexists):
# when doing a Folders scan type, don't include matches for folders whose parent folder
# already matches.
@@ -458,31 +512,33 @@ def test_folder_scan_exclude_subfolder_matches(fake_fileexists):
s.scan_type = ScanType.Folders
topf1 = no("top folder 1", size=42)
topf1.md5 = topf1.md5partial = b"some_md5_1"
topf1.path = Path('/topf1')
topf1.path = Path("/topf1")
topf2 = no("top folder 2", size=42)
topf2.md5 = topf2.md5partial = b"some_md5_1"
topf2.path = Path('/topf2')
topf2.path = Path("/topf2")
subf1 = no("sub folder 1", size=41)
subf1.md5 = subf1.md5partial = b"some_md5_2"
subf1.path = Path('/topf1/sub')
subf1.path = Path("/topf1/sub")
subf2 = no("sub folder 2", size=41)
subf2.md5 = subf2.md5partial = b"some_md5_2"
subf2.path = Path('/topf2/sub')
eq_(len(s.get_dupe_groups([topf1, topf2, subf1, subf2])), 1) # only top folders
subf2.path = Path("/topf2/sub")
eq_(len(s.get_dupe_groups([topf1, topf2, subf1, subf2])), 1) # only top folders
# however, if another folder matches a subfolder, keep it in the matches
otherf = no("other folder", size=41)
otherf.md5 = otherf.md5partial = b"some_md5_2"
otherf.path = Path('/otherfolder')
otherf.path = Path("/otherfolder")
eq_(len(s.get_dupe_groups([topf1, topf2, subf1, subf2, otherf])), 2)
def test_ignore_files_with_same_path(fake_fileexists):
# It's possible that the scanner is fed with two file instances pointing to the same path. One
# of these files has to be ignored
s = Scanner()
f1 = no('foobar', path='path1/foobar')
f2 = no('foobar', path='path1/foobar')
f1 = no("foobar", path="path1/foobar")
f2 = no("foobar", path="path1/foobar")
eq_(s.get_dupe_groups([f1, f2]), [])
def test_dont_count_ref_files_as_discarded(fake_fileexists):
# To speed up the scan, we don't bother comparing contents of files that are both ref files.
# However, this causes problems in "discarded" counting and we make sure here that we don't
@@ -492,20 +548,20 @@ def test_dont_count_ref_files_as_discarded(fake_fileexists):
o1 = no("foo", path="p1")
o2 = no("foo", path="p2")
o3 = no("foo", path="p3")
o1.md5 = o1.md5partial = 'foobar'
o2.md5 = o2.md5partial = 'foobar'
o3.md5 = o3.md5partial = 'foobar'
o1.md5 = o1.md5partial = "foobar"
o2.md5 = o2.md5partial = "foobar"
o3.md5 = o3.md5partial = "foobar"
o1.is_ref = True
o2.is_ref = True
eq_(len(s.get_dupe_groups([o1, o2, o3])), 1)
eq_(s.discarded_file_count, 0)
def test_priorize_me(fake_fileexists):
# in ScannerME, bitrate goes first (right after is_ref) in prioritization
s = ScannerME()
o1, o2 = no('foo', path='p1'), no('foo', path='p2')
o1, o2 = no("foo", path="p1"), no("foo", path="p2")
o1.bitrate = 1
o2.bitrate = 2
[group] = s.get_dupe_groups([o1, o2])
assert group.ref is o2