Mirror of https://github.com/arsenetar/dupeguru.git
Stop using hsutil.testcase.
This commit is contained in:
parent 2d423b2358
commit e0cc8ecda2
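The migration pattern is the same throughout the diff below: helpers inherited from hsutil.testcase.TestCase are replaced by pytest fixtures passed in as test arguments — self.mock() becomes monkeypatch.setattr(), self.tmppath()/self.tmpdir() become the tmpdir fixture, and self.assertEqual()/self.assert_() become hscommon.testutil.eq_() or a bare assert. A minimal sketch of the before/after shape (mymodule and its greet attribute are illustrative stand-ins, not names from this commit):

import types

from hscommon.testutil import eq_

# Hypothetical module under test, so the sketch is self-contained.
mymodule = types.ModuleType('mymodule')
mymodule.greet = lambda: 'hello'

# Before (hsutil.testcase style):
#     class TCExample(TestCase):
#         def test_greet(self):
#             self.mock(mymodule, 'greet', lambda: 'hi')
#             self.assertEqual('hi', mymodule.greet())

# After (plain pytest style):
class TestCaseExample:
    def test_greet(self, monkeypatch, tmpdir):
        # monkeypatch undoes the patch automatically when the test ends.
        monkeypatch.setattr(mymodule, 'greet', lambda: 'hi')
        eq_('hi', mymodule.greet())
        # tmpdir is a per-test temporary directory (a py.path.local).
        open(str(tmpdir.join('scratch.txt')), 'w').close()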
@@ -7,16 +7,15 @@
# http://www.hardcoded.net/licenses/bsd_license

import os
import os.path as op
import logging

from pytest import mark
from hsutil.testutil import eq_
from hsutil.testcase import TestCase
from hsutil import io
from hsutil.path import Path
from hsutil.decorators import log_calls
import hsutil.files
from hscommon.testutil import CallLogger
from hscommon.testutil import CallLogger, eq_
from jobprogress.job import nulljob, Job, JobCancelled

from . import data
@@ -45,62 +44,61 @@ def add_fake_files_to_directories(directories, files):
directories.get_files = lambda: iter(files)
directories._dirs.append('this is just so Scan() doesnt return 3')

class TCDupeGuru(TestCase):
cls_tested_module = app
def test_apply_filter_calls_results_apply_filter(self):
class TestCaseDupeGuru:
def test_apply_filter_calls_results_apply_filter(self, monkeypatch):
app = DupeGuru()
self.mock(app.results, 'apply_filter', log_calls(app.results.apply_filter))
monkeypatch.setattr(app.results, 'apply_filter', log_calls(app.results.apply_filter))
app.apply_filter('foo')
self.assertEqual(2, len(app.results.apply_filter.calls))
eq_(2, len(app.results.apply_filter.calls))
call = app.results.apply_filter.calls[0]
self.assert_(call['filter_str'] is None)
assert call['filter_str'] is None
call = app.results.apply_filter.calls[1]
self.assertEqual('foo', call['filter_str'])
eq_('foo', call['filter_str'])

def test_apply_filter_escapes_regexp(self):
def test_apply_filter_escapes_regexp(self, monkeypatch):
app = DupeGuru()
self.mock(app.results, 'apply_filter', log_calls(app.results.apply_filter))
monkeypatch.setattr(app.results, 'apply_filter', log_calls(app.results.apply_filter))
app.apply_filter('()[]\\.|+?^abc')
call = app.results.apply_filter.calls[1]
self.assertEqual('\\(\\)\\[\\]\\\\\\.\\|\\+\\?\\^abc', call['filter_str'])
eq_('\\(\\)\\[\\]\\\\\\.\\|\\+\\?\\^abc', call['filter_str'])
app.apply_filter('(*)') # In "simple mode", we want the * to behave as a wildcard
call = app.results.apply_filter.calls[3]
self.assertEqual('\(.*\)', call['filter_str'])
eq_('\(.*\)', call['filter_str'])
app.options['escape_filter_regexp'] = False
app.apply_filter('(abc)')
call = app.results.apply_filter.calls[5]
self.assertEqual('(abc)', call['filter_str'])
eq_('(abc)', call['filter_str'])

def test_copy_or_move(self):
def test_copy_or_move(self, tmpdir, monkeypatch):
# The goal here is just to have a test for a previous blowup I had. I know my test coverage
# for this unit is pathetic. What's done is done. My approach now is to add tests for
# every change I want to make. The blowup was caused by a missing import.
p = self.tmppath()
p = Path(str(tmpdir))
io.open(p + 'foo', 'w').close()
self.mock(hsutil.files, 'copy', log_calls(lambda source_path, dest_path: None))
self.mock(os, 'makedirs', lambda path: None) # We don't want the test to create that fake directory
monkeypatch.setattr(hsutil.files, 'copy', log_calls(lambda source_path, dest_path: None))
monkeypatch.setattr(os, 'makedirs', lambda path: None) # We don't want the test to create that fake directory
app = DupeGuru()
app.directories.add_path(p)
[f] = app.directories.get_files()
app.copy_or_move(f, True, 'some_destination', 0)
self.assertEqual(1, len(hsutil.files.copy.calls))
eq_(1, len(hsutil.files.copy.calls))
call = hsutil.files.copy.calls[0]
self.assertEqual('some_destination', call['dest_path'])
self.assertEqual(f.path, call['source_path'])
eq_('some_destination', call['dest_path'])
eq_(f.path, call['source_path'])

def test_copy_or_move_clean_empty_dirs(self):
tmppath = Path(self.tmpdir())
def test_copy_or_move_clean_empty_dirs(self, tmpdir, monkeypatch):
tmppath = Path(str(tmpdir))
sourcepath = tmppath + 'source'
io.mkdir(sourcepath)
io.open(sourcepath + 'myfile', 'w')
app = DupeGuru()
app.directories.add_path(tmppath)
[myfile] = app.directories.get_files()
self.mock(app, 'clean_empty_dirs', log_calls(lambda path: None))
monkeypatch.setattr(app, 'clean_empty_dirs', log_calls(lambda path: None))
app.copy_or_move(myfile, False, tmppath + 'dest', 0)
calls = app.clean_empty_dirs.calls
self.assertEqual(1, len(calls))
self.assertEqual(sourcepath, calls[0]['path'])
eq_(1, len(calls))
eq_(sourcepath, calls[0]['path'])

def test_Scan_with_objects_evaluating_to_false(self):
class FakeFile(fs.File):
@@ -117,10 +115,10 @@ class TCDupeGuru(TestCase):
app.start_scanning() # no exception

@mark.skipif("not hasattr(os, 'link')")
def test_ignore_hardlink_matches(self):
def test_ignore_hardlink_matches(self, tmpdir):
# If the ignore_hardlink_matches option is set, don't match files hardlinking to the same
# inode.
tmppath = Path(self.tmpdir())
tmppath = Path(str(tmpdir))
io.open(tmppath + 'myfile', 'w').write('foo')
os.link(str(tmppath + 'myfile'), str(tmppath + 'hardlink'))
app = DupeGuru()
@@ -131,42 +129,42 @@ class TCDupeGuru(TestCase):
eq_(len(app.results.groups), 0)


class TCDupeGuru_clean_empty_dirs(TestCase):
cls_tested_module = app
def setUp(self):
self.mock(hsutil.files, 'delete_if_empty', log_calls(lambda path, files_to_delete=[]: None))
class TestCaseDupeGuru_clean_empty_dirs:
def pytest_funcarg__do_setup(self, request):
monkeypatch = request.getfuncargvalue('monkeypatch')
monkeypatch.setattr(hsutil.files, 'delete_if_empty', log_calls(lambda path, files_to_delete=[]: None))
self.app = DupeGuru()

def test_option_off(self):
def test_option_off(self, do_setup):
self.app.clean_empty_dirs(Path('/foo/bar'))
self.assertEqual(0, len(hsutil.files.delete_if_empty.calls))
eq_(0, len(hsutil.files.delete_if_empty.calls))

def test_option_on(self):
def test_option_on(self, do_setup):
self.app.options['clean_empty_dirs'] = True
self.app.clean_empty_dirs(Path('/foo/bar'))
calls = hsutil.files.delete_if_empty.calls
self.assertEqual(1, len(calls))
self.assertEqual(Path('/foo/bar'), calls[0]['path'])
self.assertEqual(['.DS_Store'], calls[0]['files_to_delete'])
eq_(1, len(calls))
eq_(Path('/foo/bar'), calls[0]['path'])
eq_(['.DS_Store'], calls[0]['files_to_delete'])

def test_recurse_up(self):
def test_recurse_up(self, do_setup, monkeypatch):
# delete_if_empty must be recursively called up in the path until it returns False
@log_calls
def mock_delete_if_empty(path, files_to_delete=[]):
return len(path) > 1

self.mock(hsutil.files, 'delete_if_empty', mock_delete_if_empty)
monkeypatch.setattr(hsutil.files, 'delete_if_empty', mock_delete_if_empty)
self.app.options['clean_empty_dirs'] = True
self.app.clean_empty_dirs(Path('not-empty/empty/empty'))
calls = hsutil.files.delete_if_empty.calls
self.assertEqual(3, len(calls))
self.assertEqual(Path('not-empty/empty/empty'), calls[0]['path'])
self.assertEqual(Path('not-empty/empty'), calls[1]['path'])
self.assertEqual(Path('not-empty'), calls[2]['path'])
eq_(3, len(calls))
eq_(Path('not-empty/empty/empty'), calls[0]['path'])
eq_(Path('not-empty/empty'), calls[1]['path'])
eq_(Path('not-empty'), calls[2]['path'])


class TCDupeGuruWithResults(TestCase):
def setUp(self):
class TestCaseDupeGuruWithResults:
def pytest_funcarg__do_setup(self, request):
self.app = DupeGuru()
self.objects,self.matches,self.groups = GetTestGroups()
self.app.results.groups = self.groups
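The pytest_funcarg__do_setup methods introduced above use pytest's original funcarg mechanism, the precursor of today's @pytest.fixture: a test method that lists do_setup as a parameter triggers pytest_funcarg__do_setup first, and request.getfuncargvalue() pulls in other funcargs such as monkeypatch or tmpdir. A sketch of the same shape with illustrative contents (this API is from the pytest of that era; request.getfixturevalue() on a @pytest.fixture is the modern spelling):

class TestCaseSketch:
    def pytest_funcarg__do_setup(self, request):
        # Fetch another funcarg by name and do the shared per-test setup.
        monkeypatch = request.getfuncargvalue('monkeypatch')
        monkeypatch.setenv('APP_ENV', 'test')  # illustrative setup
        self.value = 42

    def test_uses_setup(self, do_setup):
        # Naming 'do_setup' as an argument runs the setup above first.
        assert self.value == 42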
@@ -179,12 +177,13 @@ class TCDupeGuruWithResults(TestCase):
self.dpanel.connect()
self.dtree.connect()
self.rtable.connect()
tmppath = self.tmppath()
tmpdir = request.getfuncargvalue('tmpdir')
tmppath = Path(str(tmpdir))
io.mkdir(tmppath + 'foo')
io.mkdir(tmppath + 'bar')
self.app.directories.add_path(tmppath)

def test_GetObjects(self):
def test_GetObjects(self, do_setup):
objects = self.objects
groups = self.groups
r = self.rtable[0]
@@ -197,7 +196,7 @@ class TCDupeGuruWithResults(TestCase):
assert r._group is groups[1]
assert r._dupe is objects[4]

def test_GetObjects_after_sort(self):
def test_GetObjects_after_sort(self, do_setup):
objects = self.objects
groups = self.groups[:] # we need an un-sorted reference
self.rtable.sort(0, False) #0 = Filename
@@ -205,14 +204,14 @@ class TCDupeGuruWithResults(TestCase):
assert r._group is groups[1]
assert r._dupe is objects[4]

def test_selected_result_node_paths_after_deletion(self):
def test_selected_result_node_paths_after_deletion(self, do_setup):
# cases where the selected dupes aren't there are correctly handled
self.rtable.select([1, 2, 3])
self.app.remove_selected()
# The first 2 dupes have been removed. The 3rd one is a ref. it stays there, in first pos.
eq_(self.rtable.selected_indexes, [1]) # no exception

def test_selectResultNodePaths(self):
def test_selectResultNodePaths(self, do_setup):
app = self.app
objects = self.objects
self.rtable.select([1, 2])
@@ -220,7 +219,7 @@ class TCDupeGuruWithResults(TestCase):
assert app.selected_dupes[0] is objects[1]
assert app.selected_dupes[1] is objects[2]

def test_selectResultNodePaths_with_ref(self):
def test_selectResultNodePaths_with_ref(self, do_setup):
app = self.app
objects = self.objects
self.rtable.select([1, 2, 3])
@@ -229,7 +228,7 @@ class TCDupeGuruWithResults(TestCase):
assert app.selected_dupes[1] is objects[2]
assert app.selected_dupes[2] is self.groups[1].ref

def test_selectResultNodePaths_after_sort(self):
def test_selectResultNodePaths_after_sort(self, do_setup):
app = self.app
objects = self.objects
groups = self.groups[:] #To keep the old order in memory
@@ -241,14 +240,14 @@ class TCDupeGuruWithResults(TestCase):
assert app.selected_dupes[1] is groups[0].ref
assert app.selected_dupes[2] is objects[1]

def test_selected_powermarker_node_paths(self):
def test_selected_powermarker_node_paths(self, do_setup):
# app.selected_dupes is correctly converted into paths
self.rtable.power_marker = True
self.rtable.select([0, 1, 2])
self.rtable.power_marker = False
eq_(self.rtable.selected_indexes, [1, 2, 4])

def test_selected_powermarker_node_paths_after_deletion(self):
def test_selected_powermarker_node_paths_after_deletion(self, do_setup):
# cases where the selected dupes aren't there are correctly handled
app = self.app
self.rtable.power_marker = True
@@ -256,7 +255,7 @@ class TCDupeGuruWithResults(TestCase):
app.remove_selected()
eq_(self.rtable.selected_indexes, []) # no exception

def test_selectPowerMarkerRows_after_sort(self):
def test_selectPowerMarkerRows_after_sort(self, do_setup):
app = self.app
objects = self.objects
self.rtable.power_marker = True
@@ -267,7 +266,7 @@ class TCDupeGuruWithResults(TestCase):
assert app.selected_dupes[1] is objects[2]
assert app.selected_dupes[2] is objects[1]

def test_toggleSelectedMark(self):
def test_toggleSelectedMark(self, do_setup):
app = self.app
objects = self.objects
app.toggle_selected_mark_state()
@@ -281,7 +280,7 @@ class TCDupeGuruWithResults(TestCase):
assert not app.results.is_marked(objects[3])
assert app.results.is_marked(objects[4])

def test_refreshDetailsWithSelected(self):
def test_refreshDetailsWithSelected(self, do_setup):
self.rtable.select([1, 4])
eq_(self.dpanel.row(0), ('Filename', 'bar bleh', 'foo bar'))
self.dpanel_gui.check_gui_calls(['refresh'])
@@ -289,7 +288,7 @@ class TCDupeGuruWithResults(TestCase):
eq_(self.dpanel.row(0), ('Filename', '---', '---'))
self.dpanel_gui.check_gui_calls(['refresh'])

def test_makeSelectedReference(self):
def test_makeSelectedReference(self, do_setup):
app = self.app
objects = self.objects
groups = self.groups
@@ -298,7 +297,7 @@ class TCDupeGuruWithResults(TestCase):
assert groups[0].ref is objects[1]
assert groups[1].ref is objects[4]

def test_makeSelectedReference_by_selecting_two_dupes_in_the_same_group(self):
def test_makeSelectedReference_by_selecting_two_dupes_in_the_same_group(self, do_setup):
app = self.app
objects = self.objects
groups = self.groups
@@ -308,7 +307,7 @@ class TCDupeGuruWithResults(TestCase):
assert groups[0].ref is objects[1]
assert groups[1].ref is objects[4]

def test_removeSelected(self):
def test_removeSelected(self, do_setup):
app = self.app
self.rtable.select([1, 4])
app.remove_selected()
@@ -316,22 +315,25 @@ class TCDupeGuruWithResults(TestCase):
app.remove_selected()
eq_(len(app.results.dupes), 0)

def test_addDirectory_simple(self):
def test_addDirectory_simple(self, do_setup):
# There's already a directory in self.app, so adding another one makes 2 of them
app = self.app
eq_(app.add_directory(self.datadirpath()), 0)
# any other path that isn't a parent or child of the already added path
otherpath = Path(op.dirname(__file__))
eq_(app.add_directory(otherpath), 0)
eq_(len(app.directories), 2)

def test_addDirectory_already_there(self):
def test_addDirectory_already_there(self, do_setup):
app = self.app
self.assertEqual(0,app.add_directory(self.datadirpath()))
self.assertEqual(1,app.add_directory(self.datadirpath()))
otherpath = Path(op.dirname(__file__))
eq_(app.add_directory(otherpath), 0)
eq_(app.add_directory(otherpath), 1)

def test_addDirectory_does_not_exist(self):
def test_addDirectory_does_not_exist(self, do_setup):
app = self.app
self.assertEqual(2,app.add_directory('/does_not_exist'))
eq_(2,app.add_directory('/does_not_exist'))

def test_ignore(self):
def test_ignore(self, do_setup):
app = self.app
self.rtable.select([4]) #The dupe of the second, 2 sized group
app.add_selected_to_ignore_list()
@@ -341,20 +343,22 @@ class TCDupeGuruWithResults(TestCase):
#BOTH the ref and the other dupe should have been added
eq_(len(app.scanner.ignore_list), 3)

def test_purgeIgnoreList(self):
def test_purgeIgnoreList(self, do_setup, tmpdir):
app = self.app
p1 = self.filepath('zerofile')
p2 = self.filepath('zerofill')
p1 = str(tmpdir.join('file1'))
p2 = str(tmpdir.join('file2'))
open(p1, 'w').close()
open(p2, 'w').close()
dne = '/does_not_exist'
app.scanner.ignore_list.Ignore(dne,p1)
app.scanner.ignore_list.Ignore(p2,dne)
app.scanner.ignore_list.Ignore(p1,p2)
app.purge_ignore_list()
self.assertEqual(1,len(app.scanner.ignore_list))
self.assert_(app.scanner.ignore_list.AreIgnored(p1,p2))
self.assert_(not app.scanner.ignore_list.AreIgnored(dne,p1))
eq_(1,len(app.scanner.ignore_list))
assert app.scanner.ignore_list.AreIgnored(p1,p2)
assert not app.scanner.ignore_list.AreIgnored(dne,p1)

def test_only_unicode_is_added_to_ignore_list(self):
def test_only_unicode_is_added_to_ignore_list(self, do_setup):
def FakeIgnore(first,second):
if not isinstance(first,str):
self.fail()
@@ -366,7 +370,7 @@ class TCDupeGuruWithResults(TestCase):
self.rtable.select([4])
app.add_selected_to_ignore_list()

def test_cancel_scan_with_previous_results(self):
def test_cancel_scan_with_previous_results(self, do_setup):
# When doing a scan with results being present prior to the scan, correctly invalidate the
# results table.
app = self.app
@@ -375,9 +379,10 @@ class TCDupeGuruWithResults(TestCase):
app.start_scanning() # will be cancelled immediately
eq_(len(self.rtable), 0)

class TCDupeGuru_renameSelected(TestCase):
def setUp(self):
p = self.tmppath()
class TestCaseDupeGuru_renameSelected:
def pytest_funcarg__do_setup(self, request):
tmpdir = request.getfuncargvalue('tmpdir')
p = Path(str(tmpdir))
fp = open(str(p + 'foo bar 1'),mode='w')
fp.close()
fp = open(str(p + 'foo bar 2'),mode='w')
@@ -399,7 +404,7 @@ class TCDupeGuru_renameSelected(TestCase):
self.rtable = ResultTable(self.rtable_gui, self.app)
self.rtable.connect()

def test_simple(self):
def test_simple(self, do_setup):
app = self.app
g = self.groups[0]
self.rtable.select([1])
@@ -409,11 +414,11 @@ class TCDupeGuru_renameSelected(TestCase):
assert 'foo bar 2' not in names
eq_(g.dupes[0].name, 'renamed')

def test_none_selected(self):
def test_none_selected(self, do_setup, monkeypatch):
app = self.app
g = self.groups[0]
self.rtable.select([])
self.mock(logging, 'warning', log_calls(lambda msg: None))
monkeypatch.setattr(logging, 'warning', log_calls(lambda msg: None))
assert not app.rename_selected('renamed')
msg = logging.warning.calls[0]['msg']
eq_('dupeGuru Warning: list index out of range', msg)
@@ -422,11 +427,11 @@ class TCDupeGuru_renameSelected(TestCase):
assert 'foo bar 2' in names
eq_(g.dupes[0].name, 'foo bar 2')

def test_name_already_exists(self):
def test_name_already_exists(self, do_setup, monkeypatch):
app = self.app
g = self.groups[0]
self.rtable.select([1])
self.mock(logging, 'warning', log_calls(lambda msg: None))
monkeypatch.setattr(logging, 'warning', log_calls(lambda msg: None))
assert not app.rename_selected('foo bar 1')
msg = logging.warning.calls[0]['msg']
assert msg.startswith('dupeGuru Warning: \'foo bar 1\' already exists in')
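A side note on @mark.skipif("not hasattr(os, 'link')") above: the pytest of that era accepted the condition as a string and evaluated it at collection time. On a current pytest the equivalent would presumably be a boolean condition plus a reason, along these lines (an assumed port, not part of this commit):

import os
from pytest import mark

@mark.skipif(not hasattr(os, 'link'), reason="os.link() is not available on this platform")
def test_ignore_hardlink_matches(tmpdir):
    pass  # body as in the hunk above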
@@ -6,20 +6,20 @@
# which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/bsd_license

import os.path as op
import os
import time
import tempfile
import shutil

from pytest import raises
from hsutil import io
from hsutil.path import Path
from hsutil.testutil import eq_
from hsutil.testcase import TestCase
from hscommon.testutil import eq_

from ..directories import *

testpath = Path(TestCase.datadirpath())

def create_fake_fs(rootpath):
# We have it as a separate function because other units are using it.
rootpath = rootpath + 'fs'
io.mkdir(rootpath)
io.mkdir(rootpath + 'dir1')
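Below, the directories tests stop reading their fixtures from the TestCase data directory (TestCase.datadirpath()) and instead build their two directory trees once per module: setup_module() creates everything under a tempfile.mkdtemp() root and publishes it as module.testpath, and teardown_module() removes it. The same pattern in miniature (the file name here is illustrative):

import os.path as op
import shutil
import tempfile

def setup_module(module):
    # Runs once before any test in this module; state is shared by
    # assigning attributes onto the module object.
    module.testpath = tempfile.mkdtemp()
    open(op.join(module.testpath, 'test.txt'), 'w').close()

def teardown_module(module):
    # Runs once after the last test in this module.
    shutil.rmtree(module.testpath)

def test_file_is_there():
    # 'testpath' resolves to the module attribute set in setup_module().
    assert op.exists(op.join(testpath, 'test.txt'))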
@@ -45,233 +45,253 @@ def create_fake_fs(rootpath):
fp.close()
return rootpath

class TCDirectories(TestCase):
def test_empty(self):
d = Directories()
self.assertEqual(0,len(d))
self.assert_('foobar' not in d)

def test_add_path(self):
d = Directories()
p = testpath + 'utils'
def setup_module(module):
# In this unit, we have tests depending on two directory structures. One with only one file in it
# and another with a more complex structure.
testpath = Path(tempfile.mkdtemp())
module.testpath = testpath
rootpath = testpath + 'onefile'
io.mkdir(rootpath)
fp = io.open(rootpath + 'test.txt', 'w')
fp.write('test_data')
fp.close()
create_fake_fs(testpath)

def teardown_module(module):
shutil.rmtree(str(module.testpath))

def test_empty():
d = Directories()
eq_(len(d), 0)
assert 'foobar' not in d

def test_add_path():
d = Directories()
p = testpath + 'onefile'
d.add_path(p)
eq_(1,len(d))
assert p in d
assert (p + 'foobar') in d
assert p[:-1] not in d
p = testpath + 'fs'
d.add_path(p)
eq_(2,len(d))
assert p in d

def test_AddPath_when_path_is_already_there():
d = Directories()
p = testpath + 'onefile'
d.add_path(p)
with raises(AlreadyThereError):
d.add_path(p)
self.assertEqual(1,len(d))
self.assert_(p in d)
self.assert_((p + 'foobar') in d)
self.assert_(p[:-1] not in d)
p = self.tmppath()
d.add_path(p)
self.assertEqual(2,len(d))
self.assert_(p in d)

def test_AddPath_when_path_is_already_there(self):
d = Directories()
p = testpath + 'utils'
d.add_path(p)
self.assertRaises(AlreadyThereError, d.add_path, p)
self.assertRaises(AlreadyThereError, d.add_path, p + 'foobar')
self.assertEqual(1, len(d))

def test_add_path_containing_paths_already_there(self):
d = Directories()
d.add_path(testpath + 'utils')
self.assertEqual(1, len(d))
d.add_path(testpath)
eq_(len(d), 1)
eq_(d[0], testpath)

def test_AddPath_non_latin(self):
p = Path(self.tmpdir())
to_add = p + 'unicode\u201a'
os.mkdir(str(to_add))
d = Directories()
try:
d.add_path(to_add)
except UnicodeDecodeError:
self.fail()

def test_del(self):
d = Directories()
d.add_path(testpath + 'utils')
try:
del d[1]
self.fail()
except IndexError:
pass
d.add_path(self.tmppath())
with raises(AlreadyThereError):
d.add_path(p + 'foobar')
eq_(1, len(d))

def test_add_path_containing_paths_already_there():
d = Directories()
d.add_path(testpath + 'onefile')
eq_(1, len(d))
d.add_path(testpath)
eq_(len(d), 1)
eq_(d[0], testpath)

def test_AddPath_non_latin(tmpdir):
p = Path(str(tmpdir))
to_add = p + 'unicode\u201a'
os.mkdir(str(to_add))
d = Directories()
try:
d.add_path(to_add)
except UnicodeDecodeError:
assert False

def test_del():
d = Directories()
d.add_path(testpath + 'onefile')
try:
del d[1]
self.assertEqual(1, len(d))

def test_states(self):
d = Directories()
p = testpath + 'utils'
assert False
except IndexError:
pass
d.add_path(testpath + 'fs')
del d[1]
eq_(1, len(d))

def test_states():
d = Directories()
p = testpath + 'onefile'
d.add_path(p)
eq_(STATE_NORMAL,d.get_state(p))
d.set_state(p,STATE_REFERENCE)
eq_(STATE_REFERENCE,d.get_state(p))
eq_(STATE_REFERENCE,d.get_state(p + 'dir1'))
eq_(1,len(d.states))
eq_(p,list(d.states.keys())[0])
eq_(STATE_REFERENCE,d.states[p])

def test_get_state_with_path_not_there():
# When the path's not there, just return STATE_NORMAL
d = Directories()
d.add_path(testpath + 'onefile')
eq_(d.get_state(testpath), STATE_NORMAL)

def test_states_remain_when_larger_directory_eat_smaller_ones():
d = Directories()
p = testpath + 'onefile'
d.add_path(p)
d.set_state(p,STATE_EXCLUDED)
d.add_path(testpath)
d.set_state(testpath,STATE_REFERENCE)
eq_(STATE_EXCLUDED,d.get_state(p))
eq_(STATE_EXCLUDED,d.get_state(p + 'dir1'))
eq_(STATE_REFERENCE,d.get_state(testpath))

def test_set_state_keep_state_dict_size_to_minimum():
d = Directories()
p = testpath + 'fs'
d.add_path(p)
d.set_state(p,STATE_REFERENCE)
d.set_state(p + 'dir1',STATE_REFERENCE)
eq_(1,len(d.states))
eq_(STATE_REFERENCE,d.get_state(p + 'dir1'))
d.set_state(p + 'dir1',STATE_NORMAL)
eq_(2,len(d.states))
eq_(STATE_NORMAL,d.get_state(p + 'dir1'))
d.set_state(p + 'dir1',STATE_REFERENCE)
eq_(1,len(d.states))
eq_(STATE_REFERENCE,d.get_state(p + 'dir1'))

def test_get_files():
d = Directories()
p = testpath + 'fs'
d.add_path(p)
d.set_state(p + 'dir1',STATE_REFERENCE)
d.set_state(p + 'dir2',STATE_EXCLUDED)
files = list(d.get_files())
eq_(5, len(files))
for f in files:
if f.path[:-1] == p + 'dir1':
assert f.is_ref
else:
assert not f.is_ref

def test_get_files_with_inherited_exclusion():
d = Directories()
p = testpath + 'onefile'
d.add_path(p)
d.set_state(p,STATE_EXCLUDED)
eq_([], list(d.get_files()))

def test_save_and_load(tmpdir):
d1 = Directories()
d2 = Directories()
p1 = Path(str(tmpdir.join('p1')))
io.mkdir(p1)
p2 = Path(str(tmpdir.join('p2')))
io.mkdir(p2)
d1.add_path(p1)
d1.add_path(p2)
d1.set_state(p1, STATE_REFERENCE)
d1.set_state(p1 + 'dir1',STATE_EXCLUDED)
tmpxml = str(tmpdir.join('directories_testunit.xml'))
d1.save_to_file(tmpxml)
d2.load_from_file(tmpxml)
eq_(2, len(d2))
eq_(STATE_REFERENCE,d2.get_state(p1))
eq_(STATE_EXCLUDED,d2.get_state(p1 + 'dir1'))

def test_invalid_path():
d = Directories()
p = Path('does_not_exist')
with raises(InvalidPathError):
d.add_path(p)
self.assertEqual(STATE_NORMAL,d.get_state(p))
d.set_state(p,STATE_REFERENCE)
self.assertEqual(STATE_REFERENCE,d.get_state(p))
self.assertEqual(STATE_REFERENCE,d.get_state(p + 'dir1'))
self.assertEqual(1,len(d.states))
self.assertEqual(p,list(d.states.keys())[0])
self.assertEqual(STATE_REFERENCE,d.states[p])

def test_get_state_with_path_not_there(self):
# When the path's not there, just return STATE_NORMAL
d = Directories()
d.add_path(testpath + 'utils')
eq_(d.get_state(testpath), STATE_NORMAL)

def test_states_remain_when_larger_directory_eat_smaller_ones(self):
d = Directories()
p = testpath + 'utils'
d.add_path(p)
d.set_state(p,STATE_EXCLUDED)
d.add_path(testpath)
d.set_state(testpath,STATE_REFERENCE)
self.assertEqual(STATE_EXCLUDED,d.get_state(p))
self.assertEqual(STATE_EXCLUDED,d.get_state(p + 'dir1'))
self.assertEqual(STATE_REFERENCE,d.get_state(testpath))

def test_set_state_keep_state_dict_size_to_minimum(self):
d = Directories()
p = create_fake_fs(self.tmppath())
d.add_path(p)
d.set_state(p,STATE_REFERENCE)
d.set_state(p + 'dir1',STATE_REFERENCE)
self.assertEqual(1,len(d.states))
self.assertEqual(STATE_REFERENCE,d.get_state(p + 'dir1'))
d.set_state(p + 'dir1',STATE_NORMAL)
self.assertEqual(2,len(d.states))
self.assertEqual(STATE_NORMAL,d.get_state(p + 'dir1'))
d.set_state(p + 'dir1',STATE_REFERENCE)
self.assertEqual(1,len(d.states))
self.assertEqual(STATE_REFERENCE,d.get_state(p + 'dir1'))

def test_get_files(self):
d = Directories()
p = create_fake_fs(self.tmppath())
d.add_path(p)
d.set_state(p + 'dir1',STATE_REFERENCE)
d.set_state(p + 'dir2',STATE_EXCLUDED)
files = list(d.get_files())
self.assertEqual(5, len(files))
for f in files:
if f.path[:-1] == p + 'dir1':
assert f.is_ref
else:
assert not f.is_ref

def test_get_files_with_inherited_exclusion(self):
d = Directories()
p = testpath + 'utils'
d.add_path(p)
d.set_state(p,STATE_EXCLUDED)
self.assertEqual([], list(d.get_files()))

def test_save_and_load(self):
d1 = Directories()
d2 = Directories()
p1 = self.tmppath()
p2 = self.tmppath()
d1.add_path(p1)
d1.add_path(p2)
d1.set_state(p1, STATE_REFERENCE)
d1.set_state(p1 + 'dir1',STATE_EXCLUDED)
tmpxml = op.join(self.tmpdir(), 'directories_testunit.xml')
d1.save_to_file(tmpxml)
d2.load_from_file(tmpxml)
self.assertEqual(2, len(d2))
self.assertEqual(STATE_REFERENCE,d2.get_state(p1))
self.assertEqual(STATE_EXCLUDED,d2.get_state(p1 + 'dir1'))

def test_invalid_path(self):
d = Directories()
p = Path('does_not_exist')
self.assertRaises(InvalidPathError, d.add_path, p)
self.assertEqual(0, len(d))

def test_set_state_on_invalid_path(self):
d = Directories()
try:
d.set_state(Path('foobar',),STATE_NORMAL)
except LookupError:
self.fail()

def test_load_from_file_with_invalid_path(self):
#This test simulates a load from file resulting in a
#InvalidPath raise. Other directories must be loaded.
d1 = Directories()
d1.add_path(testpath + 'utils')
#Will raise InvalidPath upon loading
p = self.tmppath()
d1.add_path(p)
io.rmdir(p)
tmpxml = op.join(self.tmpdir(), 'directories_testunit.xml')
d1.save_to_file(tmpxml)
d2 = Directories()
d2.load_from_file(tmpxml)
self.assertEqual(1, len(d2))

def test_unicode_save(self):
d = Directories()
p1 = self.tmppath() + 'hello\xe9'
io.mkdir(p1)
io.mkdir(p1 + 'foo\xe9')
d.add_path(p1)
d.set_state(p1 + 'foo\xe9', STATE_EXCLUDED)
tmpxml = op.join(self.tmpdir(), 'directories_testunit.xml')
try:
d.save_to_file(tmpxml)
except UnicodeDecodeError:
self.fail()

def test_get_files_refreshes_its_directories(self):
d = Directories()
p = create_fake_fs(self.tmppath())
d.add_path(p)
files = d.get_files()
self.assertEqual(6, len(list(files)))
time.sleep(1)
os.remove(str(p + ('dir1','file1.test')))
files = d.get_files()
self.assertEqual(5, len(list(files)))

def test_get_files_does_not_choke_on_non_existing_directories(self):
d = Directories()
p = Path(self.tmpdir())
d.add_path(p)
io.rmtree(p)
self.assertEqual([], list(d.get_files()))

def test_get_state_returns_excluded_by_default_for_hidden_directories(self):
d = Directories()
p = Path(self.tmpdir())
hidden_dir_path = p + '.foo'
io.mkdir(p + '.foo')
d.add_path(p)
self.assertEqual(d.get_state(hidden_dir_path), STATE_EXCLUDED)
# But it can be overridden
d.set_state(hidden_dir_path, STATE_NORMAL)
self.assertEqual(d.get_state(hidden_dir_path), STATE_NORMAL)

def test_default_path_state_override(self):
# It's possible for a subclass to override the default state of a path
class MyDirectories(Directories):
def _default_state_for_path(self, path):
if 'foobar' in path:
return STATE_EXCLUDED

d = MyDirectories()
p1 = self.tmppath()
io.mkdir(p1 + 'foobar')
io.open(p1 + 'foobar/somefile', 'w').close()
io.mkdir(p1 + 'foobaz')
io.open(p1 + 'foobaz/somefile', 'w').close()
d.add_path(p1)
eq_(d.get_state(p1 + 'foobaz'), STATE_NORMAL)
eq_(d.get_state(p1 + 'foobar'), STATE_EXCLUDED)
eq_(len(list(d.get_files())), 1) # only the 'foobaz' file is there
# However, the default state can be changed
d.set_state(p1 + 'foobar', STATE_NORMAL)
eq_(d.get_state(p1 + 'foobar'), STATE_NORMAL)
eq_(len(list(d.get_files())), 2)
eq_(0, len(d))

def test_set_state_on_invalid_path():
d = Directories()
try:
d.set_state(Path('foobar',),STATE_NORMAL)
except LookupError:
assert False

def test_load_from_file_with_invalid_path(tmpdir):
#This test simulates a load from file resulting in a
#InvalidPath raise. Other directories must be loaded.
d1 = Directories()
d1.add_path(testpath + 'onefile')
#Will raise InvalidPath upon loading
p = Path(str(tmpdir.join('toremove')))
io.mkdir(p)
d1.add_path(p)
io.rmdir(p)
tmpxml = str(tmpdir.join('directories_testunit.xml'))
d1.save_to_file(tmpxml)
d2 = Directories()
d2.load_from_file(tmpxml)
eq_(1, len(d2))

def test_unicode_save(tmpdir):
d = Directories()
p1 = Path(str(tmpdir)) + 'hello\xe9'
io.mkdir(p1)
io.mkdir(p1 + 'foo\xe9')
d.add_path(p1)
d.set_state(p1 + 'foo\xe9', STATE_EXCLUDED)
tmpxml = str(tmpdir.join('directories_testunit.xml'))
try:
d.save_to_file(tmpxml)
except UnicodeDecodeError:
assert False

def test_get_files_refreshes_its_directories():
d = Directories()
p = testpath + 'fs'
d.add_path(p)
files = d.get_files()
eq_(6, len(list(files)))
time.sleep(1)
os.remove(str(p + ('dir1','file1.test')))
files = d.get_files()
eq_(5, len(list(files)))

def test_get_files_does_not_choke_on_non_existing_directories(tmpdir):
d = Directories()
p = Path(str(tmpdir))
d.add_path(p)
io.rmtree(p)
eq_([], list(d.get_files()))

def test_get_state_returns_excluded_by_default_for_hidden_directories(tmpdir):
d = Directories()
p = Path(str(tmpdir))
hidden_dir_path = p + '.foo'
io.mkdir(p + '.foo')
d.add_path(p)
eq_(d.get_state(hidden_dir_path), STATE_EXCLUDED)
# But it can be overridden
d.set_state(hidden_dir_path, STATE_NORMAL)
eq_(d.get_state(hidden_dir_path), STATE_NORMAL)

def test_default_path_state_override(tmpdir):
# It's possible for a subclass to override the default state of a path
class MyDirectories(Directories):
def _default_state_for_path(self, path):
if 'foobar' in path:
return STATE_EXCLUDED

d = MyDirectories()
p1 = Path(str(tmpdir))
io.mkdir(p1 + 'foobar')
io.open(p1 + 'foobar/somefile', 'w').close()
io.mkdir(p1 + 'foobaz')
io.open(p1 + 'foobaz/somefile', 'w').close()
d.add_path(p1)
eq_(d.get_state(p1 + 'foobaz'), STATE_NORMAL)
eq_(d.get_state(p1 + 'foobar'), STATE_EXCLUDED)
eq_(len(list(d.get_files())), 1) # only the 'foobaz' file is there
# However, the default state can be changed
d.set_state(p1 + 'foobar', STATE_NORMAL)
eq_(d.get_state(p1 + 'foobar'), STATE_NORMAL)
eq_(len(list(d.get_files())), 2)

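Another recurring substitution in the file above: self.assertRaises(Error, func, *args) becomes pytest's raises() context manager, which keeps the failing call in plain view:

from pytest import raises

def divide(a, b):  # illustrative function, not from the diff
    return a / b

def test_divide_by_zero():
    # Replaces self.assertRaises(ZeroDivisionError, divide, 1, 0).
    with raises(ZeroDivisionError):
        divide(1, 0)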
@@ -12,12 +12,11 @@ from jobprogress import job
from hsutil.decorators import log_calls
from hsutil.misc import first
from hsutil.testutil import eq_
from hsutil.testcase import TestCase

from .. import engine
from ..engine import *

class NamedObject(object):
class NamedObject:
def __init__(self, name="foobar", with_words=False, size=1):
self.name = name
self.size = size
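The engine tests below lean almost entirely on eq_. Its definition lives in hscommon.testutil; judging from how it is used, it is presumably no more than an assert with a readable failure message, roughly:

def eq_(a, b, msg=None):
    # Assumed shape of hscommon.testutil.eq_, inferred from usage here;
    # the real helper may differ in details.
    assert a == b, msg or '%r != %r' % (a, b)

eq_(2 + 2, 4)        # passes silently
# eq_('foo', 'bar')  # would fail with: 'foo' != 'bar'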
@@ -55,179 +54,179 @@ def assert_match(m, name1, name2):
eq_(m.first.name, name2)
eq_(m.second.name, name1)

class TCgetwords(TestCase):
class TestCasegetwords:
def test_spaces(self):
self.assertEqual(['a', 'b', 'c', 'd'], getwords("a b c d"))
self.assertEqual(['a', 'b', 'c', 'd'], getwords(" a b c d "))
eq_(['a', 'b', 'c', 'd'], getwords("a b c d"))
eq_(['a', 'b', 'c', 'd'], getwords(" a b c d "))

def test_splitter_chars(self):
self.assertEqual(
eq_(
[chr(i) for i in range(ord('a'),ord('z')+1)],
getwords("a-b_c&d+e(f)g;h\\i[j]k{l}m:n.o,p<q>r/s?t~u!v@w#x$y*z")
)

def test_joiner_chars(self):
self.assertEqual(["aec"], getwords("a'e\u0301c"))
eq_(["aec"], getwords("a'e\u0301c"))

def test_empty(self):
self.assertEqual([], getwords(''))
eq_([], getwords(''))

def test_returns_lowercase(self):
self.assertEqual(['foo', 'bar'], getwords('FOO BAR'))
eq_(['foo', 'bar'], getwords('FOO BAR'))

def test_decompose_unicode(self):
self.assertEqual(getwords('foo\xe9bar'), ['fooebar'])
eq_(getwords('foo\xe9bar'), ['fooebar'])


class TCgetfields(TestCase):
class TestCasegetfields:
def test_simple(self):
self.assertEqual([['a', 'b'], ['c', 'd', 'e']], getfields('a b - c d e'))
eq_([['a', 'b'], ['c', 'd', 'e']], getfields('a b - c d e'))

def test_empty(self):
self.assertEqual([], getfields(''))

eq_([], getfields(''))

def test_cleans_empty_fields(self):
expected = [['a', 'bc', 'def']]
actual = getfields(' - a bc def')
self.assertEqual(expected, actual)
eq_(expected, actual)
expected = [['bc', 'def']]


class TCunpack_fields(TestCase):
class TestCaseunpack_fields:
def test_with_fields(self):
expected = ['a', 'b', 'c', 'd', 'e', 'f']
actual = unpack_fields([['a'], ['b', 'c'], ['d', 'e', 'f']])
self.assertEqual(expected, actual)
eq_(expected, actual)

def test_without_fields(self):
expected = ['a', 'b', 'c', 'd', 'e', 'f']
actual = unpack_fields(['a', 'b', 'c', 'd', 'e', 'f'])
self.assertEqual(expected, actual)
eq_(expected, actual)

def test_empty(self):
self.assertEqual([], unpack_fields([]))
eq_([], unpack_fields([]))


class TCWordCompare(TestCase):
class TestCaseWordCompare:
def test_list(self):
self.assertEqual(100, compare(['a', 'b', 'c', 'd'],['a', 'b', 'c', 'd']))
self.assertEqual(86, compare(['a', 'b', 'c', 'd'],['a', 'b', 'c']))
eq_(100, compare(['a', 'b', 'c', 'd'],['a', 'b', 'c', 'd']))
eq_(86, compare(['a', 'b', 'c', 'd'],['a', 'b', 'c']))

def test_unordered(self):
#Sometimes, users don't want fuzzy matching too much. When they set the slider
#to 100, they don't expect a filename with the same words, but not the same order, to match.
#Thus, we want to return 99 in that case.
self.assertEqual(99, compare(['a', 'b', 'c', 'd'], ['d', 'b', 'c', 'a']))
eq_(99, compare(['a', 'b', 'c', 'd'], ['d', 'b', 'c', 'a']))

def test_word_occurs_twice(self):
#if a word occurs twice in first, but once in second, we want the word to be only counted once
self.assertEqual(89, compare(['a', 'b', 'c', 'd', 'a'], ['d', 'b', 'c', 'a']))
eq_(89, compare(['a', 'b', 'c', 'd', 'a'], ['d', 'b', 'c', 'a']))

def test_uses_copy_of_lists(self):
first = ['foo', 'bar']
second = ['bar', 'bleh']
compare(first, second)
self.assertEqual(['foo', 'bar'], first)
self.assertEqual(['bar', 'bleh'], second)
eq_(['foo', 'bar'], first)
eq_(['bar', 'bleh'], second)

def test_word_weight(self):
self.assertEqual(int((6.0 / 13.0) * 100), compare(['foo', 'bar'], ['bar', 'bleh'], (WEIGHT_WORDS, )))
eq_(int((6.0 / 13.0) * 100), compare(['foo', 'bar'], ['bar', 'bleh'], (WEIGHT_WORDS, )))

def test_similar_words(self):
self.assertEqual(100, compare(['the', 'white', 'stripes'],['the', 'whites', 'stripe'], (MATCH_SIMILAR_WORDS, )))
eq_(100, compare(['the', 'white', 'stripes'],['the', 'whites', 'stripe'], (MATCH_SIMILAR_WORDS, )))

def test_empty(self):
self.assertEqual(0, compare([], []))
eq_(0, compare([], []))

def test_with_fields(self):
self.assertEqual(67, compare([['a', 'b'], ['c', 'd', 'e']], [['a', 'b'], ['c', 'd', 'f']]))
eq_(67, compare([['a', 'b'], ['c', 'd', 'e']], [['a', 'b'], ['c', 'd', 'f']]))

def test_propagate_flags_with_fields(self):
def test_propagate_flags_with_fields(self, monkeypatch):
def mock_compare(first, second, flags):
self.assertEqual((0, 1, 2, 3, 5), flags)
eq_((0, 1, 2, 3, 5), flags)

self.mock(engine, 'compare_fields', mock_compare)
monkeypatch.setattr(engine, 'compare_fields', mock_compare)
compare([['a']], [['a']], (0, 1, 2, 3, 5))


class TCWordCompareWithFields(TestCase):
class TestCaseWordCompareWithFields:
def test_simple(self):
self.assertEqual(67, compare_fields([['a', 'b'], ['c', 'd', 'e']], [['a', 'b'], ['c', 'd', 'f']]))
eq_(67, compare_fields([['a', 'b'], ['c', 'd', 'e']], [['a', 'b'], ['c', 'd', 'f']]))

def test_empty(self):
self.assertEqual(0, compare_fields([], []))
eq_(0, compare_fields([], []))

def test_different_length(self):
self.assertEqual(0, compare_fields([['a'], ['b']], [['a'], ['b'], ['c']]))
eq_(0, compare_fields([['a'], ['b']], [['a'], ['b'], ['c']]))

def test_propagates_flags(self):
def test_propagates_flags(self, monkeypatch):
def mock_compare(first, second, flags):
self.assertEqual((0, 1, 2, 3, 5), flags)
eq_((0, 1, 2, 3, 5), flags)

self.mock(engine, 'compare_fields', mock_compare)
monkeypatch.setattr(engine, 'compare_fields', mock_compare)
compare_fields([['a']], [['a']],(0, 1, 2, 3, 5))

def test_order(self):
first = [['a', 'b'], ['c', 'd', 'e']]
second = [['c', 'd', 'f'], ['a', 'b']]
self.assertEqual(0, compare_fields(first, second))
eq_(0, compare_fields(first, second))

def test_no_order(self):
first = [['a','b'],['c','d','e']]
second = [['c','d','f'],['a','b']]
self.assertEqual(67, compare_fields(first, second, (NO_FIELD_ORDER, )))
eq_(67, compare_fields(first, second, (NO_FIELD_ORDER, )))
first = [['a','b'],['a','b']] #a field can only be matched once.
second = [['c','d','f'],['a','b']]
self.assertEqual(0, compare_fields(first, second, (NO_FIELD_ORDER, )))
eq_(0, compare_fields(first, second, (NO_FIELD_ORDER, )))
first = [['a','b'],['a','b','c']]
second = [['c','d','f'],['a','b']]
self.assertEqual(33, compare_fields(first, second, (NO_FIELD_ORDER, )))
eq_(33, compare_fields(first, second, (NO_FIELD_ORDER, )))

def test_compare_fields_without_order_doesnt_alter_fields(self):
#The NO_ORDER comp type altered the fields!
first = [['a','b'],['c','d','e']]
second = [['c','d','f'],['a','b']]
self.assertEqual(67, compare_fields(first, second, (NO_FIELD_ORDER, )))
self.assertEqual([['a','b'],['c','d','e']],first)
self.assertEqual([['c','d','f'],['a','b']],second)
eq_(67, compare_fields(first, second, (NO_FIELD_ORDER, )))
eq_([['a','b'],['c','d','e']],first)
eq_([['c','d','f'],['a','b']],second)


class TCbuild_word_dict(TestCase):
class TestCasebuild_word_dict:
def test_with_standard_words(self):
l = [NamedObject('foo bar',True)]
l.append(NamedObject('bar baz',True))
l.append(NamedObject('baz bleh foo',True))
d = build_word_dict(l)
self.assertEqual(4,len(d))
self.assertEqual(2,len(d['foo']))
self.assert_(l[0] in d['foo'])
self.assert_(l[2] in d['foo'])
self.assertEqual(2,len(d['bar']))
self.assert_(l[0] in d['bar'])
self.assert_(l[1] in d['bar'])
self.assertEqual(2,len(d['baz']))
self.assert_(l[1] in d['baz'])
self.assert_(l[2] in d['baz'])
self.assertEqual(1,len(d['bleh']))
self.assert_(l[2] in d['bleh'])
eq_(4,len(d))
eq_(2,len(d['foo']))
assert l[0] in d['foo']
assert l[2] in d['foo']
eq_(2,len(d['bar']))
assert l[0] in d['bar']
assert l[1] in d['bar']
eq_(2,len(d['baz']))
assert l[1] in d['baz']
assert l[2] in d['baz']
eq_(1,len(d['bleh']))
assert l[2] in d['bleh']

def test_unpack_fields(self):
o = NamedObject('')
o.words = [['foo','bar'],['baz']]
d = build_word_dict([o])
self.assertEqual(3,len(d))
self.assertEqual(1,len(d['foo']))
eq_(3,len(d))
eq_(1,len(d['foo']))

def test_words_are_unaltered(self):
o = NamedObject('')
o.words = [['foo','bar'],['baz']]
d = build_word_dict([o])
self.assertEqual([['foo','bar'],['baz']],o.words)
build_word_dict([o])
eq_([['foo','bar'],['baz']],o.words)

def test_object_instances_can_only_be_once_in_words_object_list(self):
o = NamedObject('foo foo',True)
d = build_word_dict([o])
self.assertEqual(1,len(d['foo']))
eq_(1,len(d['foo']))

def test_job(self):
def do_progress(p,d=''):
@@ -239,11 +238,11 @@ class TCbuild_word_dict(TestCase):
s = "foo bar"
build_word_dict([NamedObject(s, True), NamedObject(s, True), NamedObject(s, True)], j)
# We don't have intermediate log because iter_with_progress is called with every > 1
self.assertEqual(0,self.log[0])
self.assertEqual(100,self.log[1])
eq_(0,self.log[0])
eq_(100,self.log[1])


class TCmerge_similar_words(TestCase):
class TestCasemerge_similar_words:
def test_some_similar_words(self):
d = {
'foobar':set([1]),
@@ -251,20 +250,20 @@ class TCmerge_similar_words(TestCase):
'foobar2':set([3]),
}
merge_similar_words(d)
self.assertEqual(1,len(d))
self.assertEqual(3,len(d['foobar']))
eq_(1,len(d))
eq_(3,len(d['foobar']))



class TCreduce_common_words(TestCase):
class TestCasereduce_common_words:
def test_typical(self):
d = {
'foo': set([NamedObject('foo bar',True) for i in range(50)]),
'bar': set([NamedObject('foo bar',True) for i in range(49)])
}
reduce_common_words(d, 50)
self.assert_('foo' not in d)
self.assertEqual(49,len(d['bar']))
assert 'foo' not in d
eq_(49,len(d['bar']))

def test_dont_remove_objects_with_only_common_words(self):
d = {
@@ -272,8 +271,8 @@ class TCreduce_common_words(TestCase):
'uncommon': set([NamedObject("common uncommon",True)])
}
reduce_common_words(d, 50)
self.assertEqual(1,len(d['common']))
self.assertEqual(1,len(d['uncommon']))
eq_(1,len(d['common']))
eq_(1,len(d['uncommon']))

def test_values_still_are_set_instances(self):
d = {
@@ -281,8 +280,8 @@ class TCreduce_common_words(TestCase):
'uncommon': set([NamedObject("common uncommon",True)])
}
reduce_common_words(d, 50)
self.assert_(isinstance(d['common'],set))
self.assert_(isinstance(d['uncommon'],set))
assert isinstance(d['common'],set)
assert isinstance(d['uncommon'],set)

def test_dont_raise_KeyError_when_a_word_has_been_removed(self):
#If a word has been removed by the reduce, an object in a subsequent common word that
@@ -324,42 +323,42 @@ class TCreduce_common_words(TestCase):
'baz': set([NamedObject('foo bar baz',True) for i in range(49)])
}
reduce_common_words(d, 50)
self.assertEqual(1,len(d['foo']))
self.assertEqual(1,len(d['bar']))
self.assertEqual(49,len(d['baz']))
eq_(1,len(d['foo']))
eq_(1,len(d['bar']))
eq_(49,len(d['baz']))


class TCget_match(TestCase):
class TestCaseget_match:
def test_simple(self):
o1 = NamedObject("foo bar",True)
o2 = NamedObject("bar bleh",True)
m = get_match(o1,o2)
self.assertEqual(50,m.percentage)
self.assertEqual(['foo','bar'],m.first.words)
self.assertEqual(['bar','bleh'],m.second.words)
self.assert_(m.first is o1)
self.assert_(m.second is o2)
eq_(50,m.percentage)
eq_(['foo','bar'],m.first.words)
eq_(['bar','bleh'],m.second.words)
assert m.first is o1
assert m.second is o2

def test_in(self):
o1 = NamedObject("foo",True)
o2 = NamedObject("bar",True)
m = get_match(o1,o2)
self.assert_(o1 in m)
self.assert_(o2 in m)
self.assert_(object() not in m)
assert o1 in m
assert o2 in m
assert object() not in m

def test_word_weight(self):
self.assertEqual(int((6.0 / 13.0) * 100),get_match(NamedObject("foo bar",True),NamedObject("bar bleh",True),(WEIGHT_WORDS,)).percentage)
eq_(int((6.0 / 13.0) * 100),get_match(NamedObject("foo bar",True),NamedObject("bar bleh",True),(WEIGHT_WORDS,)).percentage)


class GetMatches(TestCase):
class TestCaseGetMatches:
def test_empty(self):
eq_(getmatches([]), [])

def test_simple(self):
l = [NamedObject("foo bar"),NamedObject("bar bleh"),NamedObject("a b c foo")]
r = getmatches(l)
self.assertEqual(2,len(r))
eq_(2,len(r))
m = first(m for m in r if m.percentage == 50) #"foo bar" and "bar bleh"
assert_match(m, 'foo bar', 'bar bleh')
m = first(m for m in r if m.percentage == 33) #"foo bar" and "a b c foo"
@@ -376,17 +375,17 @@ class GetMatches(TestCase):
def test_twice_the_same_word(self):
l = [NamedObject("foo foo bar"),NamedObject("bar bleh")]
r = getmatches(l)
self.assertEqual(1,len(r))
eq_(1,len(r))

def test_twice_the_same_word_when_preworded(self):
l = [NamedObject("foo foo bar",True),NamedObject("bar bleh",True)]
r = getmatches(l)
self.assertEqual(1,len(r))
eq_(1,len(r))

def test_two_words_match(self):
l = [NamedObject("foo bar"),NamedObject("foo bar bleh")]
r = getmatches(l)
self.assertEqual(1,len(r))
eq_(1,len(r))

def test_match_files_with_only_common_words(self):
#If a word occurs more than 50 times, it is excluded from the matching process
@@ -395,7 +394,7 @@ class GetMatches(TestCase):
# This test assumes that the common word threshold const is 50
l = [NamedObject("foo") for i in range(50)]
r = getmatches(l)
self.assertEqual(1225,len(r))
eq_(1225,len(r))

def test_use_words_already_there_if_there(self):
o1 = NamedObject('foo')
@@ -412,14 +411,14 @@ class GetMatches(TestCase):
self.log = []
s = "foo bar"
getmatches([NamedObject(s), NamedObject(s), NamedObject(s)], j=j)
self.assert_(len(self.log) > 2)
self.assertEqual(0,self.log[0])
self.assertEqual(100,self.log[-1])
assert len(self.log) > 2
eq_(0,self.log[0])
eq_(100,self.log[-1])

def test_weight_words(self):
l = [NamedObject("foo bar"),NamedObject("bar bleh")]
m = getmatches(l, weight_words=True)[0]
self.assertEqual(int((6.0 / 13.0) * 100),m.percentage)
eq_(int((6.0 / 13.0) * 100),m.percentage)

def test_similar_word(self):
l = [NamedObject("foobar"),NamedObject("foobars")]
@@ -439,7 +438,7 @@ class GetMatches(TestCase):
def test_double_words_get_counted_only_once(self):
l = [NamedObject("foo bar foo bleh"),NamedObject("foo bar bleh bar")]
m = getmatches(l)[0]
self.assertEqual(75,m.percentage)
eq_(75,m.percentage)

def test_with_fields(self):
o1 = NamedObject("foo bar - foo bleh")
@@ -447,7 +446,7 @@ class GetMatches(TestCase):
o1.words = getfields(o1.name)
o2.words = getfields(o2.name)
m = getmatches([o1, o2])[0]
self.assertEqual(50, m.percentage)
eq_(50, m.percentage)

def test_with_fields_no_order(self):
o1 = NamedObject("foo bar - foo bleh")
@@ -475,9 +474,9 @@ class GetMatches(TestCase):
def test_min_match_percentage(self):
l = [NamedObject("foo bar"),NamedObject("bar bleh"),NamedObject("a b c foo")]
r = getmatches(l, min_match_percentage=50)
self.assertEqual(1,len(r)) #Only "foo bar" / "bar bleh" should match
eq_(1,len(r)) #Only "foo bar" / "bar bleh" should match

def test_MemoryError(self):
def test_MemoryError(self, monkeypatch):
@log_calls
def mocked_match(first, second, flags):
if len(mocked_match.calls) > 42:
@@ -485,35 +484,35 @@ class GetMatches(TestCase):
return Match(first, second, 0)

objects = [NamedObject() for i in range(10)] # results in 45 matches
self.mock(engine, 'get_match', mocked_match)
monkeypatch.setattr(engine, 'get_match', mocked_match)
try:
r = getmatches(objects)
except MemoryError:
self.fail('MemorryError must be handled')
self.assertEqual(42, len(r))
eq_(42, len(r))


class GetMatchesByContents(TestCase):
class TestCaseGetMatchesByContents:
def test_dont_compare_empty_files(self):
o1, o2 = no(size=0), no(size=0)
assert not getmatches_by_contents([o1, o2])


class TCGroup(TestCase):
class TestCaseGroup:
def test_empy(self):
g = Group()
self.assertEqual(None,g.ref)
self.assertEqual([],g.dupes)
self.assertEqual(0,len(g.matches))
eq_(None,g.ref)
eq_([],g.dupes)
eq_(0,len(g.matches))

def test_add_match(self):
g = Group()
m = get_match(NamedObject("foo",True),NamedObject("bar",True))
g.add_match(m)
self.assert_(g.ref is m.first)
self.assertEqual([m.second],g.dupes)
self.assertEqual(1,len(g.matches))
self.assert_(m in g.matches)
assert g.ref is m.first
eq_([m.second],g.dupes)
eq_(1,len(g.matches))
assert m in g.matches

def test_multiple_add_match(self):
g = Group()
@@ -522,49 +521,49 @@ class TCGroup(TestCase):
o3 = NamedObject("c",True)
o4 = NamedObject("d",True)
g.add_match(get_match(o1,o2))
self.assert_(g.ref is o1)
self.assertEqual([o2],g.dupes)
self.assertEqual(1,len(g.matches))
assert g.ref is o1
eq_([o2],g.dupes)
eq_(1,len(g.matches))
g.add_match(get_match(o1,o3))
self.assertEqual([o2],g.dupes)
self.assertEqual(2,len(g.matches))
eq_([o2],g.dupes)
eq_(2,len(g.matches))
g.add_match(get_match(o2,o3))
self.assertEqual([o2,o3],g.dupes)
self.assertEqual(3,len(g.matches))
eq_([o2,o3],g.dupes)
eq_(3,len(g.matches))
g.add_match(get_match(o1,o4))
self.assertEqual([o2,o3],g.dupes)
self.assertEqual(4,len(g.matches))
eq_([o2,o3],g.dupes)
eq_(4,len(g.matches))
g.add_match(get_match(o2,o4))
self.assertEqual([o2,o3],g.dupes)
self.assertEqual(5,len(g.matches))
eq_([o2,o3],g.dupes)
eq_(5,len(g.matches))
g.add_match(get_match(o3,o4))
self.assertEqual([o2,o3,o4],g.dupes)
self.assertEqual(6,len(g.matches))
eq_([o2,o3,o4],g.dupes)
eq_(6,len(g.matches))

def test_len(self):
g = Group()
self.assertEqual(0,len(g))
eq_(0,len(g))
g.add_match(get_match(NamedObject("foo",True),NamedObject("bar",True)))
self.assertEqual(2,len(g))
eq_(2,len(g))

def test_add_same_match_twice(self):
g = Group()
m = get_match(NamedObject("foo",True),NamedObject("foo",True))
g.add_match(m)
self.assertEqual(2,len(g))
self.assertEqual(1,len(g.matches))
eq_(2,len(g))
eq_(1,len(g.matches))
g.add_match(m)
self.assertEqual(2,len(g))
self.assertEqual(1,len(g.matches))
eq_(2,len(g))
eq_(1,len(g.matches))

def test_in(self):
g = Group()
o1 = NamedObject("foo",True)
o2 = NamedObject("bar",True)
self.assert_(o1 not in g)
assert o1 not in g
g.add_match(get_match(o1,o2))
self.assert_(o1 in g)
self.assert_(o2 in g)
assert o1 in g
assert o2 in g

def test_remove(self):
g = Group()
@@ -574,14 +573,14 @@ class TCGroup(TestCase):
g.add_match(get_match(o1,o2))
g.add_match(get_match(o1,o3))
g.add_match(get_match(o2,o3))
self.assertEqual(3,len(g.matches))
self.assertEqual(3,len(g))
eq_(3,len(g.matches))
eq_(3,len(g))
g.remove_dupe(o3)
self.assertEqual(1,len(g.matches))
self.assertEqual(2,len(g))
eq_(1,len(g.matches))
eq_(2,len(g))
g.remove_dupe(o1)
self.assertEqual(0,len(g.matches))
self.assertEqual(0,len(g))
eq_(0,len(g.matches))
eq_(0,len(g))

def test_remove_with_ref_dupes(self):
g = Group()
@@ -594,21 +593,21 @@ class TCGroup(TestCase):
o1.is_ref = True
o2.is_ref = True
g.remove_dupe(o3)
self.assertEqual(0,len(g))
eq_(0,len(g))

def test_switch_ref(self):
o1 = NamedObject(with_words=True)
o2 = NamedObject(with_words=True)
g = Group()
g.add_match(get_match(o1,o2))
self.assert_(o1 is g.ref)
assert o1 is g.ref
g.switch_ref(o2)
self.assert_(o2 is g.ref)
self.assertEqual([o1],g.dupes)
assert o2 is g.ref
eq_([o1],g.dupes)
g.switch_ref(o2)
self.assert_(o2 is g.ref)
assert o2 is g.ref
g.switch_ref(NamedObject('',True))
self.assert_(o2 is g.ref)
assert o2 is g.ref

def test_get_match_of(self):
g = Group()
@@ -616,10 +615,10 @@ class TCGroup(TestCase):
g.add_match(m)
o = g.dupes[0]
m = g.get_match_of(o)
self.assert_(g.ref in m)
self.assert_(o in m)
self.assert_(g.get_match_of(NamedObject('',True)) is None)
self.assert_(g.get_match_of(g.ref) is None)
assert g.ref in m
assert o in m
assert g.get_match_of(NamedObject('',True)) is None
assert g.get_match_of(g.ref) is None

def test_percentage(self):
#percentage should return the avg percentage in relation to the ref
@@ -631,18 +630,18 @@ class TCGroup(TestCase):
g.add_match(m1)
g.add_match(m2)
g.add_match(m3)
self.assertEqual(75,g.percentage)
eq_(75,g.percentage)
g.switch_ref(g.dupes[0])
self.assertEqual(66,g.percentage)
eq_(66,g.percentage)
g.remove_dupe(g.dupes[0])
self.assertEqual(33,g.percentage)
|
||||
eq_(33,g.percentage)
|
||||
g.add_match(m1)
|
||||
g.add_match(m2)
|
||||
self.assertEqual(66,g.percentage)
|
||||
eq_(66,g.percentage)
|
||||
|
||||
def test_percentage_on_empty_group(self):
|
||||
g = Group()
|
||||
self.assertEqual(0,g.percentage)
|
||||
eq_(0,g.percentage)
|
||||
|
||||
def test_prioritize(self):
|
||||
m1,m2,m3 = get_match_triangle()
|
||||
@ -656,9 +655,9 @@ class TCGroup(TestCase):
|
||||
g.add_match(m1)
|
||||
g.add_match(m2)
|
||||
g.add_match(m3)
|
||||
self.assert_(o1 is g.ref)
|
||||
assert o1 is g.ref
|
||||
g.prioritize(lambda x:x.name)
|
||||
self.assert_(o3 is g.ref)
|
||||
assert o3 is g.ref
|
||||
|
||||
def test_prioritize_with_tie_breaker(self):
|
||||
# if the ref has the same key as one or more of the dupe, run the tie_breaker func among them
|
||||
@ -666,7 +665,7 @@ class TCGroup(TestCase):
|
||||
o1, o2, o3 = g.ordered
|
||||
tie_breaker = lambda ref, dupe: dupe is o3
|
||||
g.prioritize(lambda x:0, tie_breaker)
|
||||
self.assertTrue(g.ref is o3)
|
||||
assert g.ref is o3
|
||||
|
||||
def test_prioritize_with_tie_breaker_runs_on_all_dupes(self):
|
||||
# Even if a dupe is chosen to switch with ref with a tie breaker, we still run the tie breaker
|
||||
@ -678,7 +677,7 @@ class TCGroup(TestCase):
|
||||
o3.foo = 3
|
||||
tie_breaker = lambda ref, dupe: dupe.foo > ref.foo
|
||||
g.prioritize(lambda x:0, tie_breaker)
|
||||
self.assertTrue(g.ref is o3)
|
||||
assert g.ref is o3
|
||||
|
||||
def test_prioritize_with_tie_breaker_runs_only_on_tie_dupes(self):
|
||||
# The tie breaker only runs on dupes that had the same value for the key_func
|
||||
@ -693,14 +692,14 @@ class TCGroup(TestCase):
|
||||
key_func = lambda x: -x.foo
|
||||
tie_breaker = lambda ref, dupe: dupe.bar > ref.bar
|
||||
g.prioritize(key_func, tie_breaker)
|
||||
self.assertTrue(g.ref is o2)
|
||||
assert g.ref is o2
|
||||
|
||||
def test_list_like(self):
|
||||
g = Group()
|
||||
o1,o2 = (NamedObject("foo",True),NamedObject("bar",True))
|
||||
g.add_match(get_match(o1,o2))
|
||||
self.assert_(g[0] is o1)
|
||||
self.assert_(g[1] is o2)
|
||||
assert g[0] is o1
|
||||
assert g[1] is o2
|
||||
|
||||
def test_discard_matches(self):
|
||||
g = Group()
|
||||
@ -708,33 +707,33 @@ class TCGroup(TestCase):
|
||||
g.add_match(get_match(o1,o2))
|
||||
g.add_match(get_match(o1,o3))
|
||||
g.discard_matches()
|
||||
self.assertEqual(1,len(g.matches))
|
||||
self.assertEqual(0,len(g.candidates))
|
||||
eq_(1,len(g.matches))
|
||||
eq_(0,len(g.candidates))
|
||||
|
||||
|
||||
class TCget_groups(TestCase):
|
||||
class TestCaseget_groups:
|
||||
def test_empty(self):
|
||||
r = get_groups([])
|
||||
self.assertEqual([],r)
|
||||
eq_([],r)
|
||||
|
||||
def test_simple(self):
|
||||
l = [NamedObject("foo bar"),NamedObject("bar bleh")]
|
||||
matches = getmatches(l)
|
||||
m = matches[0]
|
||||
r = get_groups(matches)
|
||||
self.assertEqual(1,len(r))
|
||||
eq_(1,len(r))
|
||||
g = r[0]
|
||||
self.assert_(g.ref is m.first)
|
||||
self.assertEqual([m.second],g.dupes)
|
||||
assert g.ref is m.first
|
||||
eq_([m.second],g.dupes)
|
||||
|
||||
def test_group_with_multiple_matches(self):
|
||||
#This results in 3 matches
|
||||
l = [NamedObject("foo"),NamedObject("foo"),NamedObject("foo")]
|
||||
matches = getmatches(l)
|
||||
r = get_groups(matches)
|
||||
self.assertEqual(1,len(r))
|
||||
eq_(1,len(r))
|
||||
g = r[0]
|
||||
self.assertEqual(3,len(g))
|
||||
eq_(3,len(g))
|
||||
|
||||
def test_must_choose_a_group(self):
|
||||
l = [NamedObject("a b"),NamedObject("a b"),NamedObject("b c"),NamedObject("c d"),NamedObject("c d")]
|
||||
@ -742,8 +741,8 @@ class TCget_groups(TestCase):
|
||||
#"b c" can go either of them, but not both.
|
||||
matches = getmatches(l)
|
||||
r = get_groups(matches)
|
||||
self.assertEqual(2,len(r))
|
||||
self.assertEqual(5,len(r[0])+len(r[1]))
|
||||
eq_(2,len(r))
|
||||
eq_(5,len(r[0])+len(r[1]))
|
||||
|
||||
def test_should_all_go_in_the_same_group(self):
|
||||
l = [NamedObject("a b"),NamedObject("a b"),NamedObject("a b"),NamedObject("a b")]
|
||||
@ -751,7 +750,7 @@ class TCget_groups(TestCase):
|
||||
#"b c" can fit in both, but it must be in only one of them
|
||||
matches = getmatches(l)
|
||||
r = get_groups(matches)
|
||||
self.assertEqual(1,len(r))
|
||||
eq_(1,len(r))
|
||||
|
||||
def test_give_priority_to_matches_with_higher_percentage(self):
|
||||
o1 = NamedObject(with_words=True)
|
||||
@ -760,19 +759,19 @@ class TCget_groups(TestCase):
|
||||
m1 = Match(o1, o2, 1)
|
||||
m2 = Match(o2, o3, 2)
|
||||
r = get_groups([m1,m2])
|
||||
self.assertEqual(1,len(r))
|
||||
eq_(1,len(r))
|
||||
g = r[0]
|
||||
self.assertEqual(2,len(g))
|
||||
self.assert_(o1 not in g)
|
||||
self.assert_(o2 in g)
|
||||
self.assert_(o3 in g)
|
||||
eq_(2,len(g))
|
||||
assert o1 not in g
|
||||
assert o2 in g
|
||||
assert o3 in g
|
||||
|
||||
def test_four_sized_group(self):
|
||||
l = [NamedObject("foobar") for i in range(4)]
|
||||
m = getmatches(l)
|
||||
r = get_groups(m)
|
||||
self.assertEqual(1,len(r))
|
||||
self.assertEqual(4,len(r[0]))
|
||||
eq_(1,len(r))
|
||||
eq_(4,len(r[0]))
|
||||
|
||||
def test_referenced_by_ref2(self):
|
||||
o1 = NamedObject(with_words=True)
|
||||
@ -782,7 +781,7 @@ class TCget_groups(TestCase):
|
||||
m2 = get_match(o3,o1)
|
||||
m3 = get_match(o3,o2)
|
||||
r = get_groups([m1,m2,m3])
|
||||
self.assertEqual(3,len(r[0]))
|
||||
eq_(3,len(r[0]))
|
||||
|
||||
def test_job(self):
|
||||
def do_progress(p,d=''):
|
||||
@ -795,8 +794,8 @@ class TCget_groups(TestCase):
|
||||
#101%: To make sure it is processed first so the job test works correctly
|
||||
m4 = Match(NamedObject('a',True), NamedObject('a',True), 101)
|
||||
get_groups([m1,m2,m3,m4],j)
|
||||
self.assertEqual(0,self.log[0])
|
||||
self.assertEqual(100,self.log[-1])
|
||||
eq_(0,self.log[0])
|
||||
eq_(100,self.log[-1])
|
||||
|
||||
def test_group_admissible_discarded_dupes(self):
|
||||
# If, with a (A, B, C, D) set, all match with A, but C and D don't match with B and that the
|
||||
|
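The hunks above follow one mechanical pattern: unittest-style assertions (self.assertEqual, self.assert_, self.assertTrue) become eq_ and bare assert, and self.mock(...) becomes pytest's built-in monkeypatch.setattr(...), which undoes the patch automatically at teardown. A minimal sketch of the idea, assuming eq_ is a thin assert wrapper like the one in hscommon.testutil (the real implementation may differ in details):

# Sketch only; not hscommon.testutil's actual code.
def eq_(a, b, msg=None):
    assert a == b, msg or '%r != %r' % (a, b)

def test_patching(monkeypatch):
    import os
    # monkeypatch.setattr swaps the attribute for this test only and
    # restores the original value afterwards (pytest built-in fixture).
    monkeypatch.setattr(os, 'sep', '/')
    eq_(os.sep, '/')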
@@ -13,8 +13,7 @@ import os.path as op
from xml.etree import ElementTree as ET

from hsutil.path import Path
from hsutil.testutil import eq_
from hsutil.testcase import TestCase
from hscommon.testutil import eq_
from hsutil.misc import first

from . import engine_test, data
@@ -44,8 +43,8 @@ def GetTestGroups():
groups.sort(key=len, reverse=True) # We want the group with 3 members to be first.
return (objects,matches,groups)

class TCResultsEmpty(TestCase):
def setUp(self):
class TestCaseResultsEmpty:
def setup_method(self, method):
self.results = Results(data)

def test_apply_invalid_filter(self):
@@ -74,8 +73,8 @@ class TCResultsEmpty(TestCase):
assert not self.results.is_modified


class TCResultsWithSomeGroups(TestCase):
def setUp(self):
class TestCaseResultsWithSomeGroups:
def setup_method(self, method):
self.results = Results(data)
self.objects,self.matches,self.groups = GetTestGroups()
self.results.groups = self.groups
@@ -222,8 +221,8 @@ class TCResultsWithSomeGroups(TestCase):
assert not self.results.is_modified


class ResultsWithSavedResults(TestCase):
def setUp(self):
class TestCaseResultsWithSavedResults:
def setup_method(self, method):
self.results = Results(data)
self.objects,self.matches,self.groups = GetTestGroups()
self.results.groups = self.groups
@@ -255,8 +254,8 @@ class ResultsWithSavedResults(TestCase):
assert self.results.is_modified


class TCResultsMarkings(TestCase):
def setUp(self):
class TestCaseResultsMarkings:
def setup_method(self, method):
self.results = Results(data)
self.objects,self.matches,self.groups = GetTestGroups()
self.results.groups = self.groups
@@ -356,7 +355,6 @@ class TCResultsMarkings(TestCase):

def test_remove_duplicates(self):
g1 = self.results.groups[0]
g2 = self.results.groups[1]
self.results.mark(g1.dupes[0])
eq_("1 / 3 (1.00 KB / 1.01 KB) duplicates marked.",self.results.stat_line)
self.results.remove_duplicates([g1.dupes[1]])
@@ -410,8 +408,8 @@ class TCResultsMarkings(TestCase):
assert r.is_marked(self.objects[4])


class TCResultsXML(TestCase):
def setUp(self):
class TestCaseResultsXML:
def setup_method(self, method):
self.results = Results(data)
self.objects, self.matches, self.groups = GetTestGroups()
self.results.groups = self.groups
@@ -486,11 +484,11 @@ class TCResultsXML(TestCase):
eq_(['ibabtu'],g2[0].words)
eq_(['ibabtu'],g2[1].words)

def test_LoadXML_with_filename(self):
def test_LoadXML_with_filename(self, tmpdir):
def get_file(path):
return [f for f in self.objects if str(f.path) == path][0]

filename = op.join(self.tmpdir(), 'dupeguru_results.xml')
filename = str(tmpdir.join('dupeguru_results.xml'))
self.objects[4].name = 'ibabtu 2' #we can't have 2 files with the same path
self.results.save_to_xml(filename)
r = Results(data)
@@ -634,8 +632,8 @@ class TCResultsXML(TestCase):
self.results.save_to_xml(io.BytesIO()) # don't crash


class TCResultsFilter(TestCase):
def setUp(self):
class TestCaseResultsFilter:
def setup_method(self, method):
self.results = Results(data)
self.objects, self.matches, self.groups = GetTestGroups()
self.results.groups = self.groups
@@ -716,11 +714,11 @@ class TCResultsFilter(TestCase):
eq_(1, len(self.results.groups))
assert self.results.groups[0] is self.groups[0]

def test_load_cancels_filter(self):
def test_load_cancels_filter(self, tmpdir):
def get_file(path):
return [f for f in self.objects if str(f.path) == path][0]

filename = op.join(self.tmpdir(), 'dupeguru_results.xml')
filename = str(tmpdir.join('dupeguru_results.xml'))
self.objects[4].name = 'ibabtu 2' #we can't have 2 files with the same path
self.results.save_to_xml(filename)
r = Results(data)
@@ -759,8 +757,8 @@ class TCResultsFilter(TestCase):
eq_(expected, self.results.stat_line)


class TCResultsRefFile(TestCase):
def setUp(self):
class TestCaseResultsRefFile:
def setup_method(self, method):
self.results = Results(data)
self.objects, self.matches, self.groups = GetTestGroups()
self.objects[0].is_ref = True
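The Results test classes keep their fixtures but trade unittest's setUp for pytest's setup_method hook, and file-writing tests take pytest's built-in tmpdir fixture instead of self.tmpdir(). A minimal sketch of the converted shape, with illustrative names (not from dupeGuru):

class TestCaseSketch:
    def setup_method(self, method):
        # pytest runs this before each test method, like unittest's setUp().
        self.values = [1, 2, 3]

    def test_len(self):
        assert len(self.values) == 3

def test_write_file(tmpdir):
    # tmpdir is a per-test temporary directory provided by pytest;
    # str(tmpdir.join(...)) yields a plain path string, which is why the
    # diff wraps it as Path(str(tmpdir)) for hsutil's Path type.
    filename = str(tmpdir.join('out.xml'))
    open(filename, 'w').close()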
@@ -9,15 +9,14 @@
from jobprogress import job
from hsutil import io
from hsutil.path import Path
from hsutil.testutil import eq_
from hsutil.testcase import TestCase
from hscommon.testutil import eq_

from .. import fs
from ..engine import getwords, Match
from ..ignore import IgnoreList
from ..scanner import *

class NamedObject(object):
class NamedObject:
def __init__(self, name="foobar", size=1):
self.name = name
self.size = size
@@ -30,449 +29,445 @@ class NamedObject(object):

no = NamedObject

#--- Scanner
class ScannerTestFakeFiles(TestCase):
def setUp(self):
# This is a hack to avoid invalidating all previous tests since the scanner started to test
# for file existence before doing the match grouping.
self.mock(io, 'exists', lambda _: True)

def test_empty(self):
s = Scanner()
r = s.GetDupeGroups([])
eq_(r, [])

def test_default_settings(self):
s = Scanner()
eq_(s.min_match_percentage, 80)
eq_(s.scan_type, ScanType.Filename)
eq_(s.mix_file_kind, True)
eq_(s.word_weighting, False)
eq_(s.match_similar_words, False)
assert isinstance(s.ignore_list, IgnoreList)

def test_simple_with_default_settings(self):
s = Scanner()
f = [no('foo bar'), no('foo bar'), no('foo bleh')]
r = s.GetDupeGroups(f)
eq_(len(r), 1)
g = r[0]
#'foo bleh' cannot be in the group because the default min match % is 80
eq_(len(g), 2)
assert g.ref in f[:2]
assert g.dupes[0] in f[:2]

def test_simple_with_lower_min_match(self):
s = Scanner()
s.min_match_percentage = 50
f = [no('foo bar'), no('foo bar'), no('foo bleh')]
r = s.GetDupeGroups(f)
eq_(len(r), 1)
g = r[0]
eq_(len(g), 3)

def test_trim_all_ref_groups(self):
# When all files of a group are ref, don't include that group in the results, but also don't
# count the files from that group as discarded.
s = Scanner()
f = [no('foo'), no('foo'), no('bar'), no('bar')]
f[2].is_ref = True
f[3].is_ref = True
r = s.GetDupeGroups(f)
eq_(len(r), 1)
eq_(s.discarded_file_count, 0)

def test_priorize(self):
s = Scanner()
f = [no('foo'), no('foo'), no('bar'), no('bar')]
f[1].size = 2
f[2].size = 3
f[3].is_ref = True
r = s.GetDupeGroups(f)
g1, g2 = r
assert f[1] in (g1.ref,g2.ref)
assert f[0] in (g1.dupes[0],g2.dupes[0])
assert f[3] in (g1.ref,g2.ref)
assert f[2] in (g1.dupes[0],g2.dupes[0])

def test_content_scan(self):
s = Scanner()
s.scan_type = ScanType.Contents
f = [no('foo'), no('bar'), no('bleh')]
f[0].md5 = f[0].md5partial = 'foobar'
f[1].md5 = f[1].md5partial = 'foobar'
f[2].md5 = f[2].md5partial = 'bleh'
r = s.GetDupeGroups(f)
eq_(len(r), 1)
eq_(len(r[0]), 2)
eq_(s.discarded_file_count, 0) # don't count the different md5 as discarded!

def test_content_scan_compare_sizes_first(self):
class MyFile(no):
@property
def md5(file):
raise AssertionError()

s = Scanner()
s.scan_type = ScanType.Contents
f = [MyFile('foo', 1), MyFile('bar', 2)]
eq_(len(s.GetDupeGroups(f)), 0)

def test_min_match_perc_doesnt_matter_for_content_scan(self):
s = Scanner()
s.scan_type = ScanType.Contents
f = [no('foo'), no('bar'), no('bleh')]
f[0].md5 = f[0].md5partial = 'foobar'
f[1].md5 = f[1].md5partial = 'foobar'
f[2].md5 = f[2].md5partial = 'bleh'
s.min_match_percentage = 101
r = s.GetDupeGroups(f)
eq_(len(r), 1)
eq_(len(r[0]), 2)
s.min_match_percentage = 0
r = s.GetDupeGroups(f)
eq_(len(r), 1)
eq_(len(r[0]), 2)

def test_content_scan_doesnt_put_md5_in_words_at_the_end(self):
s = Scanner()
s.scan_type = ScanType.Contents
f = [no('foo'),no('bar')]
f[0].md5 = f[0].md5partial = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f'
f[1].md5 = f[1].md5partial = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f'
r = s.GetDupeGroups(f)
g = r[0]

def test_extension_is_not_counted_in_filename_scan(self):
s = Scanner()
s.min_match_percentage = 100
f = [no('foo.bar'), no('foo.bleh')]
r = s.GetDupeGroups(f)
eq_(len(r), 1)
eq_(len(r[0]), 2)

def test_job(self):
def do_progress(progress, desc=''):
log.append(progress)
return True

s = Scanner()
log = []
f = [no('foo bar'), no('foo bar'), no('foo bleh')]
r = s.GetDupeGroups(f, job.Job(1, do_progress))
eq_(log[0], 0)
eq_(log[-1], 100)

def test_mix_file_kind(self):
s = Scanner()
s.mix_file_kind = False
f = [no('foo.1'), no('foo.2')]
r = s.GetDupeGroups(f)
eq_(len(r), 0)

def test_word_weighting(self):
s = Scanner()
s.min_match_percentage = 75
s.word_weighting = True
f = [no('foo bar'), no('foo bar bleh')]
r = s.GetDupeGroups(f)
eq_(len(r), 1)
g = r[0]
m = g.get_match_of(g.dupes[0])
eq_(m.percentage, 75) # 16 letters, 12 matching

def test_similar_words(self):
s = Scanner()
s.match_similar_words = True
f = [no('The White Stripes'), no('The Whites Stripe'), no('Limp Bizkit'), no('Limp Bizkitt')]
r = s.GetDupeGroups(f)
eq_(len(r), 2)

def test_fields(self):
s = Scanner()
s.scan_type = ScanType.Fields
f = [no('The White Stripes - Little Ghost'), no('The White Stripes - Little Acorn')]
r = s.GetDupeGroups(f)
eq_(len(r), 0)

def test_fields_no_order(self):
s = Scanner()
s.scan_type = ScanType.FieldsNoOrder
f = [no('The White Stripes - Little Ghost'), no('Little Ghost - The White Stripes')]
r = s.GetDupeGroups(f)
eq_(len(r), 1)

def test_tag_scan(self):
s = Scanner()
s.scan_type = ScanType.Tag
o1 = no('foo')
o2 = no('bar')
o1.artist = 'The White Stripes'
o1.title = 'The Air Near My Fingers'
o2.artist = 'The White Stripes'
o2.title = 'The Air Near My Fingers'
r = s.GetDupeGroups([o1,o2])
eq_(len(r), 1)

def test_tag_with_album_scan(self):
s = Scanner()
s.scan_type = ScanType.Tag
s.scanned_tags = set(['artist', 'album', 'title'])
o1 = no('foo')
o2 = no('bar')
o3 = no('bleh')
o1.artist = 'The White Stripes'
o1.title = 'The Air Near My Fingers'
o1.album = 'Elephant'
o2.artist = 'The White Stripes'
o2.title = 'The Air Near My Fingers'
o2.album = 'Elephant'
o3.artist = 'The White Stripes'
o3.title = 'The Air Near My Fingers'
o3.album = 'foobar'
r = s.GetDupeGroups([o1,o2,o3])
eq_(len(r), 1)

def test_that_dash_in_tags_dont_create_new_fields(self):
s = Scanner()
s.scan_type = ScanType.Tag
s.scanned_tags = set(['artist', 'album', 'title'])
s.min_match_percentage = 50
o1 = no('foo')
o2 = no('bar')
o1.artist = 'The White Stripes - a'
o1.title = 'The Air Near My Fingers - a'
o1.album = 'Elephant - a'
o2.artist = 'The White Stripes - b'
o2.title = 'The Air Near My Fingers - b'
o2.album = 'Elephant - b'
r = s.GetDupeGroups([o1,o2])
eq_(len(r), 1)

def test_tag_scan_with_different_scanned(self):
s = Scanner()
s.scan_type = ScanType.Tag
s.scanned_tags = set(['track', 'year'])
o1 = no('foo')
o2 = no('bar')
o1.artist = 'The White Stripes'
o1.title = 'some title'
o1.track = 'foo'
o1.year = 'bar'
o2.artist = 'The White Stripes'
o2.title = 'another title'
o2.track = 'foo'
o2.year = 'bar'
r = s.GetDupeGroups([o1, o2])
eq_(len(r), 1)

def test_tag_scan_only_scans_existing_tags(self):
s = Scanner()
s.scan_type = ScanType.Tag
s.scanned_tags = set(['artist', 'foo'])
o1 = no('foo')
o2 = no('bar')
o1.artist = 'The White Stripes'
o1.foo = 'foo'
o2.artist = 'The White Stripes'
o2.foo = 'bar'
r = s.GetDupeGroups([o1, o2])
eq_(len(r), 1) # Because 'foo' is not scanned, they match

def test_tag_scan_converts_to_str(self):
s = Scanner()
s.scan_type = ScanType.Tag
s.scanned_tags = set(['track'])
o1 = no('foo')
o2 = no('bar')
o1.track = 42
o2.track = 42
try:
r = s.GetDupeGroups([o1, o2])
except TypeError:
def pytest_funcarg__fake_fileexists(request):
# This is a hack to avoid invalidating all previous tests since the scanner started to test
# for file existence before doing the match grouping.
monkeypatch = request.getfuncargvalue('monkeypatch')
monkeypatch.setattr(io, 'exists', lambda _: True)

def test_empty(fake_fileexists):
s = Scanner()
r = s.GetDupeGroups([])
eq_(r, [])

def test_default_settings(fake_fileexists):
s = Scanner()
eq_(s.min_match_percentage, 80)
eq_(s.scan_type, ScanType.Filename)
eq_(s.mix_file_kind, True)
eq_(s.word_weighting, False)
eq_(s.match_similar_words, False)
assert isinstance(s.ignore_list, IgnoreList)

def test_simple_with_default_settings(fake_fileexists):
s = Scanner()
f = [no('foo bar'), no('foo bar'), no('foo bleh')]
r = s.GetDupeGroups(f)
eq_(len(r), 1)
g = r[0]
#'foo bleh' cannot be in the group because the default min match % is 80
eq_(len(g), 2)
assert g.ref in f[:2]
assert g.dupes[0] in f[:2]

def test_simple_with_lower_min_match(fake_fileexists):
s = Scanner()
s.min_match_percentage = 50
f = [no('foo bar'), no('foo bar'), no('foo bleh')]
r = s.GetDupeGroups(f)
eq_(len(r), 1)
g = r[0]
eq_(len(g), 3)

def test_trim_all_ref_groups(fake_fileexists):
# When all files of a group are ref, don't include that group in the results, but also don't
# count the files from that group as discarded.
s = Scanner()
f = [no('foo'), no('foo'), no('bar'), no('bar')]
f[2].is_ref = True
f[3].is_ref = True
r = s.GetDupeGroups(f)
eq_(len(r), 1)
eq_(s.discarded_file_count, 0)

def test_priorize(fake_fileexists):
s = Scanner()
f = [no('foo'), no('foo'), no('bar'), no('bar')]
f[1].size = 2
f[2].size = 3
f[3].is_ref = True
r = s.GetDupeGroups(f)
g1, g2 = r
assert f[1] in (g1.ref,g2.ref)
assert f[0] in (g1.dupes[0],g2.dupes[0])
assert f[3] in (g1.ref,g2.ref)
assert f[2] in (g1.dupes[0],g2.dupes[0])

def test_content_scan(fake_fileexists):
s = Scanner()
s.scan_type = ScanType.Contents
f = [no('foo'), no('bar'), no('bleh')]
f[0].md5 = f[0].md5partial = 'foobar'
f[1].md5 = f[1].md5partial = 'foobar'
f[2].md5 = f[2].md5partial = 'bleh'
r = s.GetDupeGroups(f)
eq_(len(r), 1)
eq_(len(r[0]), 2)
eq_(s.discarded_file_count, 0) # don't count the different md5 as discarded!

def test_content_scan_compare_sizes_first(fake_fileexists):
class MyFile(no):
@property
def md5(file):
raise AssertionError()
eq_(len(r), 1)

def test_tag_scan_non_ascii(self):
s = Scanner()
s.scan_type = ScanType.Tag
s.scanned_tags = set(['title'])
o1 = no('foo')
o2 = no('bar')
o1.title = 'foobar\u00e9'
o2.title = 'foobar\u00e9'
try:
r = s.GetDupeGroups([o1, o2])
except UnicodeEncodeError:

s = Scanner()
s.scan_type = ScanType.Contents
f = [MyFile('foo', 1), MyFile('bar', 2)]
eq_(len(s.GetDupeGroups(f)), 0)

def test_min_match_perc_doesnt_matter_for_content_scan(fake_fileexists):
s = Scanner()
s.scan_type = ScanType.Contents
f = [no('foo'), no('bar'), no('bleh')]
f[0].md5 = f[0].md5partial = 'foobar'
f[1].md5 = f[1].md5partial = 'foobar'
f[2].md5 = f[2].md5partial = 'bleh'
s.min_match_percentage = 101
r = s.GetDupeGroups(f)
eq_(len(r), 1)
eq_(len(r[0]), 2)
s.min_match_percentage = 0
r = s.GetDupeGroups(f)
eq_(len(r), 1)
eq_(len(r[0]), 2)

def test_content_scan_doesnt_put_md5_in_words_at_the_end(fake_fileexists):
s = Scanner()
s.scan_type = ScanType.Contents
f = [no('foo'),no('bar')]
f[0].md5 = f[0].md5partial = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f'
f[1].md5 = f[1].md5partial = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f'
r = s.GetDupeGroups(f)
g = r[0]

def test_extension_is_not_counted_in_filename_scan(fake_fileexists):
s = Scanner()
s.min_match_percentage = 100
f = [no('foo.bar'), no('foo.bleh')]
r = s.GetDupeGroups(f)
eq_(len(r), 1)
eq_(len(r[0]), 2)

def test_job(fake_fileexists):
def do_progress(progress, desc=''):
log.append(progress)
return True

s = Scanner()
log = []
f = [no('foo bar'), no('foo bar'), no('foo bleh')]
r = s.GetDupeGroups(f, job.Job(1, do_progress))
eq_(log[0], 0)
eq_(log[-1], 100)

def test_mix_file_kind(fake_fileexists):
s = Scanner()
s.mix_file_kind = False
f = [no('foo.1'), no('foo.2')]
r = s.GetDupeGroups(f)
eq_(len(r), 0)

def test_word_weighting(fake_fileexists):
s = Scanner()
s.min_match_percentage = 75
s.word_weighting = True
f = [no('foo bar'), no('foo bar bleh')]
r = s.GetDupeGroups(f)
eq_(len(r), 1)
g = r[0]
m = g.get_match_of(g.dupes[0])
eq_(m.percentage, 75) # 16 letters, 12 matching

def test_similar_words(fake_fileexists):
s = Scanner()
s.match_similar_words = True
f = [no('The White Stripes'), no('The Whites Stripe'), no('Limp Bizkit'), no('Limp Bizkitt')]
r = s.GetDupeGroups(f)
eq_(len(r), 2)

def test_fields(fake_fileexists):
s = Scanner()
s.scan_type = ScanType.Fields
f = [no('The White Stripes - Little Ghost'), no('The White Stripes - Little Acorn')]
r = s.GetDupeGroups(f)
eq_(len(r), 0)

def test_fields_no_order(fake_fileexists):
s = Scanner()
s.scan_type = ScanType.FieldsNoOrder
f = [no('The White Stripes - Little Ghost'), no('Little Ghost - The White Stripes')]
r = s.GetDupeGroups(f)
eq_(len(r), 1)

def test_tag_scan(fake_fileexists):
s = Scanner()
s.scan_type = ScanType.Tag
o1 = no('foo')
o2 = no('bar')
o1.artist = 'The White Stripes'
o1.title = 'The Air Near My Fingers'
o2.artist = 'The White Stripes'
o2.title = 'The Air Near My Fingers'
r = s.GetDupeGroups([o1,o2])
eq_(len(r), 1)

def test_tag_with_album_scan(fake_fileexists):
s = Scanner()
s.scan_type = ScanType.Tag
s.scanned_tags = set(['artist', 'album', 'title'])
o1 = no('foo')
o2 = no('bar')
o3 = no('bleh')
o1.artist = 'The White Stripes'
o1.title = 'The Air Near My Fingers'
o1.album = 'Elephant'
o2.artist = 'The White Stripes'
o2.title = 'The Air Near My Fingers'
o2.album = 'Elephant'
o3.artist = 'The White Stripes'
o3.title = 'The Air Near My Fingers'
o3.album = 'foobar'
r = s.GetDupeGroups([o1,o2,o3])
eq_(len(r), 1)

def test_that_dash_in_tags_dont_create_new_fields(fake_fileexists):
s = Scanner()
s.scan_type = ScanType.Tag
s.scanned_tags = set(['artist', 'album', 'title'])
s.min_match_percentage = 50
o1 = no('foo')
o2 = no('bar')
o1.artist = 'The White Stripes - a'
o1.title = 'The Air Near My Fingers - a'
o1.album = 'Elephant - a'
o2.artist = 'The White Stripes - b'
o2.title = 'The Air Near My Fingers - b'
o2.album = 'Elephant - b'
r = s.GetDupeGroups([o1,o2])
eq_(len(r), 1)

def test_tag_scan_with_different_scanned(fake_fileexists):
s = Scanner()
s.scan_type = ScanType.Tag
s.scanned_tags = set(['track', 'year'])
o1 = no('foo')
o2 = no('bar')
o1.artist = 'The White Stripes'
o1.title = 'some title'
o1.track = 'foo'
o1.year = 'bar'
o2.artist = 'The White Stripes'
o2.title = 'another title'
o2.track = 'foo'
o2.year = 'bar'
r = s.GetDupeGroups([o1, o2])
eq_(len(r), 1)

def test_tag_scan_only_scans_existing_tags(fake_fileexists):
s = Scanner()
s.scan_type = ScanType.Tag
s.scanned_tags = set(['artist', 'foo'])
o1 = no('foo')
o2 = no('bar')
o1.artist = 'The White Stripes'
o1.foo = 'foo'
o2.artist = 'The White Stripes'
o2.foo = 'bar'
r = s.GetDupeGroups([o1, o2])
eq_(len(r), 1) # Because 'foo' is not scanned, they match

def test_tag_scan_converts_to_str(fake_fileexists):
s = Scanner()
s.scan_type = ScanType.Tag
s.scanned_tags = set(['track'])
o1 = no('foo')
o2 = no('bar')
o1.track = 42
o2.track = 42
try:
r = s.GetDupeGroups([o1, o2])
except TypeError:
raise AssertionError()
eq_(len(r), 1)

def test_tag_scan_non_ascii(fake_fileexists):
s = Scanner()
s.scan_type = ScanType.Tag
s.scanned_tags = set(['title'])
o1 = no('foo')
o2 = no('bar')
o1.title = 'foobar\u00e9'
o2.title = 'foobar\u00e9'
try:
r = s.GetDupeGroups([o1, o2])
except UnicodeEncodeError:
raise AssertionError()
eq_(len(r), 1)

def test_audio_content_scan(fake_fileexists):
s = Scanner()
s.scan_type = ScanType.ContentsAudio
f = [no('foo'), no('bar'), no('bleh')]
f[0].md5 = 'foo'
f[1].md5 = 'bar'
f[2].md5 = 'bleh'
f[0].md5partial = 'foo'
f[1].md5partial = 'foo'
f[2].md5partial = 'bleh'
f[0].audiosize = 1
f[1].audiosize = 1
f[2].audiosize = 1
r = s.GetDupeGroups(f)
eq_(len(r), 1)
eq_(len(r[0]), 2)

def test_audio_content_scan_compare_sizes_first(fake_fileexists):
class MyFile(no):
@property
def md5partial(file):
raise AssertionError()
eq_(len(r), 1)

def test_audio_content_scan(self):
s = Scanner()
s.scan_type = ScanType.ContentsAudio
f = [no('foo'), no('bar'), no('bleh')]
f[0].md5 = 'foo'
f[1].md5 = 'bar'
f[2].md5 = 'bleh'
f[0].md5partial = 'foo'
f[1].md5partial = 'foo'
f[2].md5partial = 'bleh'
f[0].audiosize = 1
f[1].audiosize = 1
f[2].audiosize = 1
r = s.GetDupeGroups(f)
eq_(len(r), 1)
eq_(len(r[0]), 2)

def test_audio_content_scan_compare_sizes_first(self):
class MyFile(no):
@property
def md5partial(file):
raise AssertionError()

s = Scanner()
s.scan_type = ScanType.ContentsAudio
f = [MyFile('foo'), MyFile('bar')]
f[0].audiosize = 1
f[1].audiosize = 2
eq_(len(s.GetDupeGroups(f)), 0)

def test_ignore_list(self):
s = Scanner()
f1 = no('foobar')
f2 = no('foobar')
f3 = no('foobar')
f1.path = Path('dir1/foobar')
f2.path = Path('dir2/foobar')
f3.path = Path('dir3/foobar')
s.ignore_list.Ignore(str(f1.path),str(f2.path))
s.ignore_list.Ignore(str(f1.path),str(f3.path))
r = s.GetDupeGroups([f1,f2,f3])
eq_(len(r), 1)
g = r[0]
eq_(len(g.dupes), 1)
assert f1 not in g
assert f2 in g
assert f3 in g
# Ignored matches are not counted as discarded
eq_(s.discarded_file_count, 0)

def test_ignore_list_checks_for_unicode(self):
#scanner was calling path_str for ignore list checks. Since the Path changes, it must
#be unicode(path)
s = Scanner()
f1 = no('foobar')
f2 = no('foobar')
f3 = no('foobar')
f1.path = Path('foo1\u00e9')
f2.path = Path('foo2\u00e9')
f3.path = Path('foo3\u00e9')
s.ignore_list.Ignore(str(f1.path),str(f2.path))
s.ignore_list.Ignore(str(f1.path),str(f3.path))
r = s.GetDupeGroups([f1,f2,f3])
eq_(len(r), 1)
g = r[0]
eq_(len(g.dupes), 1)
assert f1 not in g
assert f2 in g
assert f3 in g

def test_file_evaluates_to_false(self):
# A very wrong way to use any() was added at some point, causing resulting group list
# to be empty.
class FalseNamedObject(NamedObject):
def __bool__(self):
return False


s = Scanner()
f1 = FalseNamedObject('foobar')
f2 = FalseNamedObject('foobar')
r = s.GetDupeGroups([f1, f2])
eq_(len(r), 1)

def test_size_threshold(self):
# Only file equal or higher than the size_threshold in size are scanned
s = Scanner()
f1 = no('foo', 1)
f2 = no('foo', 2)
f3 = no('foo', 3)
s.size_threshold = 2
groups = s.GetDupeGroups([f1,f2,f3])
eq_(len(groups), 1)
[group] = groups
eq_(len(group), 2)
assert f1 not in group
assert f2 in group
assert f3 in group

def test_tie_breaker_path_deepness(self):
# If there is a tie in prioritization, path deepness is used as a tie breaker
s = Scanner()
o1, o2 = no('foo'), no('foo')
o1.path = Path('foo')
o2.path = Path('foo/bar')
[group] = s.GetDupeGroups([o1, o2])
assert group.ref is o2

def test_tie_breaker_copy(self):
# if copy is in the words used (even if it has a deeper path), it becomes a dupe
s = Scanner()
o1, o2 = no('foo bar Copy'), no('foo bar')
o1.path = Path('deeper/path')
o2.path = Path('foo')
[group] = s.GetDupeGroups([o1, o2])
assert group.ref is o2

def test_tie_breaker_same_name_plus_digit(self):
# if ref has the same words as dupe, but has some just one extra word which is a digit, it
# becomes a dupe
s = Scanner()
o1 = no('foo bar 42')
o2 = no('foo bar [42]')
o3 = no('foo bar (42)')
o4 = no('foo bar {42}')
o5 = no('foo bar')
# all numbered names have deeper paths, so they'll end up ref if the digits aren't correctly
# used as tie breakers
o1.path = Path('deeper/path')
o2.path = Path('deeper/path')
o3.path = Path('deeper/path')
o4.path = Path('deeper/path')
o5.path = Path('foo')
[group] = s.GetDupeGroups([o1, o2, o3, o4, o5])
assert group.ref is o5

def test_partial_group_match(self):
# Count the number od discarded matches (when a file doesn't match all other dupes of the
# group) in Scanner.discarded_file_count
s = Scanner()
o1, o2, o3 = no('a b'), no('a'), no('b')
s.min_match_percentage = 50
[group] = s.GetDupeGroups([o1, o2, o3])
eq_(len(group), 2)
assert o1 in group
assert o2 in group
assert o3 not in group
eq_(s.discarded_file_count, 1)

s = Scanner()
s.scan_type = ScanType.ContentsAudio
f = [MyFile('foo'), MyFile('bar')]
f[0].audiosize = 1
f[1].audiosize = 2
eq_(len(s.GetDupeGroups(f)), 0)

def test_ignore_list(fake_fileexists):
s = Scanner()
f1 = no('foobar')
f2 = no('foobar')
f3 = no('foobar')
f1.path = Path('dir1/foobar')
f2.path = Path('dir2/foobar')
f3.path = Path('dir3/foobar')
s.ignore_list.Ignore(str(f1.path),str(f2.path))
s.ignore_list.Ignore(str(f1.path),str(f3.path))
r = s.GetDupeGroups([f1,f2,f3])
eq_(len(r), 1)
g = r[0]
eq_(len(g.dupes), 1)
assert f1 not in g
assert f2 in g
assert f3 in g
# Ignored matches are not counted as discarded
eq_(s.discarded_file_count, 0)

def test_ignore_list_checks_for_unicode(fake_fileexists):
#scanner was calling path_str for ignore list checks. Since the Path changes, it must
#be unicode(path)
s = Scanner()
f1 = no('foobar')
f2 = no('foobar')
f3 = no('foobar')
f1.path = Path('foo1\u00e9')
f2.path = Path('foo2\u00e9')
f3.path = Path('foo3\u00e9')
s.ignore_list.Ignore(str(f1.path),str(f2.path))
s.ignore_list.Ignore(str(f1.path),str(f3.path))
r = s.GetDupeGroups([f1,f2,f3])
eq_(len(r), 1)
g = r[0]
eq_(len(g.dupes), 1)
assert f1 not in g
assert f2 in g
assert f3 in g

def test_file_evaluates_to_false(fake_fileexists):
# A very wrong way to use any() was added at some point, causing resulting group list
# to be empty.
class FalseNamedObject(NamedObject):
def __bool__(self):
return False


class ScannerTest(TestCase):
def test_dont_group_files_that_dont_exist(self):
# when creating groups, check that files exist first. It's possible that these files have
# been moved during the scan by the user.
# In this test, we have to delete one of the files between the get_matches() part and the
# get_groups() part.
s = Scanner()
s.scan_type = ScanType.Contents
p = self.tmppath()
io.open(p + 'file1', 'w').write('foo')
io.open(p + 'file2', 'w').write('foo')
file1, file2 = fs.get_files(p)
def getmatches(*args, **kw):
io.remove(file2.path)
return [Match(file1, file2, 100)]
s._getmatches = getmatches

assert not s.GetDupeGroups([file1, file2])

s = Scanner()
f1 = FalseNamedObject('foobar')
f2 = FalseNamedObject('foobar')
r = s.GetDupeGroups([f1, f2])
eq_(len(r), 1)

def test_size_threshold(fake_fileexists):
# Only file equal or higher than the size_threshold in size are scanned
s = Scanner()
f1 = no('foo', 1)
f2 = no('foo', 2)
f3 = no('foo', 3)
s.size_threshold = 2
groups = s.GetDupeGroups([f1,f2,f3])
eq_(len(groups), 1)
[group] = groups
eq_(len(group), 2)
assert f1 not in group
assert f2 in group
assert f3 in group

def test_tie_breaker_path_deepness(fake_fileexists):
# If there is a tie in prioritization, path deepness is used as a tie breaker
s = Scanner()
o1, o2 = no('foo'), no('foo')
o1.path = Path('foo')
o2.path = Path('foo/bar')
[group] = s.GetDupeGroups([o1, o2])
assert group.ref is o2

def test_tie_breaker_copy(fake_fileexists):
# if copy is in the words used (even if it has a deeper path), it becomes a dupe
s = Scanner()
o1, o2 = no('foo bar Copy'), no('foo bar')
o1.path = Path('deeper/path')
o2.path = Path('foo')
[group] = s.GetDupeGroups([o1, o2])
assert group.ref is o2

def test_tie_breaker_same_name_plus_digit(fake_fileexists):
# if ref has the same words as dupe, but has some just one extra word which is a digit, it
# becomes a dupe
s = Scanner()
o1 = no('foo bar 42')
o2 = no('foo bar [42]')
o3 = no('foo bar (42)')
o4 = no('foo bar {42}')
o5 = no('foo bar')
# all numbered names have deeper paths, so they'll end up ref if the digits aren't correctly
# used as tie breakers
o1.path = Path('deeper/path')
o2.path = Path('deeper/path')
o3.path = Path('deeper/path')
o4.path = Path('deeper/path')
o5.path = Path('foo')
[group] = s.GetDupeGroups([o1, o2, o3, o4, o5])
assert group.ref is o5

def test_partial_group_match(fake_fileexists):
# Count the number od discarded matches (when a file doesn't match all other dupes of the
# group) in Scanner.discarded_file_count
s = Scanner()
o1, o2, o3 = no('a b'), no('a'), no('b')
s.min_match_percentage = 50
[group] = s.GetDupeGroups([o1, o2, o3])
eq_(len(group), 2)
assert o1 in group
assert o2 in group
assert o3 not in group
eq_(s.discarded_file_count, 1)

def test_dont_group_files_that_dont_exist(tmpdir):
# when creating groups, check that files exist first. It's possible that these files have
# been moved during the scan by the user.
# In this test, we have to delete one of the files between the get_matches() part and the
# get_groups() part.
s = Scanner()
s.scan_type = ScanType.Contents
p = Path(str(tmpdir))
io.open(p + 'file1', 'w').write('foo')
io.open(p + 'file2', 'w').write('foo')
file1, file2 = fs.get_files(p)
def getmatches(*args, **kw):
io.remove(file2.path)
return [Match(file1, file2, 100)]
s._getmatches = getmatches

assert not s.GetDupeGroups([file1, file2])
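The module-level pytest_funcarg__fake_fileexists function above uses pytest's (old-style) funcarg protocol: any test that declares a fake_fileexists parameter gets the function's return value, and request.getfuncargvalue('monkeypatch') pulls another fixture in from inside it. A hedged sketch of the mechanism with illustrative names (modern pytest spells this @pytest.fixture and request.getfixturevalue):

def pytest_funcarg__patched_exists(request):
    # Request the built-in monkeypatch funcarg from within this one;
    # its patches are torn down automatically after the test.
    monkeypatch = request.getfuncargvalue('monkeypatch')
    import os.path
    monkeypatch.setattr(os.path, 'exists', lambda _: True)

def test_uses_fixture(patched_exists):
    # Naming the funcarg as a parameter is what triggers the setup above.
    import os.path
    assert os.path.exists('/no/such/file')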
@@ -9,12 +9,11 @@

from hsutil import io
from hsutil.path import Path
from hsutil.testcase import TestCase

from core.engine import getwords
from ..scanner import *

class NamedObject(object):
class NamedObject:
def __init__(self, name="foobar", size=1):
self.name = name
self.size = size
@@ -24,18 +23,18 @@ class NamedObject(object):

no = NamedObject

class ScannerTestFakeFiles(TestCase):
def setUp(self):
# This is a hack to avoid invalidating all previous tests since the scanner started to test
# for file existence before doing the match grouping.
self.mock(io, 'exists', lambda _: True)

def test_priorize_me(self):
# in ScannerME, bitrate goes first (right after is_ref) in priorization
s = ScannerME()
o1, o2 = no('foo'), no('foo')
o1.bitrate = 1
o2.bitrate = 2
[group] = s.GetDupeGroups([o1, o2])
assert group.ref is o2

def pytest_funcarg__fake_fileexists(request):
# This is a hack to avoid invalidating all previous tests since the scanner started to test
# for file existence before doing the match grouping.
monkeypatch = request.getfuncargvalue('monkeypatch')
monkeypatch.setattr(io, 'exists', lambda _: True)

def test_priorize_me(fake_fileexists):
# in ScannerME, bitrate goes first (right after is_ref) in priorization
s = ScannerME()
o1, o2 = no('foo'), no('foo')
o1.bitrate = 1
o2.bitrate = 2
[group] = s.GetDupeGroups([o1, o2])
assert group.ref is o2
@@ -9,37 +9,35 @@

import hashlib

from hsutil.testcase import TestCase
from hsutil.testutil import eq_
from hsutil.path import Path
from hscommon.testutil import eq_
from core.fs import File
from core.tests.directories_test import create_fake_fs

from .. import fs

class TCBundle(TestCase):
def test_size_aggregates_subfiles(self):
p = create_fake_fs(self.tmppath())
b = fs.Bundle(p)
eq_(b.size, 12)

def test_md5_aggregate_subfiles_sorted(self):
#dir.allfiles can return child in any order. Thus, bundle.md5 must aggregate
#all files' md5 it contains, but it must make sure that it does so in the
#same order everytime.
p = create_fake_fs(self.tmppath())
b = fs.Bundle(p)
md5s = File(p + ('dir1', 'file1.test')).md5
md5s += File(p + ('dir2', 'file2.test')).md5
md5s += File(p + ('dir3', 'file3.test')).md5
md5s += File(p + 'file1.test').md5
md5s += File(p + 'file2.test').md5
md5s += File(p + 'file3.test').md5
md5 = hashlib.md5(md5s)
eq_(b.md5, md5.digest())

def test_has_file_attrs(self):
#a Bundle must behave like a file, so it must have mtime attributes
b = fs.Bundle(self.tmppath())
assert b.mtime > 0
eq_(b.extension, '')

def test_size_aggregates_subfiles(tmpdir):
p = create_fake_fs(Path(str(tmpdir)))
b = fs.Bundle(p)
eq_(b.size, 12)

def test_md5_aggregate_subfiles_sorted(tmpdir):
#dir.allfiles can return child in any order. Thus, bundle.md5 must aggregate
#all files' md5 it contains, but it must make sure that it does so in the
#same order everytime.
p = create_fake_fs(Path(str(tmpdir)))
b = fs.Bundle(p)
md5s = File(p + ('dir1', 'file1.test')).md5
md5s += File(p + ('dir2', 'file2.test')).md5
md5s += File(p + ('dir3', 'file3.test')).md5
md5s += File(p + 'file1.test').md5
md5s += File(p + 'file2.test').md5
md5s += File(p + 'file3.test').md5
md5 = hashlib.md5(md5s)
eq_(b.md5, md5.digest())

def test_has_file_attrs(tmpdir):
#a Bundle must behave like a file, so it must have mtime attributes
b = fs.Bundle(Path(str(tmpdir)))
assert b.mtime > 0
eq_(b.extension, '')
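The md5 test above pins down why Bundle must hash its children in a fixed order: directory enumeration order is not stable across filesystems, and md5(a + b) differs from md5(b + a). A sketch of the idea only, not fs.Bundle's actual implementation:

import hashlib

def aggregate_md5(child_digests):
    # Concatenating the child digests in a deterministic (here: sorted)
    # order makes the aggregate hash independent of the order in which
    # the filesystem returned the files.
    return hashlib.md5(b''.join(sorted(child_digests))).digest()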