Mirror of https://github.com/arsenetar/dupeguru.git (synced 2025-03-10 05:34:36 +00:00)
[#72 state:fixed] When files are deleted during the scan, don't include them in the grouping phase.
--HG-- extra : convert_revision : svn%3Ac306627e-7827-47d3-bdf0-9a457c9553a1/trunk%40225
Commit f070e90347 (parent 88127d8b8d)
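In other words: the scan runs in two phases, matching then grouping, and a long content scan leaves plenty of time for the user to delete or move files in between. A minimal sketch of where the race sits and what the fix does (illustrative names, not dupeGuru's real API):

    import os

    def scan(files, get_matches, get_groups):
        matches = get_matches(files)    # phase 1: can run for a long time;
                                        # the user may delete files meanwhile
        # The fix: drop matches whose files are gone before grouping them.
        matches = [m for m in matches
                   if os.path.exists(m.first.path) and os.path.exists(m.second.path)]
        return get_groups(matches)      # phase 2: only sees live files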
engine.py:

@@ -208,7 +208,9 @@ def getmatches_by_contents(files, sizeattr='size', partial=False, j=job.nulljob)
     j = j.start_subjob([2, 8])
     size2files = defaultdict(set)
     for file in j.iter_with_progress(files, 'Read size of %d/%d files'):
-        size2files[getattr(file, sizeattr)].add(file)
+        filesize = getattr(file, sizeattr)
+        if filesize:
+            size2files[filesize].add(file)
     possible_matches = [files for files in size2files.values() if len(files) > 1]
     del size2files
     result = []
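This hunk makes the content scan bucket files by size only when the size is non-zero. A standalone sketch of that pre-grouping step (assuming only a `.size` attribute on the file objects; names here are illustrative, not dupeGuru's API):

    from collections import defaultdict

    def possible_match_groups(files):
        size2files = defaultdict(set)
        for f in files:
            if f.size:                  # skip zero-size (empty) files
                size2files[f.size].add(f)
        # Only same-size files can be byte-for-byte identical, so each bucket
        # with two or more members is a candidate group for hash comparison.
        return [group for group in size2files.values() if len(group) > 1]

Zero-size files are excluded outright: every empty file has the same size and the same (empty) contents, so they would otherwise all land in one bucket and "match" each other.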
scanner.py:

@@ -10,7 +10,7 @@
 import logging


-from hsutil import job
+from hsutil import job, io
 from hsutil.misc import dedupe
 from hsutil.str import get_file_ext, rem_file_ext

@@ -80,9 +80,10 @@ class Scanner(object):
         logging.info('Getting matches')
         matches = self._getmatches(files, j)
         logging.info('Found %d matches' % len(matches))
-        if not self.mix_file_kind:
-            j.set_progress(100, 'Removing false matches')
+        j.set_progress(100, 'Removing false matches')
+        if not self.mix_file_kind:
             matches = [m for m in matches if get_file_ext(m.first.name) == get_file_ext(m.second.name)]
+        matches = [m for m in matches if io.exists(m.first.path) and io.exists(m.second.path)]
         if self.ignore_list:
             j = j.start_subjob(2)
             iter_matches = j.iter_with_progress(matches, 'Processed %d/%d matches against the ignore list')
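After this change the false-match cleanup is a small pipeline: the progress tick fires unconditionally, extension mismatches are dropped only when file kinds may not mix, and matches whose files vanished during the scan are dropped always. A hedged sketch of that ordering (helper names are stand-ins, not dupeGuru's API):

    def remove_false_matches(matches, mix_file_kind, exists):
        if not mix_file_kind:
            # Different extensions => different "kind"; never a real dupe here.
            matches = [m for m in matches
                       if get_ext(m.first.name) == get_ext(m.second.name)]
        # New in this commit: files deleted or moved mid-scan must not reach
        # the grouping phase, whatever the other settings say.
        return [m for m in matches
                if exists(m.first.path) and exists(m.second.path)]

    def get_ext(name):
        # Minimal stand-in for hsutil.str.get_file_ext.
        return name.rsplit('.', 1)[-1].lower() if '.' in name else ''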
tests/engine_test.py:

@@ -15,16 +15,21 @@ from hsutil import job
 from hsutil.decorators import log_calls
 from hsutil.testcase import TestCase

-from .. import engine
+from .. import engine, fs
 from ..engine import *

 class NamedObject(object):
-    def __init__(self, name="foobar", with_words=False):
+    def __init__(self, name="foobar", with_words=False, size=1):
         self.name = name
+        self.size = size
+        self.md5partial = name
+        self.md5 = name
         if with_words:
             self.words = getwords(name)


+no = NamedObject
+
 def get_match_triangle():
     o1 = NamedObject(with_words=True)
     o2 = NamedObject(with_words=True)
@@ -486,6 +491,12 @@ class GetMatches(TestCase):
         self.assertEqual(42, len(r))


+class GetMatchesByContents(TestCase):
+    def test_dont_compare_empty_files(self):
+        o1, o2 = no(size=0), no(size=0)
+        assert not getmatches_by_contents([o1, o2])
+
+
 class TCGroup(TestCase):
     def test_empy(self):
         g = Group()
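The stub's new md5partial/md5 attributes simply echo the name, so two NamedObjects "have the same contents" exactly when their names are equal, and size alone decides whether the engine compares them at all. Illustrative usage in the context of this test module (assuming the engine compares sizes before hashes, as the engine.py hunk above suggests):

    # Two equal, non-empty stubs: same size bucket, equal fake hashes -> a match.
    a, b = no('same'), no('same')           # size defaults to 1
    assert getmatches_by_contents([a, b])

    # Two empty stubs: the new 'if filesize:' guard skips them entirely.
    a, b = no(size=0), no(size=0)
    assert not getmatches_by_contents([a, b])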
tests/results_test.py:

@@ -21,7 +21,6 @@ from .. import engine
 from ..results import *

 class NamedObject(engine_test.NamedObject):
-    size = 1
     path = property(lambda x:Path('basepath') + x.name)
     is_ref = False

tests/scanner_test.py:

@@ -9,9 +9,11 @@

 from nose.tools import eq_

-from hsutil import job
+from hsutil import job, io
 from hsutil.path import Path
+from hsutil.testcase import TestCase

+from .. import fs
 from ..engine import getwords, Match
 from ..ignore import IgnoreList
 from ..scanner import *
@@ -27,12 +29,18 @@ class NamedObject(object):
 no = NamedObject

 #--- Scanner
-def test_empty():
+class ScannerTestFakeFiles(TestCase):
+    def setUp(self):
+        # This is a hack to avoid invalidating all previous tests since the scanner started to test
+        # for file existence before doing the match grouping.
+        self.mock(io, 'exists', lambda _: True)
+
+    def test_empty(self):
         s = Scanner()
         r = s.GetDupeGroups([])
         eq_(r, [])

-def test_default_settings():
+    def test_default_settings(self):
         s = Scanner()
         eq_(s.min_match_percentage, 80)
         eq_(s.scan_type, SCAN_TYPE_FILENAME)
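setUp's io.exists stub is what lets the pre-existing suite keep using fake NamedObject files with made-up paths: the scanner now asks io.exists() before grouping, and a fake path would fail that check. self.mock here is hsutil's auto-restoring patch helper; a rough modern equivalent with the standard library (an assumption about self.mock's behavior: patch an attribute, undo on teardown) would be:

    from unittest import mock

    def setUp(self):
        # Force the scanner's new existence check to pass for every path, so
        # fake in-memory files aren't filtered out of the results.
        patcher = mock.patch.object(io, 'exists', lambda _: True)
        patcher.start()
        self.addCleanup(patcher.stop)   # restore the real io.exists afterwards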
@@ -41,7 +49,7 @@ def test_default_settings():
         eq_(s.match_similar_words, False)
         assert isinstance(s.ignore_list, IgnoreList)

-def test_simple_with_default_settings():
+    def test_simple_with_default_settings(self):
         s = Scanner()
         f = [no('foo bar'), no('foo bar'), no('foo bleh')]
         r = s.GetDupeGroups(f)

@@ -52,7 +60,7 @@ def test_simple_with_default_settings():
         assert g.ref in f[:2]
         assert g.dupes[0] in f[:2]

-def test_simple_with_lower_min_match():
+    def test_simple_with_lower_min_match(self):
         s = Scanner()
         s.min_match_percentage = 50
         f = [no('foo bar'), no('foo bar'), no('foo bleh')]

@@ -61,7 +69,7 @@ def test_simple_with_lower_min_match():
         g = r[0]
         eq_(len(g), 3)

-def test_trim_all_ref_groups():
+    def test_trim_all_ref_groups(self):
         # When all files of a group are ref, don't include that group in the results, but also don't
         # count the files from that group as discarded.
         s = Scanner()

@@ -72,7 +80,7 @@ def test_trim_all_ref_groups():
         eq_(len(r), 1)
         eq_(s.discarded_file_count, 0)

-def test_priorize():
+    def test_priorize(self):
         s = Scanner()
         f = [no('foo'), no('foo'), no('bar'), no('bar')]
         f[1].size = 2
@@ -85,7 +93,7 @@ def test_priorize():
         assert f[3] in (g1.ref,g2.ref)
         assert f[2] in (g1.dupes[0],g2.dupes[0])

-def test_content_scan():
+    def test_content_scan(self):
         s = Scanner()
         s.scan_type = SCAN_TYPE_CONTENT
         f = [no('foo'), no('bar'), no('bleh')]

@@ -97,7 +105,7 @@ def test_content_scan():
         eq_(len(r[0]), 2)
         eq_(s.discarded_file_count, 0) # don't count the different md5 as discarded!

-def test_content_scan_compare_sizes_first():
+    def test_content_scan_compare_sizes_first(self):
         class MyFile(no):
             @property
             def md5(file):

@@ -108,7 +116,7 @@ def test_content_scan_compare_sizes_first():
         f = [MyFile('foo', 1), MyFile('bar', 2)]
         eq_(len(s.GetDupeGroups(f)), 0)

-def test_min_match_perc_doesnt_matter_for_content_scan():
+    def test_min_match_perc_doesnt_matter_for_content_scan(self):
         s = Scanner()
         s.scan_type = SCAN_TYPE_CONTENT
         f = [no('foo'), no('bar'), no('bleh')]

@@ -124,7 +132,7 @@ def test_min_match_perc_doesnt_matter_for_content_scan():
         eq_(len(r), 1)
         eq_(len(r[0]), 2)

-def test_content_scan_doesnt_put_md5_in_words_at_the_end():
+    def test_content_scan_doesnt_put_md5_in_words_at_the_end(self):
         s = Scanner()
         s.scan_type = SCAN_TYPE_CONTENT
         f = [no('foo'),no('bar')]

@@ -133,7 +141,7 @@ def test_content_scan_doesnt_put_md5_in_words_at_the_end():
         r = s.GetDupeGroups(f)
         g = r[0]

-def test_extension_is_not_counted_in_filename_scan():
+    def test_extension_is_not_counted_in_filename_scan(self):
         s = Scanner()
         s.min_match_percentage = 100
         f = [no('foo.bar'), no('foo.bleh')]
@@ -141,7 +149,7 @@ def test_extension_is_not_counted_in_filename_scan():
         eq_(len(r), 1)
         eq_(len(r[0]), 2)

-def test_job():
+    def test_job(self):
         def do_progress(progress, desc=''):
             log.append(progress)
             return True

@@ -153,14 +161,14 @@ def test_job():
         eq_(log[0], 0)
         eq_(log[-1], 100)

-def test_mix_file_kind():
+    def test_mix_file_kind(self):
         s = Scanner()
         s.mix_file_kind = False
         f = [no('foo.1'), no('foo.2')]
         r = s.GetDupeGroups(f)
         eq_(len(r), 0)

-def test_word_weighting():
+    def test_word_weighting(self):
         s = Scanner()
         s.min_match_percentage = 75
         s.word_weighting = True
@@ -171,28 +179,28 @@ def test_word_weighting():
         m = g.get_match_of(g.dupes[0])
         eq_(m.percentage, 75) # 16 letters, 12 matching

-def test_similar_words():
+    def test_similar_words(self):
         s = Scanner()
         s.match_similar_words = True
         f = [no('The White Stripes'), no('The Whites Stripe'), no('Limp Bizkit'), no('Limp Bizkitt')]
         r = s.GetDupeGroups(f)
         eq_(len(r), 2)

-def test_fields():
+    def test_fields(self):
         s = Scanner()
         s.scan_type = SCAN_TYPE_FIELDS
         f = [no('The White Stripes - Little Ghost'), no('The White Stripes - Little Acorn')]
         r = s.GetDupeGroups(f)
         eq_(len(r), 0)

-def test_fields_no_order():
+    def test_fields_no_order(self):
         s = Scanner()
         s.scan_type = SCAN_TYPE_FIELDS_NO_ORDER
         f = [no('The White Stripes - Little Ghost'), no('Little Ghost - The White Stripes')]
         r = s.GetDupeGroups(f)
         eq_(len(r), 1)

-def test_tag_scan():
+    def test_tag_scan(self):
         s = Scanner()
         s.scan_type = SCAN_TYPE_TAG
         o1 = no('foo')
@@ -204,7 +212,7 @@ def test_tag_scan():
         r = s.GetDupeGroups([o1,o2])
         eq_(len(r), 1)

-def test_tag_with_album_scan():
+    def test_tag_with_album_scan(self):
         s = Scanner()
         s.scan_type = SCAN_TYPE_TAG
         s.scanned_tags = set(['artist', 'album', 'title'])

@@ -223,7 +231,7 @@ def test_tag_with_album_scan():
         r = s.GetDupeGroups([o1,o2,o3])
         eq_(len(r), 1)

-def test_that_dash_in_tags_dont_create_new_fields():
+    def test_that_dash_in_tags_dont_create_new_fields(self):
         s = Scanner()
         s.scan_type = SCAN_TYPE_TAG
         s.scanned_tags = set(['artist', 'album', 'title'])

@@ -239,7 +247,7 @@ def test_that_dash_in_tags_dont_create_new_fields():
         r = s.GetDupeGroups([o1,o2])
         eq_(len(r), 1)

-def test_tag_scan_with_different_scanned():
+    def test_tag_scan_with_different_scanned(self):
         s = Scanner()
         s.scan_type = SCAN_TYPE_TAG
         s.scanned_tags = set(['track', 'year'])

@@ -256,7 +264,7 @@ def test_tag_scan_with_different_scanned():
         r = s.GetDupeGroups([o1, o2])
         eq_(len(r), 1)

-def test_tag_scan_only_scans_existing_tags():
+    def test_tag_scan_only_scans_existing_tags(self):
         s = Scanner()
         s.scan_type = SCAN_TYPE_TAG
         s.scanned_tags = set(['artist', 'foo'])

@@ -269,7 +277,7 @@ def test_tag_scan_only_scans_existing_tags():
         r = s.GetDupeGroups([o1, o2])
         eq_(len(r), 1) # Because 'foo' is not scanned, they match

-def test_tag_scan_converts_to_str():
+    def test_tag_scan_converts_to_str(self):
         s = Scanner()
         s.scan_type = SCAN_TYPE_TAG
         s.scanned_tags = set(['track'])
@@ -283,7 +291,7 @@ def test_tag_scan_converts_to_str():
             raise AssertionError()
         eq_(len(r), 1)

-def test_tag_scan_non_ascii():
+    def test_tag_scan_non_ascii(self):
         s = Scanner()
         s.scan_type = SCAN_TYPE_TAG
         s.scanned_tags = set(['title'])

@@ -297,7 +305,7 @@ def test_tag_scan_non_ascii():
             raise AssertionError()
         eq_(len(r), 1)

-def test_audio_content_scan():
+    def test_audio_content_scan(self):
         s = Scanner()
         s.scan_type = SCAN_TYPE_CONTENT_AUDIO
         f = [no('foo'), no('bar'), no('bleh')]

@@ -314,7 +322,7 @@ def test_audio_content_scan():
         eq_(len(r), 1)
         eq_(len(r[0]), 2)

-def test_audio_content_scan_compare_sizes_first():
+    def test_audio_content_scan_compare_sizes_first(self):
         class MyFile(no):
             @property
             def md5partial(file):

@@ -327,7 +335,7 @@ def test_audio_content_scan_compare_sizes_first():
         f[1].audiosize = 2
         eq_(len(s.GetDupeGroups(f)), 0)

-def test_ignore_list():
+    def test_ignore_list(self):
         s = Scanner()
         f1 = no('foobar')
         f2 = no('foobar')
@@ -347,7 +355,7 @@ def test_ignore_list():
         # Ignored matches are not counted as discarded
         eq_(s.discarded_file_count, 0)

-def test_ignore_list_checks_for_unicode():
+    def test_ignore_list_checks_for_unicode(self):
         #scanner was calling path_str for ignore list checks. Since the Path changes, it must
         #be unicode(path)
         s = Scanner()

@@ -367,7 +375,7 @@ def test_ignore_list_checks_for_unicode():
         assert f2 in g
         assert f3 in g

-def test_file_evaluates_to_false():
+    def test_file_evaluates_to_false(self):
         # A very wrong way to use any() was added at some point, causing resulting group list
         # to be empty.
         class FalseNamedObject(NamedObject):

@@ -381,7 +389,7 @@ def test_file_evaluates_to_false():
         r = s.GetDupeGroups([f1, f2])
         eq_(len(r), 1)

-def test_size_threshold():
+    def test_size_threshold(self):
         # Only file equal or higher than the size_threshold in size are scanned
         s = Scanner()
         f1 = no('foo', 1)
@@ -396,7 +404,7 @@ def test_size_threshold():
         assert f2 in group
         assert f3 in group

-def test_tie_breaker_path_deepness():
+    def test_tie_breaker_path_deepness(self):
         # If there is a tie in prioritization, path deepness is used as a tie breaker
         s = Scanner()
         o1, o2 = no('foo'), no('foo')

@@ -405,7 +413,7 @@ def test_tie_breaker_path_deepness():
         [group] = s.GetDupeGroups([o1, o2])
         assert group.ref is o2

-def test_tie_breaker_copy():
+    def test_tie_breaker_copy(self):
         # if copy is in the words used (even if it has a deeper path), it becomes a dupe
         s = Scanner()
         o1, o2 = no('foo bar Copy'), no('foo bar')

@@ -414,7 +422,7 @@ def test_tie_breaker_copy():
         [group] = s.GetDupeGroups([o1, o2])
         assert group.ref is o2

-def test_tie_breaker_same_name_plus_digit():
+    def test_tie_breaker_same_name_plus_digit(self):
         # if ref has the same words as dupe, but has some just one extra word which is a digit, it
         # becomes a dupe
         s = Scanner()

@@ -424,7 +432,7 @@ def test_tie_breaker_same_name_plus_digit():
         [group] = s.GetDupeGroups([o1, o2])
         assert group.ref is o2

-def test_partial_group_match():
+    def test_partial_group_match(self):
         # Count the number od discarded matches (when a file doesn't match all other dupes of the
         # group) in Scanner.discarded_file_count
         s = Scanner()
@@ -436,3 +444,24 @@ def test_partial_group_match():
         assert o2 in group
         assert o3 not in group
         eq_(s.discarded_file_count, 1)
+
+
+class ScannerTest(TestCase):
+    def test_dont_group_files_that_dont_exist(self):
+        # when creating groups, check that files exist first. It's possible that these files have
+        # been moved during the scan by the user.
+        # In this test, we have to delete one of the files between the get_matches() part and the
+        # get_groups() part.
+        s = Scanner()
+        s.scan_type = SCAN_TYPE_CONTENT
+        p = self.tmppath()
+        io.open(p + 'file1', 'w').write('foo')
+        io.open(p + 'file2', 'w').write('foo')
+        file1, file2 = fs.get_files(p)
+        def getmatches(*args, **kw):
+            io.remove(file2.path)
+            return [Match(file1, file2, 100)]
+        s._getmatches = getmatches
+
+        assert not s.GetDupeGroups([file1, file2])
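The regression test pins the race down by overriding the scanner's internal _getmatches hook: the replacement reports a match and deletes one of the real files on the way out, so the deletion lands exactly on the boundary between the match phase and the group phase. The same trick in a generic, reusable form (a sketch; names are illustrative):

    import os

    def make_racy_getmatches(real_getmatches, victim_path, remove=os.remove):
        # Wrap the matching step so a file disappears right after matching
        # finishes -- the moment a user deleting files mid-scan would hit.
        def getmatches(*args, **kw):
            matches = real_getmatches(*args, **kw)
            remove(victim_path)
            return matches
        return getmatches

With the io.exists() filter in place, GetDupeGroups() must then return no groups, which is exactly what the new assert checks.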