1
0
mirror of https://github.com/arsenetar/dupeguru.git synced 2025-09-11 17:58:17 +00:00

Fixed a bug where groups discarded because all of their files are ref would be counted in the "X discarded" message.

--HG--
extra : convert_revision : svn%3Ac306627e-7827-47d3-bdf0-9a457c9553a1/trunk%40128
This commit is contained in:
hsoft 2009-09-06 15:24:17 +00:00
parent ae6f5d27d8
commit 5508609fdc
2 changed files with 5 additions and 2 deletions

View File

@ -109,13 +109,13 @@ class Scanner(object):
m.first.words = m.second.words = ['--'] m.first.words = m.second.words = ['--']
logging.info('Grouping matches') logging.info('Grouping matches')
groups = engine.get_groups(matches, j) groups = engine.get_groups(matches, j)
matched_files = dedupe([m.first for m in matches] + [m.second for m in matches])
self.discarded_file_count = len(matched_files) - sum(len(g) for g in groups)
groups = [g for g in groups if any(not f.is_ref for f in g)] groups = [g for g in groups if any(not f.is_ref for f in g)]
logging.info('Created %d groups' % len(groups)) logging.info('Created %d groups' % len(groups))
j.set_progress(100, 'Doing group prioritization') j.set_progress(100, 'Doing group prioritization')
for g in groups: for g in groups:
g.prioritize(self._key_func, self._tie_breaker) g.prioritize(self._key_func, self._tie_breaker)
matched_files = dedupe([m.first for m in matches] + [m.second for m in matches])
self.discarded_file_count = len(matched_files) - sum(len(g) for g in groups)
return groups return groups
match_factory = None match_factory = None

View File

@ -62,12 +62,15 @@ def test_simple_with_lower_min_match():
eq_(len(g), 3) eq_(len(g), 3)
def test_trim_all_ref_groups(): def test_trim_all_ref_groups():
# When all files of a group are ref, don't include that group in the results, but also don't
# count the files from that group as discarded.
s = Scanner() s = Scanner()
f = [no('foo'), no('foo'), no('bar'), no('bar')] f = [no('foo'), no('foo'), no('bar'), no('bar')]
f[2].is_ref = True f[2].is_ref = True
f[3].is_ref = True f[3].is_ref = True
r = s.GetDupeGroups(f) r = s.GetDupeGroups(f)
eq_(len(r), 1) eq_(len(r), 1)
eq_(s.discarded_file_count, 0)
def test_priorize(): def test_priorize():
s = Scanner() s = Scanner()