mirror of
https://github.com/arsenetar/dupeguru.git
synced 2025-03-10 05:34:36 +00:00
Simplify progress report during scanning
We now get less progress feedback, but in exchange, our progress job is simpler. Previously, our progress bar would often get wonky towards the end of the scan and I didn't have the energy to debug that. Besides, people don't care about that level of progress feedback.
This commit is contained in:
parent
334f4dd2ae
commit
b1ef3dc8fe
@ -449,7 +449,7 @@ class Group:
|
|||||||
return self[0]
|
return self[0]
|
||||||
|
|
||||||
|
|
||||||
def get_groups(matches, j=job.nulljob):
|
def get_groups(matches):
|
||||||
"""Returns a list of :class:`Group` from ``matches``.
|
"""Returns a list of :class:`Group` from ``matches``.
|
||||||
|
|
||||||
Create groups out of match pairs in the smartest way possible.
|
Create groups out of match pairs in the smartest way possible.
|
||||||
@ -458,7 +458,7 @@ def get_groups(matches, j=job.nulljob):
|
|||||||
dupe2group = {}
|
dupe2group = {}
|
||||||
groups = []
|
groups = []
|
||||||
try:
|
try:
|
||||||
for match in j.iter_with_progress(matches, tr("Grouped %d/%d matches"), JOB_REFRESH_RATE):
|
for match in matches:
|
||||||
first, second, _ = match
|
first, second, _ = match
|
||||||
first_group = dupe2group.get(first)
|
first_group = dupe2group.get(first)
|
||||||
second_group = dupe2group.get(second)
|
second_group = dupe2group.get(second)
|
||||||
|
@ -130,14 +130,13 @@ class Scanner:
|
|||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
def get_dupe_groups(self, files, ignore_list=None, j=job.nulljob):
|
def get_dupe_groups(self, files, ignore_list=None, j=job.nulljob):
|
||||||
j = j.start_subjob([8, 2])
|
|
||||||
for f in (f for f in files if not hasattr(f, 'is_ref')):
|
for f in (f for f in files if not hasattr(f, 'is_ref')):
|
||||||
f.is_ref = False
|
f.is_ref = False
|
||||||
files = remove_dupe_paths(files)
|
files = remove_dupe_paths(files)
|
||||||
logging.info("Getting matches. Scan type: %d", self.scan_type)
|
logging.info("Getting matches. Scan type: %d", self.scan_type)
|
||||||
matches = self._getmatches(files, j)
|
matches = self._getmatches(files, j)
|
||||||
logging.info('Found %d matches' % len(matches))
|
logging.info('Found %d matches' % len(matches))
|
||||||
j.set_progress(100, tr("Removing false matches"))
|
j.set_progress(100, tr("Almost done! Fiddling with results..."))
|
||||||
# When removing what we call here "false matches", we first want, if we scan by
|
# When removing what we call here "false matches", we first want, if we scan by
|
||||||
# folders, to remove folder matches for which the parent is also in a match (they're
|
# folders, to remove folder matches for which the parent is also in a match (they're
|
||||||
# "duplicated duplicates", if you will). Then, we also don't want mixed file kinds if the
|
# "duplicated duplicates", if you will). Then, we also don't want mixed file kinds if the
|
||||||
@ -160,16 +159,14 @@ class Scanner:
|
|||||||
matches = [m for m in matches if m.first.path.exists() and m.second.path.exists()]
|
matches = [m for m in matches if m.first.path.exists() and m.second.path.exists()]
|
||||||
matches = [m for m in matches if not (m.first.is_ref and m.second.is_ref)]
|
matches = [m for m in matches if not (m.first.is_ref and m.second.is_ref)]
|
||||||
if ignore_list:
|
if ignore_list:
|
||||||
j = j.start_subjob(2)
|
|
||||||
iter_matches = j.iter_with_progress(matches, tr("Processed %d/%d matches against the ignore list"))
|
|
||||||
matches = [
|
matches = [
|
||||||
m for m in iter_matches
|
m for m in matches
|
||||||
if not ignore_list.AreIgnored(str(m.first.path), str(m.second.path))
|
if not ignore_list.AreIgnored(str(m.first.path), str(m.second.path))
|
||||||
]
|
]
|
||||||
logging.info('Grouping matches')
|
logging.info('Grouping matches')
|
||||||
groups = engine.get_groups(matches, j)
|
groups = engine.get_groups(matches)
|
||||||
matched_files = dedupe([m.first for m in matches] + [m.second for m in matches])
|
|
||||||
if self.scan_type in {ScanType.Filename, ScanType.Fields, ScanType.FieldsNoOrder, ScanType.Tag}:
|
if self.scan_type in {ScanType.Filename, ScanType.Fields, ScanType.FieldsNoOrder, ScanType.Tag}:
|
||||||
|
matched_files = dedupe([m.first for m in matches] + [m.second for m in matches])
|
||||||
self.discarded_file_count = len(matched_files) - sum(len(g) for g in groups)
|
self.discarded_file_count = len(matched_files) - sum(len(g) for g in groups)
|
||||||
else:
|
else:
|
||||||
# Ticket #195
|
# Ticket #195
|
||||||
@ -185,7 +182,6 @@ class Scanner:
|
|||||||
self.discarded_file_count = 0
|
self.discarded_file_count = 0
|
||||||
groups = [g for g in groups if any(not f.is_ref for f in g)]
|
groups = [g for g in groups if any(not f.is_ref for f in g)]
|
||||||
logging.info('Created %d groups' % len(groups))
|
logging.info('Created %d groups' % len(groups))
|
||||||
j.set_progress(100, tr("Doing group prioritization"))
|
|
||||||
for g in groups:
|
for g in groups:
|
||||||
g.prioritize(self._key_func, self._tie_breaker)
|
g.prioritize(self._key_func, self._tie_breaker)
|
||||||
return groups
|
return groups
|
||||||
|
@ -803,20 +803,6 @@ class TestCaseget_groups:
|
|||||||
r = get_groups([m1, m2, m3])
|
r = get_groups([m1, m2, m3])
|
||||||
eq_(3, len(r[0]))
|
eq_(3, len(r[0]))
|
||||||
|
|
||||||
def test_job(self):
|
|
||||||
def do_progress(p, d=''):
|
|
||||||
self.log.append(p)
|
|
||||||
return True
|
|
||||||
|
|
||||||
self.log = []
|
|
||||||
j = job.Job(1, do_progress)
|
|
||||||
m1, m2, m3 = get_match_triangle()
|
|
||||||
#101%: To make sure it is processed first so the job test works correctly
|
|
||||||
m4 = Match(NamedObject('a', True), NamedObject('a', True), 101)
|
|
||||||
get_groups([m1, m2, m3, m4], j)
|
|
||||||
eq_(0, self.log[0])
|
|
||||||
eq_(100, self.log[-1])
|
|
||||||
|
|
||||||
def test_group_admissible_discarded_dupes(self):
|
def test_group_admissible_discarded_dupes(self):
|
||||||
# If, with a (A, B, C, D) set, all match with A, but C and D don't match with B and that the
|
# If, with a (A, B, C, D) set, all match with A, but C and D don't match with B and that the
|
||||||
# (A, B) match is the highest (thus resulting in an (A, B) group), still match C and D
|
# (A, B) match is the highest (thus resulting in an (A, B) group), still match C and D
|
||||||
|
Loading…
x
Reference in New Issue
Block a user