From fbdd1d866ef6191ceb35ef9da23f1a856cfb11d5 Mon Sep 17 00:00:00 2001 From: Virgil Dupras Date: Wed, 8 Jun 2016 12:06:08 -0400 Subject: [PATCH] Simplify getmatches_by_contents() signature partial and sizeattr attributes are not needed anymore. --- core/engine.py | 16 +++++----------- core/scanner.py | 9 +++++---- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/core/engine.py b/core/engine.py index 78e3830b..f4be53bc 100644 --- a/core/engine.py +++ b/core/engine.py @@ -246,21 +246,15 @@ def getmatches( return result return result -def getmatches_by_contents(files, sizeattr='size', partial=False, j=job.nulljob): +def getmatches_by_contents(files, j=job.nulljob): """Returns a list of :class:`Match` within ``files`` if their contents is the same. - :param str sizeattr: attibute name of the :class:`~core.fs.file` that returns the size of the - file to use for comparison. - :param bool partial: if true, will use the "md5partial" attribute instead of "md5" to compute - contents hash. :param j: A :ref:`job progress instance `. """ - j = j.start_subjob([2, 8]) size2files = defaultdict(set) - for file in j.iter_with_progress(files, tr("Read size of %d/%d files")): - filesize = getattr(file, sizeattr) - if filesize: - size2files[filesize].add(file) + for f in files: + if f.size: + size2files[f.size].add(f) del files possible_matches = [files for files in size2files.values() if len(files) > 1] del size2files @@ -271,7 +265,7 @@ def getmatches_by_contents(files, sizeattr='size', partial=False, j=job.nulljob) if first.is_ref and second.is_ref: continue # Don't spend time comparing two ref pics together. if first.md5partial == second.md5partial: - if partial or first.md5 == second.md5: + if first.md5 == second.md5: result.append(Match(first, second, 100)) j.add_progress(desc=tr("%d matches found") % len(result)) return result diff --git a/core/scanner.py b/core/scanner.py index e5bb84a1..a2640b3f 100644 --- a/core/scanner.py +++ b/core/scanner.py @@ -72,13 +72,14 @@ class Scanner: self.discarded_file_count = 0 def _getmatches(self, files, j): - if self.size_threshold: + if self.size_threshold or self.scan_type in {ScanType.Contents, ScanType.Folders}: j = j.start_subjob([2, 8]) for f in j.iter_with_progress(files, tr("Read size of %d/%d files")): f.size # pre-read, makes a smoother progress if read here (especially for bundles) - files = [f for f in files if f.size >= self.size_threshold] + if self.size_threshold: + files = [f for f in files if f.size >= self.size_threshold] if self.scan_type in {ScanType.Contents, ScanType.Folders}: - return engine.getmatches_by_contents(files, sizeattr='size', partial=False, j=j) + return engine.getmatches_by_contents(files, j=j) else: j = j.start_subjob([2, 8]) kw = {} @@ -98,7 +99,7 @@ class Scanner: ], }[self.scan_type] for f in j.iter_with_progress(files, tr("Read metadata of %d/%d files")): - logging.debug("Reading metadata of {}".format(str(f.path))) + logging.debug("Reading metadata of %s", f.path) f.words = func(f) return engine.getmatches(files, j=j, **kw)