From fbdd1d866ef6191ceb35ef9da23f1a856cfb11d5 Mon Sep 17 00:00:00 2001
From: Virgil Dupras <hsoft@hardcoded.net>
Date: Wed, 8 Jun 2016 12:06:08 -0400
Subject: [PATCH] Simplify getmatches_by_contents() signature

The partial and sizeattr parameters are not needed anymore.
---
 core/engine.py  | 16 +++++-----------
 core/scanner.py |  9 +++++----
 2 files changed, 10 insertions(+), 15 deletions(-)
diff --git a/core/engine.py b/core/engine.py
index 78e3830b..f4be53bc 100644
--- a/core/engine.py
+++ b/core/engine.py
@@ -246,21 +246,15 @@ def getmatches(
         return result
     return result
 
-def getmatches_by_contents(files, sizeattr='size', partial=False, j=job.nulljob):
+def getmatches_by_contents(files, j=job.nulljob):
     """Returns a list of :class:`Match` within ``files`` if their contents is the same.
 
-    :param str sizeattr: attibute name of the :class:`~core.fs.file` that returns the size of the
-                         file to use for comparison.
-    :param bool partial: if true, will use the "md5partial" attribute instead of "md5" to compute
-                         contents hash.
     :param j: A :ref:`job progress instance <jobs>`.
     """
-    j = j.start_subjob([2, 8])
     size2files = defaultdict(set)
-    for file in j.iter_with_progress(files, tr("Read size of %d/%d files")):
-        filesize = getattr(file, sizeattr)
-        if filesize:
-            size2files[filesize].add(file)
+    for f in files:
+        if f.size:
+            size2files[f.size].add(f)
     del files
     possible_matches = [files for files in size2files.values() if len(files) > 1]
     del size2files
@@ -271,7 +265,7 @@ def getmatches_by_contents(files, sizeattr='size', partial=False, j=job.nulljob)
             if first.is_ref and second.is_ref:
                 continue # Don't spend time comparing two ref pics together.
             if first.md5partial == second.md5partial:
-                if partial or first.md5 == second.md5:
+                if first.md5 == second.md5:
                     result.append(Match(first, second, 100))
         j.add_progress(desc=tr("%d matches found") % len(result))
     return result
diff --git a/core/scanner.py b/core/scanner.py
index e5bb84a1..a2640b3f 100644
--- a/core/scanner.py
+++ b/core/scanner.py
@@ -72,13 +72,14 @@ class Scanner:
         self.discarded_file_count = 0
 
     def _getmatches(self, files, j):
-        if self.size_threshold:
+        if self.size_threshold or self.scan_type in {ScanType.Contents, ScanType.Folders}:
             j = j.start_subjob([2, 8])
             for f in j.iter_with_progress(files, tr("Read size of %d/%d files")):
                 f.size # pre-read, makes a smoother progress if read here (especially for bundles)
-            files = [f for f in files if f.size >= self.size_threshold]
+            if self.size_threshold:
+                files = [f for f in files if f.size >= self.size_threshold]
         if self.scan_type in {ScanType.Contents, ScanType.Folders}:
-            return engine.getmatches_by_contents(files, sizeattr='size', partial=False, j=j)
+            return engine.getmatches_by_contents(files, j=j)
         else:
             j = j.start_subjob([2, 8])
             kw = {}
@@ -98,7 +99,7 @@ class Scanner:
                 ],
             }[self.scan_type]
             for f in j.iter_with_progress(files, tr("Read metadata of %d/%d files")):
-                logging.debug("Reading metadata of {}".format(str(f.path)))
+                logging.debug("Reading metadata of %s", f.path)
                 f.words = func(f)
             return engine.getmatches(files, j=j, **kw)