diff --git a/base/py/scanner.py b/base/py/scanner.py
index 320151c2..a4a8891f 100644
--- a/base/py/scanner.py
+++ b/base/py/scanner.py
@@ -31,6 +31,16 @@ class Scanner(object):
         self.ignore_list = IgnoreList()
         self.discarded_file_count = 0
     
+    @staticmethod
+    def _filter_matches_by_content(matches, partial, j):
+        """Return the matches whose two files share the same md5 (md5partial if `partial`)."""
+        candidates = dedupe([m.first for m in matches] + [m.second for m in matches])
+        attr = 'md5partial' if partial else 'md5'
+        for candidate in j.iter_with_progress(candidates, 'Analyzed %d/%d matching files'):
+            getattr(candidate, attr)  # value discarded; presumably primes a lazy digest -- confirm
+        j.set_progress(100, 'Removing false matches')
+        return [m for m in matches if getattr(m.first, attr) == getattr(m.second, attr)]
+    
     def _getmatches(self, files, j):
         j = j.start_subjob(2)
         mf = engine.MatchFactory()
@@ -88,19 +98,14 @@ class Scanner(object):
             iter_matches = j.iter_with_progress(matches, 'Processed %d/%d matches against the ignore list')
             matches = [m for m in iter_matches 
                 if not self.ignore_list.AreIgnored(unicode(m.first.path), unicode(m.second.path))]
-        matched_files = dedupe([m.first for m in matches] + [m.second for m in matches])
         if self.scan_type in (SCAN_TYPE_CONTENT, SCAN_TYPE_CONTENT_AUDIO):
-            md5attrname = 'md5partial' if self.scan_type == SCAN_TYPE_CONTENT_AUDIO else 'md5'
-            md5 = lambda f: getattr(f, md5attrname)
-            j = j.start_subjob(2)
-            for matched_file in j.iter_with_progress(matched_files, 'Analyzed %d/%d matching files'):
-                md5(matched_file)
-            j.set_progress(100, 'Removing false matches')
-            matches = [m for m in matches if md5(m.first) == md5(m.second)]
-            words_for_content = ['--'] # We compared md5. No words were involved.
+            j = j.start_subjob(3 if self.scan_type == SCAN_TYPE_CONTENT else 2)
+            matches = self._filter_matches_by_content(matches, partial=True, j=j)
+            if self.scan_type == SCAN_TYPE_CONTENT:
+                matches = self._filter_matches_by_content(matches, partial=False, j=j)
+            # We compared md5. No words were involved.
             for m in matches:
-                m.first.words = words_for_content
-                m.second.words = words_for_content
+                m.first.words = m.second.words = ['--']
         logging.info('Grouping matches')
         groups = engine.get_groups(matches, j)
         groups = [g for g in groups if any(not f.is_ref for f in g)]
diff --git a/base/py/tests/scanner_test.py b/base/py/tests/scanner_test.py
index 5356d030..d683e405 100644
--- a/base/py/tests/scanner_test.py
+++ b/base/py/tests/scanner_test.py
@@ -86,9 +86,9 @@ def test_content_scan():
     s = Scanner()
     s.scan_type = SCAN_TYPE_CONTENT
     f = [no('foo'), no('bar'), no('bleh')]
-    f[0].md5 = 'foobar'
-    f[1].md5 = 'foobar'
-    f[2].md5 = 'bleh'
+    f[0].md5 = f[0].md5partial = 'foobar'
+    f[1].md5 = f[1].md5partial = 'foobar'
+    f[2].md5 = f[2].md5partial = 'bleh'
     r = s.GetDupeGroups(f)
     eq_(len(r), 1)
     eq_(len(r[0]), 2)
@@ -109,9 +109,9 @@ def test_min_match_perc_doesnt_matter_for_content_scan():
     s = Scanner()
     s.scan_type = SCAN_TYPE_CONTENT
     f = [no('foo'), no('bar'), no('bleh')]
-    f[0].md5 = 'foobar'
-    f[1].md5 = 'foobar'
-    f[2].md5 = 'bleh'
+    f[0].md5 = f[0].md5partial = 'foobar'
+    f[1].md5 = f[1].md5partial = 'foobar'
+    f[2].md5 = f[2].md5partial = 'bleh'
     s.min_match_percentage = 101
     r = s.GetDupeGroups(f)
     eq_(len(r), 1)
@@ -121,12 +121,12 @@ def test_min_match_perc_doesnt_matter_for_content_scan():
     eq_(len(r), 1)
     eq_(len(r[0]), 2)
 
-def test_content_scan_puts_md5_in_words_at_the_end():
+def test_content_scan_doesnt_put_md5_in_words_at_the_end():
     s = Scanner()
     s.scan_type = SCAN_TYPE_CONTENT
     f = [no('foo'),no('bar')]
-    f[0].md5 = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f'
-    f[1].md5 = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f'
+    f[0].md5 = f[0].md5partial = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f'
+    f[1].md5 = f[1].md5partial = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f'
     r = s.GetDupeGroups(f)
     g = r[0]
     eq_(g.ref.words, ['--'])