diff --git a/base/py/scanner.py b/base/py/scanner.py
index 18b83444..ff59d523 100644
--- a/base/py/scanner.py
+++ b/base/py/scanner.py
@@ -109,13 +109,13 @@ class Scanner(object):
                 m.first.words = m.second.words = ['--']
         logging.info('Grouping matches')
         groups = engine.get_groups(matches, j)
+        matched_files = dedupe([m.first for m in matches] + [m.second for m in matches])
+        self.discarded_file_count = len(matched_files) - sum(len(g) for g in groups)
         groups = [g for g in groups if any(not f.is_ref for f in g)]
         logging.info('Created %d groups' % len(groups))
         j.set_progress(100, 'Doing group prioritization')
         for g in groups:
             g.prioritize(self._key_func, self._tie_breaker)
-        matched_files = dedupe([m.first for m in matches] + [m.second for m in matches])
-        self.discarded_file_count = len(matched_files) - sum(len(g) for g in groups)
         return groups
     
     match_factory        = None
diff --git a/base/py/tests/scanner_test.py b/base/py/tests/scanner_test.py
index d683e405..7356d658 100644
--- a/base/py/tests/scanner_test.py
+++ b/base/py/tests/scanner_test.py
@@ -62,12 +62,15 @@ def test_simple_with_lower_min_match():
     eq_(len(g), 3)
 
 def test_trim_all_ref_groups():
+    # A group whose files are all references is dropped from the results; the files of such a
+    # dropped group must not be counted in discarded_file_count.
     s = Scanner()
     f = [no('foo'), no('foo'), no('bar'), no('bar')]
     f[2].is_ref = True
     f[3].is_ref = True
     r = s.GetDupeGroups(f)
     eq_(len(r), 1)
+    eq_(s.discarded_file_count, 0)
 
 def test_priorize():
     s = Scanner()