Mirror of https://github.com/arsenetar/dupeguru.git, synced 2025-09-11 17:58:17 +00:00
[#195 state:fixed] Fixed a bug where discarded matches could be falsely reported.
parent 7dfb42fb41
commit 93781a0f35
@@ -152,7 +152,20 @@ class Scanner:
         logging.info('Grouping matches')
         groups = engine.get_groups(matches, j)
         matched_files = dedupe([m.first for m in matches] + [m.second for m in matches])
-        self.discarded_file_count = len(matched_files) - sum(len(g) for g in groups)
+        if self.scan_type in {ScanType.Filename, ScanType.Fields, ScanType.FieldsNoOrder, ScanType.Tag}:
+            self.discarded_file_count = len(matched_files) - sum(len(g) for g in groups)
+        else:
+            # Ticket #195
+            # To speed up the scan, we don't bother comparing contents of files that are both ref
+            # files. However, this messes up "discarded" counting because there's a missing match
+            # in cases where we end up with a dupe group anyway (with a non-ref file). Because it's
+            # impossible to have discarded matches in exact dupe scans, we simply set it to 0, thus
+            # bypassing our tricky problem.
+            # Also, although ScanType.FuzzyBlock is not always doing exact comparisons, we also
+            # bypass ref comparison, thus messing up our "discarded" count. So we're
+            # effectively disabling the "discarded" feature in PE, but it's better than falsely
+            # reporting discarded matches.
+            self.discarded_file_count = 0
         groups = [g for g in groups if any(not f.is_ref for f in g)]
         logging.info('Created %d groups' % len(groups))
         j.set_progress(100, tr("Doing group prioritization"))
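
To make the false positive concrete, here is a small standalone illustration of the bookkeeping only. It is a hypothetical sketch, not dupeGuru's engine code: the file names and the exact grouping outcome are made up to show how a single skipped ref-to-ref comparison can leave a matched file out of the groups and inflate the count.

    # Hypothetical illustration of the "discarded" bookkeeping (not dupeGuru's engine).
    # Three identical files; a and b are reference files.
    matched_files = {'a', 'b', 'c'}   # every file that appears in at least one match

    # Full comparison: matches (a,b), (a,c), (b,c) produce one group with all three files.
    groups_full = [{'a', 'b', 'c'}]
    print(len(matched_files) - sum(len(g) for g in groups_full))     # 0, nothing discarded

    # Ref-to-ref comparison skipped: the (a, b) match is missing, so grouping can end up
    # keeping only one ref file next to c, even though nothing was really discarded.
    groups_partial = [{'a', 'c'}]
    print(len(matched_files) - sum(len(g) for g in groups_partial))  # 1, a false "discarded" report
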
@@ -28,7 +28,7 @@ class NamedObject:
         self.words = getwords(name)
     
     def __repr__(self):
-        return '<NamedObject %r>' % self.name
+        return '<NamedObject %r %r>' % (self.name, self.path)
     
 
 no = NamedObject
@@ -507,3 +507,20 @@ def test_ignore_files_with_same_path(fake_fileexists):
     f1 = no('foobar', path='path1/foobar')
     f2 = no('foobar', path='path1/foobar')
     eq_(s.get_dupe_groups([f1, f2]), [])
+
+def test_dont_count_ref_files_as_discarded(fake_fileexists):
+    # To speed up the scan, we don't bother comparing contents of files that are both ref files.
+    # However, this causes problems in "discarded" counting and we make sure here that we don't
+    # report discarded matches in exact duplicate scans.
+    s = Scanner()
+    s.scan_type = ScanType.Contents
+    o1 = no("foo", path="p1")
+    o2 = no("foo", path="p2")
+    o3 = no("foo", path="p3")
+    o1.md5 = o1.md5partial = 'foobar'
+    o2.md5 = o2.md5partial = 'foobar'
+    o3.md5 = o3.md5partial = 'foobar'
+    o1.is_ref = True
+    o2.is_ref = True
+    eq_(len(s.get_dupe_groups([o1, o2, o3])), 1)
+    eq_(s.discarded_file_count, 0)
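
If the suite is driven by pytest, which the fixture-style fake_fileexists argument suggests (an assumption; the diff does not name the runner), the new case can be exercised on its own with the -k selector:

    pytest -k dont_count_ref_files_as_discarded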