mirror of
				https://github.com/arsenetar/dupeguru.git
				synced 2025-09-11 17:58:17 +00:00 
			
		
		
		
	Add preference to ignore large files, close #430
This commit is contained in:
		
							parent
							
								
									809116c764
								
							
						
					
					
						commit
						3045361243
					
				| @ -77,15 +77,22 @@ class Scanner: | ||||
|         self.discarded_file_count = 0 | ||||
| 
 | ||||
|     def _getmatches(self, files, j): | ||||
|         if self.size_threshold or self.scan_type in { | ||||
|             ScanType.CONTENTS, | ||||
|             ScanType.FOLDERS, | ||||
|         }: | ||||
|         if ( | ||||
|             self.size_threshold | ||||
|             or self.large_size_threshold | ||||
|             or self.scan_type | ||||
|             in { | ||||
|                 ScanType.CONTENTS, | ||||
|                 ScanType.FOLDERS, | ||||
|             } | ||||
|         ): | ||||
|             j = j.start_subjob([2, 8]) | ||||
|             for f in j.iter_with_progress(files, tr("Read size of %d/%d files")): | ||||
|                 f.size  # pre-read, makes a smoother progress if read here (especially for bundles) | ||||
|             if self.size_threshold: | ||||
|                 files = [f for f in files if f.size >= self.size_threshold] | ||||
|             if self.large_size_threshold: | ||||
|                 files = [f for f in files if f.size <= self.large_size_threshold] | ||||
|         if self.scan_type in {ScanType.CONTENTS, ScanType.FOLDERS}: | ||||
|             return engine.getmatches_by_contents(files, bigsize=self.big_file_size_threshold, j=j) | ||||
|         else: | ||||
| @ -202,5 +209,6 @@ class Scanner: | ||||
|     scan_type = ScanType.FILENAME | ||||
|     scanned_tags = {"artist", "title"} | ||||
|     size_threshold = 0 | ||||
|     large_size_threshold = 0 | ||||
|     big_file_size_threshold = 0 | ||||
|     word_weighting = False | ||||
|  | ||||
| @ -56,6 +56,8 @@ def test_default_settings(fake_fileexists): | ||||
|     eq_(s.mix_file_kind, True) | ||||
|     eq_(s.word_weighting, False) | ||||
|     eq_(s.match_similar_words, False) | ||||
|     eq_(s.size_threshold, 0) | ||||
|     eq_(s.large_size_threshold, 0) | ||||
|     eq_(s.big_file_size_threshold, 0) | ||||
| 
 | ||||
| 
 | ||||
| @ -142,6 +144,50 @@ def test_content_scan_compare_sizes_first(fake_fileexists): | ||||
|     eq_(len(s.get_dupe_groups(f)), 0) | ||||
| 
 | ||||
| 
 | ||||
| def test_ignore_file_size(fake_fileexists): | ||||
|     s = Scanner() | ||||
|     s.scan_type = ScanType.CONTENTS | ||||
|     small_size = 10  # 10 bytes (Scanner thresholds are compared against f.size in bytes) | ||||
|     s.size_threshold = 0 | ||||
|     large_size = 100 * 1024 * 1024  # 100MB | ||||
|     s.large_size_threshold = 0 | ||||
|     f = [ | ||||
|         no("smallignore1", small_size - 1), | ||||
|         no("smallignore2", small_size - 1), | ||||
|         no("small1", small_size), | ||||
|         no("small2", small_size), | ||||
|         no("large1", large_size), | ||||
|         no("large2", large_size), | ||||
|         no("largeignore1", large_size + 1), | ||||
|         no("largeignore2", large_size + 1), | ||||
|     ] | ||||
|     f[0].md5 = f[0].md5partial = f[0].md5samples = "smallignore" | ||||
|     f[1].md5 = f[1].md5partial = f[1].md5samples = "smallignore" | ||||
|     f[2].md5 = f[2].md5partial = f[2].md5samples = "small" | ||||
|     f[3].md5 = f[3].md5partial = f[3].md5samples = "small" | ||||
|     f[4].md5 = f[4].md5partial = f[4].md5samples = "large" | ||||
|     f[5].md5 = f[5].md5partial = f[5].md5samples = "large" | ||||
|     f[6].md5 = f[6].md5partial = f[6].md5samples = "largeignore" | ||||
|     f[7].md5 = f[7].md5partial = f[7].md5samples = "largeignore" | ||||
| 
 | ||||
|     r = s.get_dupe_groups(f) | ||||
|     # No ignores | ||||
|     eq_(len(r), 4) | ||||
|     # Ignore smaller | ||||
|     s.size_threshold = small_size | ||||
|     r = s.get_dupe_groups(f) | ||||
|     eq_(len(r), 3) | ||||
|     # Ignore larger | ||||
|     s.size_threshold = 0 | ||||
|     s.large_size_threshold = large_size | ||||
|     r = s.get_dupe_groups(f) | ||||
|     eq_(len(r), 3) | ||||
|     # Ignore both | ||||
|     s.size_threshold = small_size | ||||
|     r = s.get_dupe_groups(f) | ||||
|     eq_(len(r), 2) | ||||
| 
 | ||||
| 
 | ||||
| def test_big_file_partial_hashes(fake_fileexists): | ||||
|     s = Scanner() | ||||
|     s.scan_type = ScanType.CONTENTS | ||||
|  | ||||
| @ -166,6 +166,10 @@ class DupeGuru(QObject): | ||||
|         self.model.options["match_similar_words"] = self.prefs.match_similar | ||||
|         threshold = self.prefs.small_file_threshold if self.prefs.ignore_small_files else 0 | ||||
|         self.model.options["size_threshold"] = threshold * 1024  # threshold is in KB. The scanner wants bytes | ||||
|         large_threshold = self.prefs.large_file_threshold if self.prefs.ignore_large_files else 0 | ||||
|         self.model.options["large_size_threshold"] = ( | ||||
|             large_threshold * 1024 * 1024 | ||||
|         )  # threshold is in MB. The Scanner wants bytes | ||||
|         big_file_size_threshold = self.prefs.big_file_size_threshold if self.prefs.big_file_partial_hashes else 0 | ||||
|         self.model.options["big_file_size_threshold"] = ( | ||||
|             big_file_size_threshold | ||||
|  | ||||
| @ -72,6 +72,8 @@ class Preferences(PreferencesBase): | ||||
|         self.match_similar = get("MatchSimilar", self.match_similar) | ||||
|         self.ignore_small_files = get("IgnoreSmallFiles", self.ignore_small_files) | ||||
|         self.small_file_threshold = get("SmallFileThreshold", self.small_file_threshold) | ||||
|         self.ignore_large_files = get("IgnoreLargeFiles", self.ignore_large_files) | ||||
|         self.large_file_threshold = get("LargeFileThreshold", self.large_file_threshold) | ||||
|         self.big_file_partial_hashes = get("BigFilePartialHashes", self.big_file_partial_hashes) | ||||
|         self.big_file_size_threshold = get("BigFileSizeThreshold", self.big_file_size_threshold) | ||||
|         self.scan_tag_track = get("ScanTagTrack", self.scan_tag_track) | ||||
| @ -119,6 +121,8 @@ class Preferences(PreferencesBase): | ||||
|         self.match_similar = False | ||||
|         self.ignore_small_files = True | ||||
|         self.small_file_threshold = 10  # KB | ||||
|         self.ignore_large_files = False | ||||
|         self.large_file_threshold = 1000  # MB | ||||
|         self.big_file_partial_hashes = False | ||||
|         self.big_file_size_threshold = 100  # MB | ||||
|         self.scan_tag_track = False | ||||
| @ -167,6 +171,8 @@ class Preferences(PreferencesBase): | ||||
|         set_("MatchSimilar", self.match_similar) | ||||
|         set_("IgnoreSmallFiles", self.ignore_small_files) | ||||
|         set_("SmallFileThreshold", self.small_file_threshold) | ||||
|         set_("IgnoreLargeFiles", self.ignore_large_files) | ||||
|         set_("LargeFileThreshold", self.large_file_threshold) | ||||
|         set_("BigFilePartialHashes", self.big_file_partial_hashes) | ||||
|         set_("BigFileSizeThreshold", self.big_file_size_threshold) | ||||
|         set_("ScanTagTrack", self.scan_tag_track) | ||||
|  | ||||
| @ -64,6 +64,21 @@ class PreferencesDialog(PreferencesDialogBase): | ||||
|         spacer_item1 = QSpacerItem(40, 20, QSizePolicy.Expanding, QSizePolicy.Minimum) | ||||
|         self.horizontalLayout_2.addItem(spacer_item1) | ||||
|         self.verticalLayout_4.addLayout(self.horizontalLayout_2) | ||||
|         self.horizontalLayout_2a = QHBoxLayout() | ||||
|         self._setupAddCheckbox("ignoreLargeFilesBox", tr("Ignore files larger than"), self.widget) | ||||
|         self.horizontalLayout_2a.addWidget(self.ignoreLargeFilesBox) | ||||
|         self.sizeSaturationSpinBox = QSpinBox(self.widget) | ||||
|         size_policy = QSizePolicy(QSizePolicy.Maximum, QSizePolicy.Fixed) | ||||
|         self.sizeSaturationSpinBox.setSizePolicy(size_policy) | ||||
|         self.sizeSaturationSpinBox.setMaximumSize(QSize(100, 16777215)) | ||||
|         self.sizeSaturationSpinBox.setRange(0, 1000000) | ||||
|         self.horizontalLayout_2a.addWidget(self.sizeSaturationSpinBox) | ||||
|         self.label_6a = QLabel(self.widget) | ||||
|         self.label_6a.setText(tr("MB")) | ||||
|         self.horizontalLayout_2a.addWidget(self.label_6a) | ||||
|         spacer_item3 = QSpacerItem(40, 20, QSizePolicy.Expanding, QSizePolicy.Minimum) | ||||
|         self.horizontalLayout_2a.addItem(spacer_item3) | ||||
|         self.verticalLayout_4.addLayout(self.horizontalLayout_2a) | ||||
|         self.horizontalLayout_2b = QHBoxLayout() | ||||
|         self._setupAddCheckbox( | ||||
|             "bigFilePartialHashesBox", | ||||
| @ -98,6 +113,8 @@ class PreferencesDialog(PreferencesDialogBase): | ||||
|         setchecked(self.wordWeightingBox, prefs.word_weighting) | ||||
|         setchecked(self.ignoreSmallFilesBox, prefs.ignore_small_files) | ||||
|         self.sizeThresholdSpinBox.setValue(prefs.small_file_threshold) | ||||
|         setchecked(self.ignoreLargeFilesBox, prefs.ignore_large_files) | ||||
|         self.sizeSaturationSpinBox.setValue(prefs.large_file_threshold) | ||||
|         setchecked(self.bigFilePartialHashesBox, prefs.big_file_partial_hashes) | ||||
|         self.bigSizeThresholdSpinBox.setValue(prefs.big_file_size_threshold) | ||||
| 
 | ||||
| @ -113,5 +130,7 @@ class PreferencesDialog(PreferencesDialogBase): | ||||
|         prefs.word_weighting = ischecked(self.wordWeightingBox) | ||||
|         prefs.ignore_small_files = ischecked(self.ignoreSmallFilesBox) | ||||
|         prefs.small_file_threshold = self.sizeThresholdSpinBox.value() | ||||
|         prefs.ignore_large_files = ischecked(self.ignoreLargeFilesBox) | ||||
|         prefs.large_file_threshold = self.sizeSaturationSpinBox.value() | ||||
|         prefs.big_file_partial_hashes = ischecked(self.bigFilePartialHashesBox) | ||||
|         prefs.big_file_size_threshold = self.bigSizeThresholdSpinBox.value() | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user