diff --git a/core/scanner.py b/core/scanner.py index ea68a20a..8791c8e3 100644 --- a/core/scanner.py +++ b/core/scanner.py @@ -77,15 +77,22 @@ class Scanner: self.discarded_file_count = 0 def _getmatches(self, files, j): - if self.size_threshold or self.scan_type in { - ScanType.CONTENTS, - ScanType.FOLDERS, - }: + if ( + self.size_threshold + or self.large_size_threshold + or self.scan_type + in { + ScanType.CONTENTS, + ScanType.FOLDERS, + } + ): j = j.start_subjob([2, 8]) for f in j.iter_with_progress(files, tr("Read size of %d/%d files")): f.size # pre-read, makes a smoother progress if read here (especially for bundles) if self.size_threshold: files = [f for f in files if f.size >= self.size_threshold] + if self.large_size_threshold: + files = [f for f in files if f.size <= self.large_size_threshold] if self.scan_type in {ScanType.CONTENTS, ScanType.FOLDERS}: return engine.getmatches_by_contents(files, bigsize=self.big_file_size_threshold, j=j) else: @@ -202,5 +209,6 @@ class Scanner: scan_type = ScanType.FILENAME scanned_tags = {"artist", "title"} size_threshold = 0 + large_size_threshold = 0 big_file_size_threshold = 0 word_weighting = False diff --git a/core/tests/scanner_test.py b/core/tests/scanner_test.py index e9167360..c0aae5b4 100644 --- a/core/tests/scanner_test.py +++ b/core/tests/scanner_test.py @@ -56,6 +56,8 @@ def test_default_settings(fake_fileexists): eq_(s.mix_file_kind, True) eq_(s.word_weighting, False) eq_(s.match_similar_words, False) + eq_(s.size_threshold, 0) + eq_(s.large_size_threshold, 0) eq_(s.big_file_size_threshold, 0) @@ -142,6 +144,50 @@ def test_content_scan_compare_sizes_first(fake_fileexists): eq_(len(s.get_dupe_groups(f)), 0) +def test_ignore_file_size(fake_fileexists): + s = Scanner() + s.scan_type = ScanType.CONTENTS + small_size = 10 # 10 bytes + s.size_threshold = 0 + large_size = 100 * 1024 * 1024 # 100MB + s.large_size_threshold = 0 + f = [ + no("smallignore1", small_size - 1), + no("smallignore2", small_size - 1), +
no("small1", small_size), + no("small2", small_size), + no("large1", large_size), + no("large2", large_size), + no("largeignore1", large_size + 1), + no("largeignore2", large_size + 1), + ] + f[0].md5 = f[0].md5partial = f[0].md5samples = "smallignore" + f[1].md5 = f[1].md5partial = f[1].md5samples = "smallignore" + f[2].md5 = f[2].md5partial = f[2].md5samples = "small" + f[3].md5 = f[3].md5partial = f[3].md5samples = "small" + f[4].md5 = f[4].md5partial = f[4].md5samples = "large" + f[5].md5 = f[5].md5partial = f[5].md5samples = "large" + f[6].md5 = f[6].md5partial = f[6].md5samples = "largeignore" + f[7].md5 = f[7].md5partial = f[7].md5samples = "largeignore" + + r = s.get_dupe_groups(f) + # No ignores + eq_(len(r), 4) + # Ignore smaller + s.size_threshold = small_size + r = s.get_dupe_groups(f) + eq_(len(r), 3) + # Ignore larger + s.size_threshold = 0 + s.large_size_threshold = large_size + r = s.get_dupe_groups(f) + eq_(len(r), 3) + # Ignore both + s.size_threshold = small_size + r = s.get_dupe_groups(f) + eq_(len(r), 2) + + def test_big_file_partial_hashes(fake_fileexists): s = Scanner() s.scan_type = ScanType.CONTENTS diff --git a/qt/app.py b/qt/app.py index cfffb233..1626a974 100644 --- a/qt/app.py +++ b/qt/app.py @@ -166,6 +166,10 @@ class DupeGuru(QObject): self.model.options["match_similar_words"] = self.prefs.match_similar threshold = self.prefs.small_file_threshold if self.prefs.ignore_small_files else 0 self.model.options["size_threshold"] = threshold * 1024 # threshold is in KB. The scanner wants bytes + large_threshold = self.prefs.large_file_threshold if self.prefs.ignore_large_files else 0 + self.model.options["large_size_threshold"] = ( + large_threshold * 1024 * 1024 + ) # threshold is in MB. 
The Scanner wants bytes big_file_size_threshold = self.prefs.big_file_size_threshold if self.prefs.big_file_partial_hashes else 0 self.model.options["big_file_size_threshold"] = ( big_file_size_threshold diff --git a/qt/preferences.py b/qt/preferences.py index eb57eaac..9c8875dc 100644 --- a/qt/preferences.py +++ b/qt/preferences.py @@ -72,6 +72,8 @@ class Preferences(PreferencesBase): self.match_similar = get("MatchSimilar", self.match_similar) self.ignore_small_files = get("IgnoreSmallFiles", self.ignore_small_files) self.small_file_threshold = get("SmallFileThreshold", self.small_file_threshold) + self.ignore_large_files = get("IgnoreLargeFiles", self.ignore_large_files) + self.large_file_threshold = get("LargeFileThreshold", self.large_file_threshold) self.big_file_partial_hashes = get("BigFilePartialHashes", self.big_file_partial_hashes) self.big_file_size_threshold = get("BigFileSizeThreshold", self.big_file_size_threshold) self.scan_tag_track = get("ScanTagTrack", self.scan_tag_track) @@ -119,6 +121,8 @@ class Preferences(PreferencesBase): self.match_similar = False self.ignore_small_files = True self.small_file_threshold = 10 # KB + self.ignore_large_files = False + self.large_file_threshold = 1000 # MB self.big_file_partial_hashes = False self.big_file_size_threshold = 100 # MB self.scan_tag_track = False @@ -167,6 +171,8 @@ class Preferences(PreferencesBase): set_("MatchSimilar", self.match_similar) set_("IgnoreSmallFiles", self.ignore_small_files) set_("SmallFileThreshold", self.small_file_threshold) + set_("IgnoreLargeFiles", self.ignore_large_files) + set_("LargeFileThreshold", self.large_file_threshold) set_("BigFilePartialHashes", self.big_file_partial_hashes) set_("BigFileSizeThreshold", self.big_file_size_threshold) set_("ScanTagTrack", self.scan_tag_track) diff --git a/qt/se/preferences_dialog.py b/qt/se/preferences_dialog.py index ba79679a..7ceeba4b 100644 --- a/qt/se/preferences_dialog.py +++ b/qt/se/preferences_dialog.py @@ -64,6 +64,21 @@ 
class PreferencesDialog(PreferencesDialogBase): spacer_item1 = QSpacerItem(40, 20, QSizePolicy.Expanding, QSizePolicy.Minimum) self.horizontalLayout_2.addItem(spacer_item1) self.verticalLayout_4.addLayout(self.horizontalLayout_2) + self.horizontalLayout_2a = QHBoxLayout() + self._setupAddCheckbox("ignoreLargeFilesBox", tr("Ignore files larger than"), self.widget) + self.horizontalLayout_2a.addWidget(self.ignoreLargeFilesBox) + self.sizeSaturationSpinBox = QSpinBox(self.widget) + size_policy = QSizePolicy(QSizePolicy.Maximum, QSizePolicy.Fixed) + self.sizeSaturationSpinBox.setSizePolicy(size_policy) + self.sizeSaturationSpinBox.setMaximumSize(QSize(100, 16777215)) + self.sizeSaturationSpinBox.setRange(0, 1000000) + self.horizontalLayout_2a.addWidget(self.sizeSaturationSpinBox) + self.label_6a = QLabel(self.widget) + self.label_6a.setText(tr("MB")) + self.horizontalLayout_2a.addWidget(self.label_6a) + spacer_item3 = QSpacerItem(40, 20, QSizePolicy.Expanding, QSizePolicy.Minimum) + self.horizontalLayout_2a.addItem(spacer_item3) + self.verticalLayout_4.addLayout(self.horizontalLayout_2a) self.horizontalLayout_2b = QHBoxLayout() self._setupAddCheckbox( "bigFilePartialHashesBox", @@ -98,6 +113,8 @@ class PreferencesDialog(PreferencesDialogBase): setchecked(self.wordWeightingBox, prefs.word_weighting) setchecked(self.ignoreSmallFilesBox, prefs.ignore_small_files) self.sizeThresholdSpinBox.setValue(prefs.small_file_threshold) + setchecked(self.ignoreLargeFilesBox, prefs.ignore_large_files) + self.sizeSaturationSpinBox.setValue(prefs.large_file_threshold) setchecked(self.bigFilePartialHashesBox, prefs.big_file_partial_hashes) self.bigSizeThresholdSpinBox.setValue(prefs.big_file_size_threshold) @@ -113,5 +130,7 @@ class PreferencesDialog(PreferencesDialogBase): prefs.word_weighting = ischecked(self.wordWeightingBox) prefs.ignore_small_files = ischecked(self.ignoreSmallFilesBox) prefs.small_file_threshold = self.sizeThresholdSpinBox.value() + prefs.ignore_large_files = 
ischecked(self.ignoreLargeFilesBox) + prefs.large_file_threshold = self.sizeSaturationSpinBox.value() prefs.big_file_partial_hashes = ischecked(self.bigFilePartialHashesBox) prefs.big_file_size_threshold = self.bigSizeThresholdSpinBox.value()