mirror of
https://github.com/arsenetar/dupeguru.git
synced 2025-03-10 05:34:36 +00:00
Add preference to ignore large files, close #430
This commit is contained in:
parent
809116c764
commit
3045361243
@ -77,15 +77,22 @@ class Scanner:
|
|||||||
self.discarded_file_count = 0
|
self.discarded_file_count = 0
|
||||||
|
|
||||||
def _getmatches(self, files, j):
|
def _getmatches(self, files, j):
|
||||||
if self.size_threshold or self.scan_type in {
|
if (
|
||||||
ScanType.CONTENTS,
|
self.size_threshold
|
||||||
ScanType.FOLDERS,
|
or self.large_size_threshold
|
||||||
}:
|
or self.scan_type
|
||||||
|
in {
|
||||||
|
ScanType.CONTENTS,
|
||||||
|
ScanType.FOLDERS,
|
||||||
|
}
|
||||||
|
):
|
||||||
j = j.start_subjob([2, 8])
|
j = j.start_subjob([2, 8])
|
||||||
for f in j.iter_with_progress(files, tr("Read size of %d/%d files")):
|
for f in j.iter_with_progress(files, tr("Read size of %d/%d files")):
|
||||||
f.size # pre-read, makes a smoother progress if read here (especially for bundles)
|
f.size # pre-read, makes a smoother progress if read here (especially for bundles)
|
||||||
if self.size_threshold:
|
if self.size_threshold:
|
||||||
files = [f for f in files if f.size >= self.size_threshold]
|
files = [f for f in files if f.size >= self.size_threshold]
|
||||||
|
if self.large_size_threshold:
|
||||||
|
files = [f for f in files if f.size <= self.large_size_threshold]
|
||||||
if self.scan_type in {ScanType.CONTENTS, ScanType.FOLDERS}:
|
if self.scan_type in {ScanType.CONTENTS, ScanType.FOLDERS}:
|
||||||
return engine.getmatches_by_contents(files, bigsize=self.big_file_size_threshold, j=j)
|
return engine.getmatches_by_contents(files, bigsize=self.big_file_size_threshold, j=j)
|
||||||
else:
|
else:
|
||||||
@ -202,5 +209,6 @@ class Scanner:
|
|||||||
scan_type = ScanType.FILENAME
|
scan_type = ScanType.FILENAME
|
||||||
scanned_tags = {"artist", "title"}
|
scanned_tags = {"artist", "title"}
|
||||||
size_threshold = 0
|
size_threshold = 0
|
||||||
|
large_size_threshold = 0
|
||||||
big_file_size_threshold = 0
|
big_file_size_threshold = 0
|
||||||
word_weighting = False
|
word_weighting = False
|
||||||
|
@ -56,6 +56,8 @@ def test_default_settings(fake_fileexists):
|
|||||||
eq_(s.mix_file_kind, True)
|
eq_(s.mix_file_kind, True)
|
||||||
eq_(s.word_weighting, False)
|
eq_(s.word_weighting, False)
|
||||||
eq_(s.match_similar_words, False)
|
eq_(s.match_similar_words, False)
|
||||||
|
eq_(s.size_threshold, 0)
|
||||||
|
eq_(s.large_size_threshold, 0)
|
||||||
eq_(s.big_file_size_threshold, 0)
|
eq_(s.big_file_size_threshold, 0)
|
||||||
|
|
||||||
|
|
||||||
@ -142,6 +144,50 @@ def test_content_scan_compare_sizes_first(fake_fileexists):
|
|||||||
eq_(len(s.get_dupe_groups(f)), 0)
|
eq_(len(s.get_dupe_groups(f)), 0)
|
||||||
|
|
||||||
|
|
||||||
|
def test_ignore_file_size(fake_fileexists):
|
||||||
|
s = Scanner()
|
||||||
|
s.scan_type = ScanType.CONTENTS
|
||||||
|
small_size = 10 # 10 bytes
|
||||||
|
s.size_threshold = 0
|
||||||
|
large_size = 100 * 1024 * 1024 # 100MB
|
||||||
|
s.large_size_threshold = 0
|
||||||
|
f = [
|
||||||
|
no("smallignore1", small_size - 1),
|
||||||
|
no("smallignore2", small_size - 1),
|
||||||
|
no("small1", small_size),
|
||||||
|
no("small2", small_size),
|
||||||
|
no("large1", large_size),
|
||||||
|
no("large2", large_size),
|
||||||
|
no("largeignore1", large_size + 1),
|
||||||
|
no("largeignore2", large_size + 1),
|
||||||
|
]
|
||||||
|
f[0].md5 = f[0].md5partial = f[0].md5samples = "smallignore"
|
||||||
|
f[1].md5 = f[1].md5partial = f[1].md5samples = "smallignore"
|
||||||
|
f[2].md5 = f[2].md5partial = f[2].md5samples = "small"
|
||||||
|
f[3].md5 = f[3].md5partial = f[3].md5samples = "small"
|
||||||
|
f[4].md5 = f[4].md5partial = f[4].md5samples = "large"
|
||||||
|
f[5].md5 = f[5].md5partial = f[5].md5samples = "large"
|
||||||
|
f[6].md5 = f[6].md5partial = f[6].md5samples = "largeignore"
|
||||||
|
f[7].md5 = f[7].md5partial = f[7].md5samples = "largeignore"
|
||||||
|
|
||||||
|
r = s.get_dupe_groups(f)
|
||||||
|
# No ignores
|
||||||
|
eq_(len(r), 4)
|
||||||
|
# Ignore smaller
|
||||||
|
s.size_threshold = small_size
|
||||||
|
r = s.get_dupe_groups(f)
|
||||||
|
eq_(len(r), 3)
|
||||||
|
# Ignore larger
|
||||||
|
s.size_threshold = 0
|
||||||
|
s.large_size_threshold = large_size
|
||||||
|
r = s.get_dupe_groups(f)
|
||||||
|
eq_(len(r), 3)
|
||||||
|
# Ignore both
|
||||||
|
s.size_threshold = small_size
|
||||||
|
r = s.get_dupe_groups(f)
|
||||||
|
eq_(len(r), 2)
|
||||||
|
|
||||||
|
|
||||||
def test_big_file_partial_hashes(fake_fileexists):
|
def test_big_file_partial_hashes(fake_fileexists):
|
||||||
s = Scanner()
|
s = Scanner()
|
||||||
s.scan_type = ScanType.CONTENTS
|
s.scan_type = ScanType.CONTENTS
|
||||||
|
@ -166,6 +166,10 @@ class DupeGuru(QObject):
|
|||||||
self.model.options["match_similar_words"] = self.prefs.match_similar
|
self.model.options["match_similar_words"] = self.prefs.match_similar
|
||||||
threshold = self.prefs.small_file_threshold if self.prefs.ignore_small_files else 0
|
threshold = self.prefs.small_file_threshold if self.prefs.ignore_small_files else 0
|
||||||
self.model.options["size_threshold"] = threshold * 1024 # threshold is in KB. The scanner wants bytes
|
self.model.options["size_threshold"] = threshold * 1024 # threshold is in KB. The scanner wants bytes
|
||||||
|
large_threshold = self.prefs.large_file_threshold if self.prefs.ignore_large_files else 0
|
||||||
|
self.model.options["large_size_threshold"] = (
|
||||||
|
large_threshold * 1024 * 1024
|
||||||
|
) # threshold is in MB. The Scanner wants bytes
|
||||||
big_file_size_threshold = self.prefs.big_file_size_threshold if self.prefs.big_file_partial_hashes else 0
|
big_file_size_threshold = self.prefs.big_file_size_threshold if self.prefs.big_file_partial_hashes else 0
|
||||||
self.model.options["big_file_size_threshold"] = (
|
self.model.options["big_file_size_threshold"] = (
|
||||||
big_file_size_threshold
|
big_file_size_threshold
|
||||||
|
@ -72,6 +72,8 @@ class Preferences(PreferencesBase):
|
|||||||
self.match_similar = get("MatchSimilar", self.match_similar)
|
self.match_similar = get("MatchSimilar", self.match_similar)
|
||||||
self.ignore_small_files = get("IgnoreSmallFiles", self.ignore_small_files)
|
self.ignore_small_files = get("IgnoreSmallFiles", self.ignore_small_files)
|
||||||
self.small_file_threshold = get("SmallFileThreshold", self.small_file_threshold)
|
self.small_file_threshold = get("SmallFileThreshold", self.small_file_threshold)
|
||||||
|
self.ignore_large_files = get("IgnoreLargeFiles", self.ignore_large_files)
|
||||||
|
self.large_file_threshold = get("LargeFileThreshold", self.large_file_threshold)
|
||||||
self.big_file_partial_hashes = get("BigFilePartialHashes", self.big_file_partial_hashes)
|
self.big_file_partial_hashes = get("BigFilePartialHashes", self.big_file_partial_hashes)
|
||||||
self.big_file_size_threshold = get("BigFileSizeThreshold", self.big_file_size_threshold)
|
self.big_file_size_threshold = get("BigFileSizeThreshold", self.big_file_size_threshold)
|
||||||
self.scan_tag_track = get("ScanTagTrack", self.scan_tag_track)
|
self.scan_tag_track = get("ScanTagTrack", self.scan_tag_track)
|
||||||
@ -119,6 +121,8 @@ class Preferences(PreferencesBase):
|
|||||||
self.match_similar = False
|
self.match_similar = False
|
||||||
self.ignore_small_files = True
|
self.ignore_small_files = True
|
||||||
self.small_file_threshold = 10 # KB
|
self.small_file_threshold = 10 # KB
|
||||||
|
self.ignore_large_files = False
|
||||||
|
self.large_file_threshold = 1000 # MB
|
||||||
self.big_file_partial_hashes = False
|
self.big_file_partial_hashes = False
|
||||||
self.big_file_size_threshold = 100 # MB
|
self.big_file_size_threshold = 100 # MB
|
||||||
self.scan_tag_track = False
|
self.scan_tag_track = False
|
||||||
@ -167,6 +171,8 @@ class Preferences(PreferencesBase):
|
|||||||
set_("MatchSimilar", self.match_similar)
|
set_("MatchSimilar", self.match_similar)
|
||||||
set_("IgnoreSmallFiles", self.ignore_small_files)
|
set_("IgnoreSmallFiles", self.ignore_small_files)
|
||||||
set_("SmallFileThreshold", self.small_file_threshold)
|
set_("SmallFileThreshold", self.small_file_threshold)
|
||||||
|
set_("IgnoreLargeFiles", self.ignore_large_files)
|
||||||
|
set_("LargeFileThreshold", self.large_file_threshold)
|
||||||
set_("BigFilePartialHashes", self.big_file_partial_hashes)
|
set_("BigFilePartialHashes", self.big_file_partial_hashes)
|
||||||
set_("BigFileSizeThreshold", self.big_file_size_threshold)
|
set_("BigFileSizeThreshold", self.big_file_size_threshold)
|
||||||
set_("ScanTagTrack", self.scan_tag_track)
|
set_("ScanTagTrack", self.scan_tag_track)
|
||||||
|
@ -64,6 +64,21 @@ class PreferencesDialog(PreferencesDialogBase):
|
|||||||
spacer_item1 = QSpacerItem(40, 20, QSizePolicy.Expanding, QSizePolicy.Minimum)
|
spacer_item1 = QSpacerItem(40, 20, QSizePolicy.Expanding, QSizePolicy.Minimum)
|
||||||
self.horizontalLayout_2.addItem(spacer_item1)
|
self.horizontalLayout_2.addItem(spacer_item1)
|
||||||
self.verticalLayout_4.addLayout(self.horizontalLayout_2)
|
self.verticalLayout_4.addLayout(self.horizontalLayout_2)
|
||||||
|
self.horizontalLayout_2a = QHBoxLayout()
|
||||||
|
self._setupAddCheckbox("ignoreLargeFilesBox", tr("Ignore files larger than"), self.widget)
|
||||||
|
self.horizontalLayout_2a.addWidget(self.ignoreLargeFilesBox)
|
||||||
|
self.sizeSaturationSpinBox = QSpinBox(self.widget)
|
||||||
|
size_policy = QSizePolicy(QSizePolicy.Maximum, QSizePolicy.Fixed)
|
||||||
|
self.sizeSaturationSpinBox.setSizePolicy(size_policy)
|
||||||
|
self.sizeSaturationSpinBox.setMaximumSize(QSize(100, 16777215))
|
||||||
|
self.sizeSaturationSpinBox.setRange(0, 1000000)
|
||||||
|
self.horizontalLayout_2a.addWidget(self.sizeSaturationSpinBox)
|
||||||
|
self.label_6a = QLabel(self.widget)
|
||||||
|
self.label_6a.setText(tr("MB"))
|
||||||
|
self.horizontalLayout_2a.addWidget(self.label_6a)
|
||||||
|
spacer_item3 = QSpacerItem(40, 20, QSizePolicy.Expanding, QSizePolicy.Minimum)
|
||||||
|
self.horizontalLayout_2a.addItem(spacer_item3)
|
||||||
|
self.verticalLayout_4.addLayout(self.horizontalLayout_2a)
|
||||||
self.horizontalLayout_2b = QHBoxLayout()
|
self.horizontalLayout_2b = QHBoxLayout()
|
||||||
self._setupAddCheckbox(
|
self._setupAddCheckbox(
|
||||||
"bigFilePartialHashesBox",
|
"bigFilePartialHashesBox",
|
||||||
@ -98,6 +113,8 @@ class PreferencesDialog(PreferencesDialogBase):
|
|||||||
setchecked(self.wordWeightingBox, prefs.word_weighting)
|
setchecked(self.wordWeightingBox, prefs.word_weighting)
|
||||||
setchecked(self.ignoreSmallFilesBox, prefs.ignore_small_files)
|
setchecked(self.ignoreSmallFilesBox, prefs.ignore_small_files)
|
||||||
self.sizeThresholdSpinBox.setValue(prefs.small_file_threshold)
|
self.sizeThresholdSpinBox.setValue(prefs.small_file_threshold)
|
||||||
|
setchecked(self.ignoreLargeFilesBox, prefs.ignore_large_files)
|
||||||
|
self.sizeSaturationSpinBox.setValue(prefs.large_file_threshold)
|
||||||
setchecked(self.bigFilePartialHashesBox, prefs.big_file_partial_hashes)
|
setchecked(self.bigFilePartialHashesBox, prefs.big_file_partial_hashes)
|
||||||
self.bigSizeThresholdSpinBox.setValue(prefs.big_file_size_threshold)
|
self.bigSizeThresholdSpinBox.setValue(prefs.big_file_size_threshold)
|
||||||
|
|
||||||
@ -113,5 +130,7 @@ class PreferencesDialog(PreferencesDialogBase):
|
|||||||
prefs.word_weighting = ischecked(self.wordWeightingBox)
|
prefs.word_weighting = ischecked(self.wordWeightingBox)
|
||||||
prefs.ignore_small_files = ischecked(self.ignoreSmallFilesBox)
|
prefs.ignore_small_files = ischecked(self.ignoreSmallFilesBox)
|
||||||
prefs.small_file_threshold = self.sizeThresholdSpinBox.value()
|
prefs.small_file_threshold = self.sizeThresholdSpinBox.value()
|
||||||
|
prefs.ignore_large_files = ischecked(self.ignoreLargeFilesBox)
|
||||||
|
prefs.large_file_threshold = self.sizeSaturationSpinBox.value()
|
||||||
prefs.big_file_partial_hashes = ischecked(self.bigFilePartialHashesBox)
|
prefs.big_file_partial_hashes = ischecked(self.bigFilePartialHashesBox)
|
||||||
prefs.big_file_size_threshold = self.bigSizeThresholdSpinBox.value()
|
prefs.big_file_size_threshold = self.bigSizeThresholdSpinBox.value()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user