mirror of https://github.com/arsenetar/dupeguru.git
Add preference to ignore large files, close #430
@@ -77,15 +77,22 @@ class Scanner:
         self.discarded_file_count = 0
 
     def _getmatches(self, files, j):
-        if self.size_threshold or self.scan_type in {
-            ScanType.CONTENTS,
-            ScanType.FOLDERS,
-        }:
+        if (
+            self.size_threshold
+            or self.large_size_threshold
+            or self.scan_type
+            in {
+                ScanType.CONTENTS,
+                ScanType.FOLDERS,
+            }
+        ):
             j = j.start_subjob([2, 8])
             for f in j.iter_with_progress(files, tr("Read size of %d/%d files")):
                 f.size  # pre-read, makes a smoother progress if read here (especially for bundles)
         if self.size_threshold:
             files = [f for f in files if f.size >= self.size_threshold]
+        if self.large_size_threshold:
+            files = [f for f in files if f.size <= self.large_size_threshold]
         if self.scan_type in {ScanType.CONTENTS, ScanType.FOLDERS}:
             return engine.getmatches_by_contents(files, bigsize=self.big_file_size_threshold, j=j)
         else:
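In effect, the hunk above adds a second, inclusive size filter next to the existing lower bound. A minimal standalone sketch of that filtering semantics, assuming sizes are plain byte counts (the filter_by_size and FakeFile names are illustrative and not part of the codebase):

    from collections import namedtuple

    # Illustrative sketch only: mirrors the two size filters applied in _getmatches().
    # A threshold of 0 disables that filter (matching the class defaults), and both
    # comparisons are inclusive.
    def filter_by_size(files, size_threshold=0, large_size_threshold=0):
        if size_threshold:
            files = [f for f in files if f.size >= size_threshold]
        if large_size_threshold:
            files = [f for f in files if f.size <= large_size_threshold]
        return files

    FakeFile = namedtuple("FakeFile", "name size")
    candidates = [FakeFile("tiny", 9), FakeFile("ok", 10), FakeFile("huge", 101)]
    # Keep files between 10 and 100 bytes inclusive: only "ok" survives.
    assert [f.name for f in filter_by_size(candidates, 10, 100)] == ["ok"]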
@@ -202,5 +209,6 @@ class Scanner:
     scan_type = ScanType.FILENAME
     scanned_tags = {"artist", "title"}
     size_threshold = 0
+    large_size_threshold = 0
     big_file_size_threshold = 0
     word_weighting = False
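With the new class-level default of 0, the filter stays off until a caller assigns a positive byte count, since _getmatches() compares the attribute directly against f.size. A hypothetical usage sketch, assuming the repository's core.scanner module layout and a preference expressed in MB (the 500 MB figure, the conversion, and the empty file list are illustrative, not part of this commit):

    from core.scanner import Scanner, ScanType

    scanner = Scanner()
    scanner.scan_type = ScanType.CONTENTS
    scanner.size_threshold = 0                         # 0 keeps the small-file filter disabled
    scanner.large_size_threshold = 500 * 1024 * 1024   # hypothetical: ignore files over 500 MB
    files = []  # in practice, the fs.File objects collected from the chosen directories
    groups = scanner.get_dupe_groups(files)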
@@ -56,6 +56,8 @@ def test_default_settings(fake_fileexists):
     eq_(s.mix_file_kind, True)
     eq_(s.word_weighting, False)
     eq_(s.match_similar_words, False)
     eq_(s.size_threshold, 0)
+    eq_(s.large_size_threshold, 0)
     eq_(s.big_file_size_threshold, 0)
@@ -142,6 +144,50 @@ def test_content_scan_compare_sizes_first(fake_fileexists):
     eq_(len(s.get_dupe_groups(f)), 0)
 
 
+def test_ignore_file_size(fake_fileexists):
+    s = Scanner()
+    s.scan_type = ScanType.CONTENTS
+    small_size = 10  # 10KB
+    s.size_threshold = 0
+    large_size = 100 * 1024 * 1024  # 100MB
+    s.large_size_threshold = 0
+    f = [
+        no("smallignore1", small_size - 1),
+        no("smallignore2", small_size - 1),
+        no("small1", small_size),
+        no("small2", small_size),
+        no("large1", large_size),
+        no("large2", large_size),
+        no("largeignore1", large_size + 1),
+        no("largeignore2", large_size + 1),
+    ]
+    f[0].md5 = f[0].md5partial = f[0].md5samples = "smallignore"
+    f[1].md5 = f[1].md5partial = f[1].md5samples = "smallignore"
+    f[2].md5 = f[2].md5partial = f[2].md5samples = "small"
+    f[3].md5 = f[3].md5partial = f[3].md5samples = "small"
+    f[4].md5 = f[4].md5partial = f[4].md5samples = "large"
+    f[5].md5 = f[5].md5partial = f[5].md5samples = "large"
+    f[6].md5 = f[6].md5partial = f[6].md5samples = "largeignore"
+    f[7].md5 = f[7].md5partial = f[7].md5samples = "largeignore"
+
+    r = s.get_dupe_groups(f)
+    # No ignores
+    eq_(len(r), 4)
+    # Ignore smaller
+    s.size_threshold = small_size
+    r = s.get_dupe_groups(f)
+    eq_(len(r), 3)
+    # Ignore larger
+    s.size_threshold = 0
+    s.large_size_threshold = large_size
+    r = s.get_dupe_groups(f)
+    eq_(len(r), 3)
+    # Ignore both
+    s.size_threshold = small_size
+    r = s.get_dupe_groups(f)
+    eq_(len(r), 2)
+
+
 def test_big_file_partial_hashes(fake_fileexists):
     s = Scanner()
     s.scan_type = ScanType.CONTENTS
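The group counts asserted in the new test pin down the boundary behaviour: files whose size equals a threshold are kept (the comparisons are >= and <=), and only the -1/+1 variants are discarded as each filter is switched on:

    # thresholds (0, 0)                        -> 4 groups: smallignore, small, large, largeignore
    # size_threshold = 10                      -> 3 groups: the smallignore pair is filtered out
    # large_size_threshold = 100 * 1024 * 1024 -> 3 groups: the largeignore pair is filtered out
    # both thresholds set                      -> 2 groups: only the small and large pairs remain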