1
0
mirror of https://github.com/arsenetar/dupeguru.git synced 2025-09-11 17:58:17 +00:00

Remove filtering of 0 size files in engine

File size can already be filtered at a higher level, and some users
may want to see zero-length files. Fixes #321.
This commit is contained in:
Andrew Senetar 2021-08-28 18:09:10 -05:00
parent 0a0694e095
commit e22d7d2fc9
Signed by: arsenetar
GPG Key ID: C63300DCE48AB2F1
2 changed files with 5 additions and 6 deletions

View File

@ -288,7 +288,6 @@ def getmatches_by_contents(files, bigsize=0, j=job.nulljob):
"""
size2files = defaultdict(set)
for f in files:
if f.size:
size2files[f.size].add(f)
del files
possible_matches = [files for files in size2files.values() if len(files) > 1]
@ -300,6 +299,10 @@ def getmatches_by_contents(files, bigsize=0, j=job.nulljob):
for first, second in itertools.combinations(group, 2):
if first.is_ref and second.is_ref:
continue # Don't spend time comparing two ref pics together.
if first.size == 0 and second.size == 0:
# skip md5 for zero length files
result.append(Match(first, second, 100))
continue
if first.md5partial == second.md5partial:
if bigsize > 0 and first.size > bigsize:
if first.md5samples == second.md5samples:

View File

@ -530,10 +530,6 @@ class TestCaseGetMatches:
class TestCaseGetMatchesByContents:
def test_dont_compare_empty_files(self):
o1, o2 = no(size=0), no(size=0)
assert not getmatches_by_contents([o1, o2])
def test_big_file_partial_hashes(self):
smallsize = 1
bigsize = 100 * 1024 * 1024 # 100MB