mirror of
https://github.com/arsenetar/dupeguru.git
synced 2025-03-10 05:34:36 +00:00
Simplify getmatches_by_contents() signature
The partial and sizeattr parameters are not needed anymore.
This commit is contained in:
parent
64e86c9ff9
commit
fbdd1d866e
@ -246,21 +246,15 @@ def getmatches(
|
|||||||
return result
|
return result
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def getmatches_by_contents(files, sizeattr='size', partial=False, j=job.nulljob):
|
def getmatches_by_contents(files, j=job.nulljob):
|
||||||
"""Returns a list of :class:`Match` within ``files`` if their contents is the same.
|
"""Returns a list of :class:`Match` within ``files`` if their contents is the same.
|
||||||
|
|
||||||
:param str sizeattr: attribute name of the :class:`~core.fs.file` that returns the size of the
|
|
||||||
file to use for comparison.
|
|
||||||
:param bool partial: if true, will use the "md5partial" attribute instead of "md5" to compute
|
|
||||||
contents hash.
|
|
||||||
:param j: A :ref:`job progress instance <jobs>`.
|
:param j: A :ref:`job progress instance <jobs>`.
|
||||||
"""
|
"""
|
||||||
j = j.start_subjob([2, 8])
|
|
||||||
size2files = defaultdict(set)
|
size2files = defaultdict(set)
|
||||||
for file in j.iter_with_progress(files, tr("Read size of %d/%d files")):
|
for f in files:
|
||||||
filesize = getattr(file, sizeattr)
|
if f.size:
|
||||||
if filesize:
|
size2files[f.size].add(f)
|
||||||
size2files[filesize].add(file)
|
|
||||||
del files
|
del files
|
||||||
possible_matches = [files for files in size2files.values() if len(files) > 1]
|
possible_matches = [files for files in size2files.values() if len(files) > 1]
|
||||||
del size2files
|
del size2files
|
||||||
@ -271,7 +265,7 @@ def getmatches_by_contents(files, sizeattr='size', partial=False, j=job.nulljob)
|
|||||||
if first.is_ref and second.is_ref:
|
if first.is_ref and second.is_ref:
|
||||||
continue # Don't spend time comparing two ref pics together.
|
continue # Don't spend time comparing two ref pics together.
|
||||||
if first.md5partial == second.md5partial:
|
if first.md5partial == second.md5partial:
|
||||||
if partial or first.md5 == second.md5:
|
if first.md5 == second.md5:
|
||||||
result.append(Match(first, second, 100))
|
result.append(Match(first, second, 100))
|
||||||
j.add_progress(desc=tr("%d matches found") % len(result))
|
j.add_progress(desc=tr("%d matches found") % len(result))
|
||||||
return result
|
return result
|
||||||
|
@ -72,13 +72,14 @@ class Scanner:
|
|||||||
self.discarded_file_count = 0
|
self.discarded_file_count = 0
|
||||||
|
|
||||||
def _getmatches(self, files, j):
|
def _getmatches(self, files, j):
|
||||||
if self.size_threshold:
|
if self.size_threshold or self.scan_type in {ScanType.Contents, ScanType.Folders}:
|
||||||
j = j.start_subjob([2, 8])
|
j = j.start_subjob([2, 8])
|
||||||
for f in j.iter_with_progress(files, tr("Read size of %d/%d files")):
|
for f in j.iter_with_progress(files, tr("Read size of %d/%d files")):
|
||||||
f.size # pre-read, makes a smoother progress if read here (especially for bundles)
|
f.size # pre-read, makes a smoother progress if read here (especially for bundles)
|
||||||
|
if self.size_threshold:
|
||||||
files = [f for f in files if f.size >= self.size_threshold]
|
files = [f for f in files if f.size >= self.size_threshold]
|
||||||
if self.scan_type in {ScanType.Contents, ScanType.Folders}:
|
if self.scan_type in {ScanType.Contents, ScanType.Folders}:
|
||||||
return engine.getmatches_by_contents(files, sizeattr='size', partial=False, j=j)
|
return engine.getmatches_by_contents(files, j=j)
|
||||||
else:
|
else:
|
||||||
j = j.start_subjob([2, 8])
|
j = j.start_subjob([2, 8])
|
||||||
kw = {}
|
kw = {}
|
||||||
@ -98,7 +99,7 @@ class Scanner:
|
|||||||
],
|
],
|
||||||
}[self.scan_type]
|
}[self.scan_type]
|
||||||
for f in j.iter_with_progress(files, tr("Read metadata of %d/%d files")):
|
for f in j.iter_with_progress(files, tr("Read metadata of %d/%d files")):
|
||||||
logging.debug("Reading metadata of {}".format(str(f.path)))
|
logging.debug("Reading metadata of %s", f.path)
|
||||||
f.words = func(f)
|
f.words = func(f)
|
||||||
return engine.getmatches(files, j=j, **kw)
|
return engine.getmatches(files, j=j, **kw)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user