Mirror of https://github.com/arsenetar/dupeguru.git
commit 372a682610 (parent 44266273bf)
@@ -10,7 +10,7 @@ import logging
 import multiprocessing
 from itertools import combinations
 
-from hscommon.util import extract
+from hscommon.util import extract, iterconsume
 from hscommon.trans import tr
 from hscommon.jobprogress import job
 
@@ -175,25 +175,34 @@ def getmatches(pictures, cache_path, threshold=75, match_scaled=False, j=job.nul
     comparisons_to_do = list(combinations(chunks + [None], 2))
     comparison_count = 0
     j.start_job(len(comparisons_to_do))
-    for ref_chunk, other_chunk in comparisons_to_do:
-        picinfo = {p.cache_id: get_picinfo(p) for p in ref_chunk}
-        ref_ids = [p.cache_id for p in ref_chunk]
-        if other_chunk is not None:
-            other_ids = [p.cache_id for p in other_chunk]
-            picinfo.update({p.cache_id: get_picinfo(p) for p in other_chunk})
-        else:
-            other_ids = None
-        args = (ref_ids, other_ids, cache_path, threshold, picinfo)
-        async_results.append(pool.apply_async(async_compare, args))
-        collect_results()
-    collect_results(collect_all=True)
+    try:
+        for ref_chunk, other_chunk in comparisons_to_do:
+            picinfo = {p.cache_id: get_picinfo(p) for p in ref_chunk}
+            ref_ids = [p.cache_id for p in ref_chunk]
+            if other_chunk is not None:
+                other_ids = [p.cache_id for p in other_chunk]
+                picinfo.update({p.cache_id: get_picinfo(p) for p in other_chunk})
+            else:
+                other_ids = None
+            args = (ref_ids, other_ids, cache_path, threshold, picinfo)
+            async_results.append(pool.apply_async(async_compare, args))
+            collect_results()
+        collect_results(collect_all=True)
+    except MemoryError:
+        # Rare, but possible, even in 64bit situations (ref #264). What do we do now? We free up
+        # some wiggle room, log the incident, and stop matching right here. We then process
+        # the matches we have. The rest of the process doesn't allocate much and we should be
+        # alright.
+        del matches[-1000:]  # some wiggle room to ensure we don't run out of memory again.
+        logging.warning("Ran out of memory when scanning! We had %d matches.", len(matches) + 1000)
     pool.close()
 
     result = []
     myiter = j.iter_with_progress(
-        matches,
+        iterconsume(matches),
         tr("Verified %d/%d matches"),
-        every=10
+        every=10,
+        count=len(matches),
     )
     for ref_id, other_id, percentage in myiter:
         ref = id2picture[ref_id]
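The hunk above does two things: it guards the chunk-comparison loop against MemoryError, trimming the tail of ``matches`` to regain some headroom before carrying on, and it feeds the verification loop through ``iterconsume(matches)`` so entries stop being referenced by the list once they have been yielded. Because ``iterconsume`` returns a generator, ``iter_with_progress`` can no longer call ``len()`` on its argument, which is why the explicit ``count=len(matches)`` is passed (see the next hunk). Below is a minimal, standalone sketch of the same recovery pattern; the names ``produce_matches`` and ``WIGGLE_ROOM`` are illustrative and not part of the dupeGuru codebase.

import logging

WIGGLE_ROOM = 1000  # how many trailing entries to drop when memory runs out

def collect_with_memory_guard(produce_matches):
    """Accumulate results, but survive a MemoryError by keeping what we already have."""
    matches = []
    try:
        for match in produce_matches():
            matches.append(match)
    except MemoryError:
        # Free some memory right away so the rest of the pipeline can finish,
        # then log and fall through to process the matches collected so far.
        del matches[-WIGGLE_ROOM:]
        logging.warning("Ran out of memory while matching; continuing with %d matches.", len(matches))
    return matches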
@@ -80,21 +80,27 @@ class Job:
     def check_if_cancelled(self):
         self._do_update('')
 
-    def iter_with_progress(self, sequence, desc_format=None, every=1):
-        ''' Iterate through sequence while automatically adding progress.
-        '''
+    def iter_with_progress(self, iterable, desc_format=None, every=1, count=None):
+        """Iterate through ``iterable`` while automatically adding progress.
+
+        WARNING: We need our iterable's length. If ``iterable`` is not a sequence (that is,
+        something we can call ``len()`` on), you *have* to specify a count through the ``count``
+        argument. If ``count`` is ``None``, ``len(iterable)`` is used.
+        """
+        if count is None:
+            count = len(iterable)
         desc = ''
         if desc_format:
-            desc = desc_format % (0, len(sequence))
-        self.start_job(len(sequence), desc)
-        for i, element in enumerate(sequence, start=1):
+            desc = desc_format % (0, count)
+        self.start_job(count, desc)
+        for i, element in enumerate(iterable, start=1):
             yield element
             if i % every == 0:
                 if desc_format:
-                    desc = desc_format % (i, len(sequence))
+                    desc = desc_format % (i, count)
                 self.add_progress(progress=every, desc=desc)
         if desc_format:
-            desc = desc_format % (len(sequence), len(sequence))
+            desc = desc_format % (count, count)
         self.set_progress(100, desc)
 
     def start_job(self, max_progress=100, desc=''):
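The new ``count`` argument exists precisely for callers like the one in the previous hunk, which now pass a generator instead of a list. A self-contained sketch (independent of hscommon) illustrates why: ``len()`` raises TypeError on a generator, so the expected item count has to be supplied up front for progress reporting to work. The ``report`` callback here is a stand-in for the job's ``add_progress``/``set_progress`` calls.

def iter_with_progress_sketch(iterable, report, every=1, count=None):
    # Mirrors the patched method: fall back to len() only when no count is given,
    # which works for real sequences but not for generators.
    if count is None:
        count = len(iterable)
    for i, element in enumerate(iterable, start=1):
        yield element
        if i % every == 0:
            report(i, count)
    report(count, count)

# Usage: a generator has no length, so the caller passes count explicitly.
squares = (i * i for i in range(50))
for value in iter_with_progress_sketch(squares, lambda done, total: print("%d/%d" % (done, total)), every=10, count=50):
    pass  # consume values; progress is reported every 10 items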
@@ -117,6 +117,24 @@ def trailiter(iterable, skipfirst=False):
         yield prev, item
         prev = item
 
+def iterconsume(seq):
+    """Iterate over ``seq`` and discard yielded objects.
+
+    As each element is yielded, its slot in the sequence is replaced by ``None``, so the
+    sequence stops referencing it.
+
+    This is useful in tight memory situations where you are looping over a sequence of objects that
+    are going to be discarded afterwards. If you're creating other objects during that iteration,
+    you might want to use this to avoid ``MemoryError``.
+
+    Note that this only works for sequences (index-accessible), not all iterables.
+    """
+    # We don't use ``del`` because it would be disastrous performance-wise, as the array would
+    # have to be constantly re-allocated.
+    for index, elem in enumerate(seq):
+        seq[index] = None
+        yield elem
+
 #--- String related
 
 def escape(s, to_escape, escape_with='\\'):
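A short usage sketch of the new helper (it assumes hscommon is importable): ``iterconsume`` blanks out each slot as it yields, so objects already processed become collectable even though the list itself stays alive. This is what lets the match-verification loop in the first hunk walk ``matches`` without holding a second full set of references.

from hscommon.util import iterconsume

big_items = [bytearray(2 ** 20) for _ in range(8)]  # stand-ins for memory-heavy objects

for item in iterconsume(big_items):
    total = sum(item)  # do some work; big_items no longer references 'item' at this point

# The list survives, but every slot it held has been replaced with None along the way.
assert all(slot is None for slot in big_items)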