mirror of
https://github.com/arsenetar/dupeguru.git
synced 2025-03-10 05:34:36 +00:00
Catch MemoryError better in PE's block matching algo
fixes #264 (for good this time, hopefully)
This commit is contained in:
parent
5b3d5f5d1c
commit
321f8ab406
@ -193,13 +193,13 @@ def getmatches(pictures, cache_path, threshold=75, match_scaled=False, j=job.nul
|
||||
# some wiggle room, log about the incident, and stop matching right here. We then process
|
||||
# the matches we have. The rest of the process doesn't allocate much and we should be
|
||||
# alright.
|
||||
del matches[-1000:] # some wiggle room to ensure we don't run out of memory again.
|
||||
logging.warning("Ran out of memory when scanning! We had %d matches.", len(matches) + 1000)
|
||||
del comparisons_to_do, chunks, pictures # some wiggle room for the next statements
|
||||
logging.warning("Ran out of memory when scanning! We had %d matches.", len(matches))
|
||||
del matches[-len(matches)//3:] # some wiggle room to ensure we don't run out of memory again.
|
||||
pool.close()
|
||||
|
||||
result = []
|
||||
myiter = j.iter_with_progress(
|
||||
iterconsume(matches),
|
||||
iterconsume(matches, reverse=False),
|
||||
tr("Verified %d/%d matches"),
|
||||
every=10,
|
||||
count=len(matches),
|
||||
|
@ -65,6 +65,12 @@ def test_trailiter():
|
||||
eq_(list(trailiter(['foo', 'bar'], skipfirst=True)), [('foo', 'bar')])
|
||||
eq_(list(trailiter([], skipfirst=True)), []) # no crash
|
||||
|
||||
def test_iterconsume():
|
||||
# We just want to make sure that we return *all* items and that we're not mistakenly skipping
|
||||
# one.
|
||||
eq_(list(range(2500)), list(iterconsume(list(range(2500)))))
|
||||
eq_(list(reversed(range(2500))), list(iterconsume(list(range(2500)), reverse=False)))
|
||||
|
||||
#--- String
|
||||
|
||||
def test_escape():
|
||||
|
@ -117,23 +117,20 @@ def trailiter(iterable, skipfirst=False):
|
||||
yield prev, item
|
||||
prev = item
|
||||
|
||||
def iterconsume(seq):
|
||||
"""Iterate over ``seq`` and discard yielded objects.
|
||||
def iterconsume(seq, reverse=True):
|
||||
"""Iterate over ``seq`` and pops yielded objects.
|
||||
|
||||
Right after the ``yield``, we replace the element we've just yielded by ``None`` in the
|
||||
sequence.
|
||||
Because we use the ``pop()`` method, we reverse ``seq`` before proceeding. If you don't need
|
||||
to do that, set ``reverse`` to ``False``.
|
||||
|
||||
This is useful in tight memory situation where you are looping over a sequence of objects that
|
||||
are going to be discarded afterwards. If you're creating other objects during that iteration
|
||||
you might want to use this to avoid ``MemoryError``.
|
||||
|
||||
Note that this only works for sequence (index accessible), not all iterables.
|
||||
"""
|
||||
# We don't use ``del``, because it would be disastrous performance-wise as the array would have
|
||||
# to be constantly re-allocated.
|
||||
for index, elem in enumerate(seq):
|
||||
seq[index] = None
|
||||
yield elem
|
||||
if reverse:
|
||||
seq.reverse()
|
||||
while seq:
|
||||
yield seq.pop()
|
||||
|
||||
#--- String related
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user