1
0
mirror of https://github.com/arsenetar/dupeguru.git synced 2025-03-10 05:34:36 +00:00

Catch MemoryError better in PE's block matching algo

fixes #264 (for good this time, hopefully)
This commit is contained in:
Virgil Dupras 2014-10-05 22:22:59 -04:00
parent 5b3d5f5d1c
commit 321f8ab406
3 changed files with 43 additions and 40 deletions

View File

@ -193,13 +193,13 @@ def getmatches(pictures, cache_path, threshold=75, match_scaled=False, j=job.nul
# some wiggle room, log about the incident, and stop matching right here. We then process # some wiggle room, log about the incident, and stop matching right here. We then process
# the matches we have. The rest of the process doesn't allocate much and we should be # the matches we have. The rest of the process doesn't allocate much and we should be
# alright. # alright.
del matches[-1000:] # some wiggle room to ensure we don't run out of memory again. del comparisons_to_do, chunks, pictures # some wiggle room for the next statements
logging.warning("Ran out of memory when scanning! We had %d matches.", len(matches) + 1000) logging.warning("Ran out of memory when scanning! We had %d matches.", len(matches))
del matches[-len(matches)//3:] # some wiggle room to ensure we don't run out of memory again.
pool.close() pool.close()
result = [] result = []
myiter = j.iter_with_progress( myiter = j.iter_with_progress(
iterconsume(matches), iterconsume(matches, reverse=False),
tr("Verified %d/%d matches"), tr("Verified %d/%d matches"),
every=10, every=10,
count=len(matches), count=len(matches),

View File

@ -65,6 +65,12 @@ def test_trailiter():
eq_(list(trailiter(['foo', 'bar'], skipfirst=True)), [('foo', 'bar')]) eq_(list(trailiter(['foo', 'bar'], skipfirst=True)), [('foo', 'bar')])
eq_(list(trailiter([], skipfirst=True)), []) # no crash eq_(list(trailiter([], skipfirst=True)), []) # no crash
def test_iterconsume():
    """Check that ``iterconsume`` hands back every item, in both ordering modes."""
    # The point is completeness: *all* items must come out, with none mistakenly
    # skipped along the way.
    size = 2500
    ascending = list(range(size))
    descending = list(reversed(range(size)))
    # Default call: items are expected back in their original order.
    consumed_default = list(iterconsume(list(range(size))))
    eq_(ascending, consumed_default)
    # With reverse=False: items are expected back last-to-first.
    consumed_unreversed = list(iterconsume(list(range(size)), reverse=False))
    eq_(descending, consumed_unreversed)
#--- String #--- String
def test_escape(): def test_escape():

View File

@ -117,23 +117,20 @@ def trailiter(iterable, skipfirst=False):
yield prev, item yield prev, item
prev = item prev = item
def iterconsume(seq): def iterconsume(seq, reverse=True):
"""Iterate over ``seq`` and discard yielded objects. """Iterate over ``seq`` and pops yielded objects.
Right after the ``yield``, we replace the element we've just yielded by ``None`` in the Because we use the ``pop()`` method, we reverse ``seq`` before proceeding. If you don't need
sequence. to do that, set ``reverse`` to ``False``.
This is useful in tight memory situation where you are looping over a sequence of objects that This is useful in tight memory situation where you are looping over a sequence of objects that
are going to be discarded afterwards. If you're creating other objects during that iteration are going to be discarded afterwards. If you're creating other objects during that iteration
you might want to use this to avoid ``MemoryError``. you might want to use this to avoid ``MemoryError``.
Note that this only works for sequence (index accessible), not all iterables.
""" """
# We don't use ``del``, because it would be disastrous performance-wise as the array would have if reverse:
# to be constantly re-allocated. seq.reverse()
for index, elem in enumerate(seq): while seq:
seq[index] = None yield seq.pop()
yield elem
#--- String related #--- String related