Catch MemoryError better in PE's block matching algo

fixes #264 (for good this time, hopefully)
2025-09-11 17:58:17 +00:00 · 2014-10-05 22:22:59 -04:00 · 2014-10-05 22:22:59 -04:00 · 321f8ab406
commit 321f8ab406
parent 5b3d5f5d1c
3 changed files with 43 additions and 40 deletions
--- a/core_pe/matchblock.py
+++ b/core_pe/matchblock.py
@ -193,13 +193,13 @@ def getmatches(pictures, cache_path, threshold=75, match_scaled=False, j=job.nul
        # some wiggle room, log about the incident, and stop matching right here. We then process
        # the matches we have. The rest of the process doesn't allocate much and we should be
        # alright.
-        del matches[-1000:] # some wiggle room to ensure we don't run out of memory again.
-        logging.warning("Ran out of memory when scanning! We had %d matches.", len(matches) + 1000)
+        del comparisons_to_do, chunks, pictures # some wiggle room for the next statements
+        logging.warning("Ran out of memory when scanning! We had %d matches.", len(matches))
+        del matches[-len(matches)//3:] # some wiggle room to ensure we don't run out of memory again.
    pool.close()
-
    result = []
    myiter = j.iter_with_progress(
-        iterconsume(matches),
+        iterconsume(matches, reverse=False),
        tr("Verified %d/%d matches"),
        every=10,
        count=len(matches),
--- a/hscommon/tests/util_test.py
+++ b/hscommon/tests/util_test.py
@ -65,6 +65,12 @@ def test_trailiter():
    eq_(list(trailiter(['foo', 'bar'], skipfirst=True)), [('foo', 'bar')])
    eq_(list(trailiter([], skipfirst=True)), []) # no crash

+def test_iterconsume():
+    # We just want to make sure that we return *all* items and that we're not mistakenly skipping
+    # one.
+    eq_(list(range(2500)), list(iterconsume(list(range(2500)))))
+    eq_(list(reversed(range(2500))), list(iterconsume(list(range(2500)), reverse=False)))
+
 #--- String

 def test_escape():
--- a/hscommon/util.py
+++ b/hscommon/util.py
@ -117,23 +117,20 @@ def trailiter(iterable, skipfirst=False):
        yield prev, item
        prev = item

-def iterconsume(seq):
-    """Iterate over ``seq`` and discard yielded objects.
+def iterconsume(seq, reverse=True):
+    """Iterate over ``seq`` and pop yielded objects.

-    Right after the ``yield``, we replace the element we've just yielded by ``None`` in the
-    sequence.
+    Because we use the ``pop()`` method, we reverse ``seq`` in place before proceeding. If you
+    don't mind getting items from last to first, set ``reverse`` to ``False`` to skip that step.

    This is useful in tight memory situation where you are looping over a sequence of objects that
    are going to be discarded afterwards. If you're creating other objects during that iteration
    you might want to use this to avoid ``MemoryError``.
-
-    Note that this only works for sequence (index accessible), not all iterables.
    """
-    # We don't use ``del``, because it would be disastrous performance-wise as the array would have
-    # to be constantly re-allocated.
-    for index, elem in enumerate(seq):
-        seq[index] = None
-        yield elem
+    if reverse:
+        seq.reverse()
+    while seq:
+        yield seq.pop()

 #--- String related