mirror of
https://github.com/arsenetar/dupeguru.git
synced 2025-05-08 09:49:51 +00:00
Catch MemoryError better in PE's block matching algo
fixes #264 (for good this time, hopefully)
This commit is contained in:
parent
5b3d5f5d1c
commit
321f8ab406
@ -193,13 +193,13 @@ def getmatches(pictures, cache_path, threshold=75, match_scaled=False, j=job.nul
|
|||||||
# some wiggle room, log about the incident, and stop matching right here. We then process
|
# some wiggle room, log about the incident, and stop matching right here. We then process
|
||||||
# the matches we have. The rest of the process doesn't allocate much and we should be
|
# the matches we have. The rest of the process doesn't allocate much and we should be
|
||||||
# alright.
|
# alright.
|
||||||
del matches[-1000:] # some wiggle room to ensure we don't run out of memory again.
|
del comparisons_to_do, chunks, pictures # some wiggle room for the next statements
|
||||||
logging.warning("Ran out of memory when scanning! We had %d matches.", len(matches) + 1000)
|
logging.warning("Ran out of memory when scanning! We had %d matches.", len(matches))
|
||||||
|
del matches[-len(matches)//3:] # some wiggle room to ensure we don't run out of memory again.
|
||||||
pool.close()
|
pool.close()
|
||||||
|
|
||||||
result = []
|
result = []
|
||||||
myiter = j.iter_with_progress(
|
myiter = j.iter_with_progress(
|
||||||
iterconsume(matches),
|
iterconsume(matches, reverse=False),
|
||||||
tr("Verified %d/%d matches"),
|
tr("Verified %d/%d matches"),
|
||||||
every=10,
|
every=10,
|
||||||
count=len(matches),
|
count=len(matches),
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
# Created By: Virgil Dupras
|
# Created By: Virgil Dupras
|
||||||
# Created On: 2011-01-11
|
# Created On: 2011-01-11
|
||||||
# Copyright 2014 Hardcoded Software (http://www.hardcoded.net)
|
# Copyright 2014 Hardcoded Software (http://www.hardcoded.net)
|
||||||
#
|
#
|
||||||
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
|
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
|
||||||
# which should be included with this package. The terms are also available at
|
# which should be included with this package. The terms are also available at
|
||||||
# http://www.hardcoded.net/licenses/bsd_license
|
# http://www.hardcoded.net/licenses/bsd_license
|
||||||
|
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
@ -65,6 +65,12 @@ def test_trailiter():
|
|||||||
eq_(list(trailiter(['foo', 'bar'], skipfirst=True)), [('foo', 'bar')])
|
eq_(list(trailiter(['foo', 'bar'], skipfirst=True)), [('foo', 'bar')])
|
||||||
eq_(list(trailiter([], skipfirst=True)), []) # no crash
|
eq_(list(trailiter([], skipfirst=True)), []) # no crash
|
||||||
|
|
||||||
|
def test_iterconsume():
|
||||||
|
# We just want to make sure that we return *all* items and that we're not mistakenly skipping
|
||||||
|
# one.
|
||||||
|
eq_(list(range(2500)), list(iterconsume(list(range(2500)))))
|
||||||
|
eq_(list(reversed(range(2500))), list(iterconsume(list(range(2500)), reverse=False)))
|
||||||
|
|
||||||
#--- String
|
#--- String
|
||||||
|
|
||||||
def test_escape():
|
def test_escape():
|
||||||
@ -188,63 +194,63 @@ class TestCase_modified_after:
|
|||||||
monkeyplus.patch_osstat('first', st_mtime=42)
|
monkeyplus.patch_osstat('first', st_mtime=42)
|
||||||
monkeyplus.patch_osstat('second', st_mtime=41)
|
monkeyplus.patch_osstat('second', st_mtime=41)
|
||||||
assert modified_after('first', 'second')
|
assert modified_after('first', 'second')
|
||||||
|
|
||||||
def test_second_is_modified_after(self, monkeyplus):
|
def test_second_is_modified_after(self, monkeyplus):
|
||||||
monkeyplus.patch_osstat('first', st_mtime=42)
|
monkeyplus.patch_osstat('first', st_mtime=42)
|
||||||
monkeyplus.patch_osstat('second', st_mtime=43)
|
monkeyplus.patch_osstat('second', st_mtime=43)
|
||||||
assert not modified_after('first', 'second')
|
assert not modified_after('first', 'second')
|
||||||
|
|
||||||
def test_same_mtime(self, monkeyplus):
|
def test_same_mtime(self, monkeyplus):
|
||||||
monkeyplus.patch_osstat('first', st_mtime=42)
|
monkeyplus.patch_osstat('first', st_mtime=42)
|
||||||
monkeyplus.patch_osstat('second', st_mtime=42)
|
monkeyplus.patch_osstat('second', st_mtime=42)
|
||||||
assert not modified_after('first', 'second')
|
assert not modified_after('first', 'second')
|
||||||
|
|
||||||
def test_first_file_does_not_exist(self, monkeyplus):
|
def test_first_file_does_not_exist(self, monkeyplus):
|
||||||
# when the first file doesn't exist, we return False
|
# when the first file doesn't exist, we return False
|
||||||
monkeyplus.patch_osstat('second', st_mtime=42)
|
monkeyplus.patch_osstat('second', st_mtime=42)
|
||||||
assert not modified_after('does_not_exist', 'second') # no crash
|
assert not modified_after('does_not_exist', 'second') # no crash
|
||||||
|
|
||||||
def test_second_file_does_not_exist(self, monkeyplus):
|
def test_second_file_does_not_exist(self, monkeyplus):
|
||||||
# when the second file doesn't exist, we return True
|
# when the second file doesn't exist, we return True
|
||||||
monkeyplus.patch_osstat('first', st_mtime=42)
|
monkeyplus.patch_osstat('first', st_mtime=42)
|
||||||
assert modified_after('first', 'does_not_exist') # no crash
|
assert modified_after('first', 'does_not_exist') # no crash
|
||||||
|
|
||||||
def test_first_file_is_none(self, monkeyplus):
|
def test_first_file_is_none(self, monkeyplus):
|
||||||
# when the first file is None, we return False
|
# when the first file is None, we return False
|
||||||
monkeyplus.patch_osstat('second', st_mtime=42)
|
monkeyplus.patch_osstat('second', st_mtime=42)
|
||||||
assert not modified_after(None, 'second') # no crash
|
assert not modified_after(None, 'second') # no crash
|
||||||
|
|
||||||
def test_second_file_is_none(self, monkeyplus):
|
def test_second_file_is_none(self, monkeyplus):
|
||||||
# when the second file is None, we return True
|
# when the second file is None, we return True
|
||||||
monkeyplus.patch_osstat('first', st_mtime=42)
|
monkeyplus.patch_osstat('first', st_mtime=42)
|
||||||
assert modified_after('first', None) # no crash
|
assert modified_after('first', None) # no crash
|
||||||
|
|
||||||
|
|
||||||
class TestCase_delete_if_empty:
|
class TestCase_delete_if_empty:
|
||||||
def test_is_empty(self, tmpdir):
|
def test_is_empty(self, tmpdir):
|
||||||
testpath = Path(str(tmpdir))
|
testpath = Path(str(tmpdir))
|
||||||
assert delete_if_empty(testpath)
|
assert delete_if_empty(testpath)
|
||||||
assert not testpath.exists()
|
assert not testpath.exists()
|
||||||
|
|
||||||
def test_not_empty(self, tmpdir):
|
def test_not_empty(self, tmpdir):
|
||||||
testpath = Path(str(tmpdir))
|
testpath = Path(str(tmpdir))
|
||||||
testpath['foo'].mkdir()
|
testpath['foo'].mkdir()
|
||||||
assert not delete_if_empty(testpath)
|
assert not delete_if_empty(testpath)
|
||||||
assert testpath.exists()
|
assert testpath.exists()
|
||||||
|
|
||||||
def test_with_files_to_delete(self, tmpdir):
|
def test_with_files_to_delete(self, tmpdir):
|
||||||
testpath = Path(str(tmpdir))
|
testpath = Path(str(tmpdir))
|
||||||
testpath['foo'].open('w')
|
testpath['foo'].open('w')
|
||||||
testpath['bar'].open('w')
|
testpath['bar'].open('w')
|
||||||
assert delete_if_empty(testpath, ['foo', 'bar'])
|
assert delete_if_empty(testpath, ['foo', 'bar'])
|
||||||
assert not testpath.exists()
|
assert not testpath.exists()
|
||||||
|
|
||||||
def test_directory_in_files_to_delete(self, tmpdir):
|
def test_directory_in_files_to_delete(self, tmpdir):
|
||||||
testpath = Path(str(tmpdir))
|
testpath = Path(str(tmpdir))
|
||||||
testpath['foo'].mkdir()
|
testpath['foo'].mkdir()
|
||||||
assert not delete_if_empty(testpath, ['foo'])
|
assert not delete_if_empty(testpath, ['foo'])
|
||||||
assert testpath.exists()
|
assert testpath.exists()
|
||||||
|
|
||||||
def test_delete_files_to_delete_only_if_dir_is_empty(self, tmpdir):
|
def test_delete_files_to_delete_only_if_dir_is_empty(self, tmpdir):
|
||||||
testpath = Path(str(tmpdir))
|
testpath = Path(str(tmpdir))
|
||||||
testpath['foo'].open('w')
|
testpath['foo'].open('w')
|
||||||
@ -252,25 +258,25 @@ class TestCase_delete_if_empty:
|
|||||||
assert not delete_if_empty(testpath, ['foo'])
|
assert not delete_if_empty(testpath, ['foo'])
|
||||||
assert testpath.exists()
|
assert testpath.exists()
|
||||||
assert testpath['foo'].exists()
|
assert testpath['foo'].exists()
|
||||||
|
|
||||||
def test_doesnt_exist(self):
|
def test_doesnt_exist(self):
|
||||||
# When the 'path' doesn't exist, just do nothing.
|
# When the 'path' doesn't exist, just do nothing.
|
||||||
delete_if_empty(Path('does_not_exist')) # no crash
|
delete_if_empty(Path('does_not_exist')) # no crash
|
||||||
|
|
||||||
def test_is_file(self, tmpdir):
|
def test_is_file(self, tmpdir):
|
||||||
# When 'path' is a file, do nothing.
|
# When 'path' is a file, do nothing.
|
||||||
p = Path(str(tmpdir)) + 'filename'
|
p = Path(str(tmpdir)) + 'filename'
|
||||||
p.open('w').close()
|
p.open('w').close()
|
||||||
delete_if_empty(p) # no crash
|
delete_if_empty(p) # no crash
|
||||||
|
|
||||||
def test_ioerror(self, tmpdir, monkeypatch):
|
def test_ioerror(self, tmpdir, monkeypatch):
|
||||||
# if an IO error happens during the operation, ignore it.
|
# if an IO error happens during the operation, ignore it.
|
||||||
def do_raise(*args, **kw):
|
def do_raise(*args, **kw):
|
||||||
raise OSError()
|
raise OSError()
|
||||||
|
|
||||||
monkeypatch.setattr(Path, 'rmdir', do_raise)
|
monkeypatch.setattr(Path, 'rmdir', do_raise)
|
||||||
delete_if_empty(Path(str(tmpdir))) # no crash
|
delete_if_empty(Path(str(tmpdir))) # no crash
|
||||||
|
|
||||||
|
|
||||||
class TestCase_open_if_filename:
|
class TestCase_open_if_filename:
|
||||||
def test_file_name(self, tmpdir):
|
def test_file_name(self, tmpdir):
|
||||||
@ -280,7 +286,7 @@ class TestCase_open_if_filename:
|
|||||||
assert close
|
assert close
|
||||||
eq_(b'test_data', file.read())
|
eq_(b'test_data', file.read())
|
||||||
file.close()
|
file.close()
|
||||||
|
|
||||||
def test_opened_file(self):
|
def test_opened_file(self):
|
||||||
sio = StringIO()
|
sio = StringIO()
|
||||||
sio.write('test_data')
|
sio.write('test_data')
|
||||||
@ -288,14 +294,14 @@ class TestCase_open_if_filename:
|
|||||||
file, close = open_if_filename(sio)
|
file, close = open_if_filename(sio)
|
||||||
assert not close
|
assert not close
|
||||||
eq_('test_data', file.read())
|
eq_('test_data', file.read())
|
||||||
|
|
||||||
def test_mode_is_passed_to_open(self, tmpdir):
|
def test_mode_is_passed_to_open(self, tmpdir):
|
||||||
filepath = str(tmpdir.join('test.txt'))
|
filepath = str(tmpdir.join('test.txt'))
|
||||||
open(filepath, 'w').close()
|
open(filepath, 'w').close()
|
||||||
file, close = open_if_filename(filepath, 'a')
|
file, close = open_if_filename(filepath, 'a')
|
||||||
eq_('a', file.mode)
|
eq_('a', file.mode)
|
||||||
file.close()
|
file.close()
|
||||||
|
|
||||||
|
|
||||||
class TestCase_FileOrPath:
|
class TestCase_FileOrPath:
|
||||||
def test_path(self, tmpdir):
|
def test_path(self, tmpdir):
|
||||||
@ -303,17 +309,17 @@ class TestCase_FileOrPath:
|
|||||||
open(filepath, 'wb').write(b'test_data')
|
open(filepath, 'wb').write(b'test_data')
|
||||||
with FileOrPath(filepath) as fp:
|
with FileOrPath(filepath) as fp:
|
||||||
eq_(b'test_data', fp.read())
|
eq_(b'test_data', fp.read())
|
||||||
|
|
||||||
def test_opened_file(self):
|
def test_opened_file(self):
|
||||||
sio = StringIO()
|
sio = StringIO()
|
||||||
sio.write('test_data')
|
sio.write('test_data')
|
||||||
sio.seek(0)
|
sio.seek(0)
|
||||||
with FileOrPath(sio) as fp:
|
with FileOrPath(sio) as fp:
|
||||||
eq_('test_data', fp.read())
|
eq_('test_data', fp.read())
|
||||||
|
|
||||||
def test_mode_is_passed_to_open(self, tmpdir):
|
def test_mode_is_passed_to_open(self, tmpdir):
|
||||||
filepath = str(tmpdir.join('test.txt'))
|
filepath = str(tmpdir.join('test.txt'))
|
||||||
open(filepath, 'w').close()
|
open(filepath, 'w').close()
|
||||||
with FileOrPath(filepath, 'a') as fp:
|
with FileOrPath(filepath, 'a') as fp:
|
||||||
eq_('a', fp.mode)
|
eq_('a', fp.mode)
|
||||||
|
|
||||||
|
@ -117,23 +117,20 @@ def trailiter(iterable, skipfirst=False):
|
|||||||
yield prev, item
|
yield prev, item
|
||||||
prev = item
|
prev = item
|
||||||
|
|
||||||
def iterconsume(seq):
|
def iterconsume(seq, reverse=True):
|
||||||
"""Iterate over ``seq`` and discard yielded objects.
|
"""Iterate over ``seq`` and pop yielded objects.
|
||||||
|
|
||||||
Right after the ``yield``, we replace the element we've just yielded by ``None`` in the
|
Because we use the ``pop()`` method, we reverse ``seq`` before proceeding. If you don't need
|
||||||
sequence.
|
to do that, set ``reverse`` to ``False``.
|
||||||
|
|
||||||
This is useful in tight memory situations where you are looping over a sequence of objects that
|
This is useful in tight memory situations where you are looping over a sequence of objects that
|
||||||
are going to be discarded afterwards. If you're creating other objects during that iteration
|
are going to be discarded afterwards. If you're creating other objects during that iteration
|
||||||
you might want to use this to avoid ``MemoryError``.
|
you might want to use this to avoid ``MemoryError``.
|
||||||
|
|
||||||
Note that this only works for sequence (index accessible), not all iterables.
|
|
||||||
"""
|
"""
|
||||||
# We don't use ``del``, because it would be disastrous performance-wise as the array would have
|
if reverse:
|
||||||
# to be constantly re-allocated.
|
seq.reverse()
|
||||||
for index, elem in enumerate(seq):
|
while seq:
|
||||||
seq[index] = None
|
yield seq.pop()
|
||||||
yield elem
|
|
||||||
|
|
||||||
#--- String related
|
#--- String related
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user