mirror of https://github.com/arsenetar/dupeguru.git, synced 2025-09-11 17:58:17 +00:00
	Catch MemoryError better in PE's block matching algo
fixes #264 (for good this time, hopefully)
parent 5b3d5f5d1c
commit 321f8ab406
@@ -193,13 +193,13 @@ def getmatches(pictures, cache_path, threshold=75, match_scaled=False, j=job.nul
         # some wiggle room, log about the incident, and stop matching right here. We then process
         # the matches we have. The rest of the process doesn't allocate much and we should be
         # alright.
-        del matches[-1000:] # some wiggle room to ensure we don't run out of memory again.
-        logging.warning("Ran out of memory when scanning! We had %d matches.", len(matches) + 1000)
+        del comparisons_to_do, chunks, pictures # some wiggle room for the next statements
+        logging.warning("Ran out of memory when scanning! We had %d matches.", len(matches))
+        del matches[-len(matches)//3:] # some wiggle room to ensure we don't run out of memory again.
     pool.close()

     result = []
     myiter = j.iter_with_progress(
-        iterconsume(matches),
+        iterconsume(matches, reverse=False),
         tr("Verified %d/%d matches"),
         every=10,
         count=len(matches),
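The hunk above changes how getmatches() recovers from a MemoryError during block matching. Instead of dropping a fixed 1000 matches and logging an adjusted count, it now frees the large intermediate containers (comparisons_to_do, chunks, pictures) first so the following statements have room to allocate, logs the real match count, and then sheds the last third of the matches as wiggle room. The verification loop also switches to iterconsume(matches, reverse=False) so the matches list is emptied as it is consumed. A minimal sketch of that pattern follows; collect_matches, compare and verify are hypothetical stand-ins, not dupeguru's actual API, which drives a multiprocessing pool over cached image blocks.

import logging

def collect_matches(pictures, compare, verify):
    # Hypothetical stand-ins: `compare` scores a chunk of picture pairs, `verify`
    # filters candidate matches. dupeguru's real getmatches() uses a multiprocessing
    # pool and cached image blocks instead.
    comparisons_to_do = [(a, b) for i, a in enumerate(pictures) for b in pictures[i + 1:]]
    chunks = [comparisons_to_do[i:i + 100] for i in range(0, len(comparisons_to_do), 100)]
    matches = []
    try:
        for chunk in chunks:
            matches += compare(chunk)
    except MemoryError:
        # Free the big intermediates first so the statements below can allocate,
        # then log how far we got and shed a third of the matches as wiggle room.
        del comparisons_to_do, chunks, pictures
        logging.warning("Ran out of memory when scanning! We had %d matches.", len(matches))
        del matches[-len(matches) // 3:]
    # Consume the list destructively so memory is released during verification
    # (same effect as iterconsume(matches, reverse=False) above).
    verified = []
    while matches:
        m = matches.pop()
        if verify(m):
            verified.append(m)
    return verified

Popping from the tail during verification means memory pressure keeps easing even while new result objects are being created.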
@@ -65,6 +65,12 @@ def test_trailiter():
     eq_(list(trailiter(['foo', 'bar'], skipfirst=True)), [('foo', 'bar')])
     eq_(list(trailiter([], skipfirst=True)), []) # no crash

+def test_iterconsume():
+    # We just want to make sure that we return *all* items and that we're not mistakenly skipping
+    # one.
+    eq_(list(range(2500)), list(iterconsume(list(range(2500)))))
+    eq_(list(reversed(range(2500))), list(iterconsume(list(range(2500)), reverse=False)))
+
 #--- String

 def test_escape():
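The new test pins down the ordering contract: with the default reverse=True the generator yields items in their original order, and with reverse=False it yields them back-to-front; either way every item comes out exactly once. A quick standalone check of the same contract, assuming iterconsume is importable from hscommon.util (the module these helpers appear to live in; adjust the import to your checkout):

from hscommon.util import iterconsume  # assumed import path

seq = list(range(5))
print(list(iterconsume(seq)))                 # [0, 1, 2, 3, 4] -- reversed first, then popped
print(seq)                                    # [] -- the input list has been emptied

seq = list(range(5))
print(list(iterconsume(seq, reverse=False)))  # [4, 3, 2, 1, 0] -- popped straight from the tail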
@@ -117,23 +117,20 @@ def trailiter(iterable, skipfirst=False):
         yield prev, item
         prev = item

-def iterconsume(seq):
-    """Iterate over ``seq`` and discard yielded objects.
+def iterconsume(seq, reverse=True):
+    """Iterate over ``seq`` and pop yielded objects.

-    Right after the ``yield``, we replace the element we've just yielded by ``None`` in the
-    sequence.
+    Because we use the ``pop()`` method, we reverse ``seq`` before proceeding. If you don't need
+    to do that, set ``reverse`` to ``False``.

     This is useful in tight memory situation where you are looping over a sequence of objects that
     are going to be discarded afterwards. If you're creating other objects during that iteration
     you might want to use this to avoid ``MemoryError``.

     Note that this only works for sequence (index accessible), not all iterables.
     """
-    # We don't use ``del``, because it would be disastrous performance-wise as the array would have
-    # to be constantly re-allocated.
-    for index, elem in enumerate(seq):
-        seq[index] = None
-        yield elem
+    if reverse:
+        seq.reverse()
+    while seq:
+        yield seq.pop()

 #--- String related
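The rewrite changes how memory is given back. The old loop overwrote each yielded slot with None, which makes the yielded objects collectable but keeps the full-length list buffer allocated for the whole iteration; the new version pops from the tail, an O(1) operation, so the list's own storage shrinks as it empties, and the optional reverse() preserves the original yield order when callers care. A small CPython-specific demonstration of the difference, watching the list buffer with sys.getsizeof (exact numbers vary by build):

import sys

big = list(range(1_000_000))
print(sys.getsizeof(big))   # full buffer, roughly 8 MB on a 64-bit CPython

# Old approach: overwrite slots with None. The element objects become collectable,
# but the list keeps its full-length buffer until the list itself is discarded.
for i in range(len(big)):
    big[i] = None
print(sys.getsizeof(big))   # unchanged

big = list(range(1_000_000))
# New approach: pop from the tail. CPython shrinks the buffer as the size drops,
# so memory is given back progressively during iteration.
while big:
    big.pop()
print(sys.getsizeof(big))   # back down to roughly an empty list's size

This is also why getmatches() passes reverse=False in the first hunk: presumably the order of the matches does not matter for verification, so the reversal can be skipped.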