Mirror of https://github.com/arsenetar/dupeguru.git (synced 2025-09-11 17:58:17 +00:00)
			
		
		
		
	Catch MemoryError better in PE's block matching algo
fixes #264 (for good this time, hopefully)
This commit is contained in:
		
parent 5b3d5f5d1c
commit 321f8ab406
| @@ -193,13 +193,13 @@ def getmatches(pictures, cache_path, threshold=75, match_scaled=False, j=job.nul | |||||||
|         # some wiggle room, log about the incident, and stop matching right here. We then process |         # some wiggle room, log about the incident, and stop matching right here. We then process | ||||||
|         # the matches we have. The rest of the process doesn't allocate much and we should be |         # the matches we have. The rest of the process doesn't allocate much and we should be | ||||||
|         # alright. |         # alright. | ||||||
|         del matches[-1000:] # some wiggle room to ensure we don't run out of memory again. |         del comparisons_to_do, chunks, pictures # some wiggle room for the next statements | ||||||
|         logging.warning("Ran out of memory when scanning! We had %d matches.", len(matches) + 1000) |         logging.warning("Ran out of memory when scanning! We had %d matches.", len(matches)) | ||||||
|  |         del matches[-len(matches)//3:] # some wiggle room to ensure we don't run out of memory again. | ||||||
|     pool.close() |     pool.close() | ||||||
| 
 |  | ||||||
|     result = [] |     result = [] | ||||||
|     myiter = j.iter_with_progress( |     myiter = j.iter_with_progress( | ||||||
|         iterconsume(matches), |         iterconsume(matches, reverse=False), | ||||||
|         tr("Verified %d/%d matches"), |         tr("Verified %d/%d matches"), | ||||||
|         every=10, |         every=10, | ||||||
|         count=len(matches), |         count=len(matches), | ||||||
|  | |||||||
| @@ -65,6 +65,12 @@ def test_trailiter(): | |||||||
|     eq_(list(trailiter(['foo', 'bar'], skipfirst=True)), [('foo', 'bar')]) |     eq_(list(trailiter(['foo', 'bar'], skipfirst=True)), [('foo', 'bar')]) | ||||||
|     eq_(list(trailiter([], skipfirst=True)), []) # no crash |     eq_(list(trailiter([], skipfirst=True)), []) # no crash | ||||||
| 
 | 
 | ||||||
|  | def test_iterconsume(): | ||||||
|  |     # We just want to make sure that we return *all* items and that we're not mistakenly skipping | ||||||
|  |     # one. | ||||||
|  |     eq_(list(range(2500)), list(iterconsume(list(range(2500))))) | ||||||
|  |     eq_(list(reversed(range(2500))), list(iterconsume(list(range(2500)), reverse=False))) | ||||||
|  | 
 | ||||||
| #--- String | #--- String | ||||||
| 
 | 
 | ||||||
| def test_escape(): | def test_escape(): | ||||||
|  | |||||||
| @@ -117,23 +117,20 @@ def trailiter(iterable, skipfirst=False): | |||||||
|         yield prev, item |         yield prev, item | ||||||
|         prev = item |         prev = item | ||||||
| 
 | 
 | ||||||
| def iterconsume(seq): | def iterconsume(seq, reverse=True): | ||||||
|     """Iterate over ``seq`` and discard yielded objects. |     """Iterate over ``seq`` and pops yielded objects. | ||||||
| 
 | 
 | ||||||
|     Right after the ``yield``, we replace the element we've just yielded by ``None`` in the |     Because we use the ``pop()`` method, we reverse ``seq`` before proceeding. If you don't need | ||||||
|     sequence. |     to do that, set ``reverse`` to ``False``. | ||||||
| 
 | 
 | ||||||
|     This is useful in tight memory situation where you are looping over a sequence of objects that |     This is useful in tight memory situation where you are looping over a sequence of objects that | ||||||
|     are going to be discarded afterwards. If you're creating other objects during that iteration |     are going to be discarded afterwards. If you're creating other objects during that iteration | ||||||
|     you might want to use this to avoid ``MemoryError``. |     you might want to use this to avoid ``MemoryError``. | ||||||
| 
 |  | ||||||
|     Note that this only works for sequence (index accessible), not all iterables. |  | ||||||
|     """ |     """ | ||||||
|     # We don't use ``del``, because it would be disastrous performance-wise as the array would have |     if reverse: | ||||||
|     # to be constantly re-allocated. |         seq.reverse() | ||||||
|     for index, elem in enumerate(seq): |     while seq: | ||||||
|         seq[index] = None |         yield seq.pop() | ||||||
|         yield elem |  | ||||||
| 
 | 
 | ||||||
| #--- String related | #--- String related | ||||||
| 
 | 
 | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user