mirror of
				https://github.com/arsenetar/dupeguru.git
				synced 2025-09-11 17:58:17 +00:00 
			
		
		
		
	
							parent
							
								
									44266273bf
								
							
						
					
					
						commit
						372a682610
					
				| @ -10,7 +10,7 @@ import logging | ||||
| import multiprocessing | ||||
| from itertools import combinations | ||||
| 
 | ||||
| from hscommon.util import extract | ||||
| from hscommon.util import extract, iterconsume | ||||
| from hscommon.trans import tr | ||||
| from hscommon.jobprogress import job | ||||
| 
 | ||||
| @ -175,25 +175,34 @@ def getmatches(pictures, cache_path, threshold=75, match_scaled=False, j=job.nul | ||||
|     comparisons_to_do = list(combinations(chunks + [None], 2)) | ||||
|     comparison_count = 0 | ||||
|     j.start_job(len(comparisons_to_do)) | ||||
|     for ref_chunk, other_chunk in comparisons_to_do: | ||||
|         picinfo = {p.cache_id: get_picinfo(p) for p in ref_chunk} | ||||
|         ref_ids = [p.cache_id for p in ref_chunk] | ||||
|         if other_chunk is not None: | ||||
|             other_ids = [p.cache_id for p in other_chunk] | ||||
|             picinfo.update({p.cache_id: get_picinfo(p) for p in other_chunk}) | ||||
|         else: | ||||
|             other_ids = None | ||||
|         args = (ref_ids, other_ids, cache_path, threshold, picinfo) | ||||
|         async_results.append(pool.apply_async(async_compare, args)) | ||||
|         collect_results() | ||||
|     collect_results(collect_all=True) | ||||
|     try: | ||||
|         for ref_chunk, other_chunk in comparisons_to_do: | ||||
|             picinfo = {p.cache_id: get_picinfo(p) for p in ref_chunk} | ||||
|             ref_ids = [p.cache_id for p in ref_chunk] | ||||
|             if other_chunk is not None: | ||||
|                 other_ids = [p.cache_id for p in other_chunk] | ||||
|                 picinfo.update({p.cache_id: get_picinfo(p) for p in other_chunk}) | ||||
|             else: | ||||
|                 other_ids = None | ||||
|             args = (ref_ids, other_ids, cache_path, threshold, picinfo) | ||||
|             async_results.append(pool.apply_async(async_compare, args)) | ||||
|             collect_results() | ||||
|         collect_results(collect_all=True) | ||||
|     except MemoryError: | ||||
|         # Rare, but possible, even in 64bit situations (ref #264). What do we do now? We free up | ||||
|         # some wiggle room, log about the incident, and stop matching right here. We then process | ||||
|         # the matches we have. The rest of the process doesn't allocate much and we should be | ||||
|         # alright. | ||||
|         del matches[-1000:] # some wiggle room to ensure we don't run out of memory again. | ||||
|         logging.warning("Ran out of memory when scanning! We had %d matches.", len(matches) + 1000) | ||||
|     pool.close() | ||||
| 
 | ||||
|     result = [] | ||||
|     myiter = j.iter_with_progress( | ||||
|         matches, | ||||
|         iterconsume(matches), | ||||
|         tr("Verified %d/%d matches"), | ||||
|         every=10 | ||||
|         every=10, | ||||
|         count=len(matches), | ||||
|     ) | ||||
|     for ref_id, other_id, percentage in myiter: | ||||
|         ref = id2picture[ref_id] | ||||
|  | ||||
| @ -80,21 +80,27 @@ class Job: | ||||
|     def check_if_cancelled(self): | ||||
|         self._do_update('') | ||||
| 
 | ||||
|     def iter_with_progress(self, sequence, desc_format=None, every=1): | ||||
|         ''' Iterate through sequence while automatically adding progress. | ||||
|         ''' | ||||
|     def iter_with_progress(self, iterable, desc_format=None, every=1, count=None): | ||||
|         """Iterate through ``iterable`` while automatically adding progress. | ||||
| 
 | ||||
|         WARNING: We need our iterable's length. If ``iterable`` is not a sequence (that is, | ||||
|         something we can call ``len()`` on), you *have* to specify a count through the ``count`` | ||||
|         argument. If ``count`` is ``None``, ``len(iterable)`` is used. | ||||
|         """ | ||||
|         if count is None: | ||||
|             count = len(iterable) | ||||
|         desc = '' | ||||
|         if desc_format: | ||||
|             desc = desc_format % (0, len(sequence)) | ||||
|         self.start_job(len(sequence), desc) | ||||
|         for i, element in enumerate(sequence, start=1): | ||||
|             desc = desc_format % (0, count) | ||||
|         self.start_job(count, desc) | ||||
|         for i, element in enumerate(iterable, start=1): | ||||
|             yield element | ||||
|             if i % every == 0: | ||||
|                 if desc_format: | ||||
|                     desc = desc_format % (i, len(sequence)) | ||||
|                     desc = desc_format % (i, count) | ||||
|                 self.add_progress(progress=every, desc=desc) | ||||
|         if desc_format: | ||||
|             desc = desc_format % (len(sequence), len(sequence)) | ||||
|             desc = desc_format % (count, count) | ||||
|         self.set_progress(100, desc) | ||||
| 
 | ||||
|     def start_job(self, max_progress=100, desc=''): | ||||
|  | ||||
| @ -117,6 +117,24 @@ def trailiter(iterable, skipfirst=False): | ||||
|         yield prev, item | ||||
|         prev = item | ||||
| 
 | ||||
def iterconsume(seq):
    """Iterate over ``seq``, dropping each element from ``seq`` as it is yielded.

    Just before an element is yielded, its slot in ``seq`` is overwritten with ``None``, so that
    slot no longer references the element. ``seq`` keeps its length; once the iteration has run
    to completion, it contains only ``None``.

    This is useful in tight memory situations where you are looping over a sequence of objects
    that are going to be discarded afterwards. If you're creating other objects during that
    iteration, you might want to use this to avoid ``MemoryError``.

    Note that this only works for mutable sequences (supporting item assignment by index), not
    for arbitrary iterables.

    :param seq: index-assignable sequence (e.g. a ``list``); it is modified in place.
    :returns: a generator yielding the original elements of ``seq`` in order.
    """
    # We don't use ``del``, because it would be disastrous performance-wise as the array would have
    # to be constantly re-allocated.
    for index, elem in enumerate(seq):
        # Clear the slot first; ``elem`` still references the object, so it can be yielded safely.
        seq[index] = None
        yield elem
| 
 | ||||
| #--- String related | ||||
| 
 | ||||
| def escape(s, to_escape, escape_with='\\'): | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user