mirror of
				https://github.com/arsenetar/dupeguru.git
				synced 2025-09-11 17:58:17 +00:00 
			
		
		
		
	Straightened out the blocks cache. Instead of having a single global threaded block cache in the app, there's just a cache path, and non-threaded caches are created when needed. Also, made Cache.clear() more robust (it will clear the cache even if the db is corrupted).
This commit is contained in:
		
							parent
							
								
									06607aabb2
								
							
						
					
					
						commit
						fc76a843d5
					
				| @ -15,7 +15,8 @@ cocoa/*/build | ||||
| cocoa/*/dg_cocoa.plugin | ||||
| qt/base/*_rc.py | ||||
| qt/base/*_ui.py | ||||
| qt/se/*_ui.py | ||||
| qt/*/*_ui.py | ||||
| qt/pe/modules/block/block.c | ||||
| help_se/dupeguru_help | ||||
| help_me/dupeguru_me_help | ||||
| help_pe/dupeguru_pe_help | ||||
| @ -38,7 +38,7 @@ class PyDupeGuru(PyApp): | ||||
|         self.app.scanner.ignore_list.Clear() | ||||
|      | ||||
|     def clearPictureCache(self): | ||||
|         self.app.scanner.cached_blocks.clear() | ||||
|         self.app.scanner.clear_picture_cache() | ||||
|      | ||||
|     def doScan(self): | ||||
|         return self.app.start_scanning() | ||||
|  | ||||
| @ -136,8 +136,7 @@ class DupeGuruPE(app_cocoa.DupeGuru): | ||||
|         app_cocoa.DupeGuru.__init__(self, data, 'dupeGuru Picture Edition', appid=5) | ||||
|         self.scanner = ScannerPE() | ||||
|         self.directories = Directories() | ||||
|         p = op.join(self.appdata, 'cached_pictures.db') | ||||
|         self.scanner.cached_blocks = Cache(p) | ||||
|         self.scanner.cache_path = op.join(self.appdata, 'cached_pictures.db') | ||||
|      | ||||
|     def _do_delete(self, j): | ||||
|         def op(dupe): | ||||
|  | ||||
| @ -10,8 +10,6 @@ import os | ||||
| import logging | ||||
| import sqlite3 as sqlite | ||||
| 
 | ||||
| import hsutil.sqlite | ||||
| 
 | ||||
| from _cache import string_to_colors | ||||
| 
 | ||||
| def colors_to_string(colors): | ||||
| @ -35,31 +33,10 @@ def colors_to_string(colors): | ||||
| class Cache(object): | ||||
|     """A class to cache picture blocks. | ||||
|     """ | ||||
|     def __init__(self, db=':memory:', threaded=True): | ||||
|         def create_tables(): | ||||
|             sql = "create table pictures(path TEXT, blocks TEXT)" | ||||
|             self.con.execute(sql); | ||||
|             sql = "create index idx_path on pictures (path)" | ||||
|             self.con.execute(sql) | ||||
|          | ||||
|     def __init__(self, db=':memory:'): | ||||
|         self.dbname = db | ||||
|         if threaded: | ||||
|             self.con = hsutil.sqlite.ThreadedConn(db, True) | ||||
|         else: | ||||
|             self.con = sqlite.connect(db, isolation_level=None) | ||||
|         try: | ||||
|             self.con.execute("select * from pictures where 1=2") | ||||
|         except sqlite.OperationalError: # new db | ||||
|             create_tables() | ||||
|         except sqlite.DatabaseError, e: # corrupted db | ||||
|             logging.warning('Could not create picture cache because of an error: %s', str(e)) | ||||
|             self.con.close() | ||||
|             os.remove(db) | ||||
|             if threaded: | ||||
|                 self.con = hsutil.sqlite.ThreadedConn(db, True) | ||||
|             else: | ||||
|                 self.con = sqlite.connect(db, isolation_level=None) | ||||
|             create_tables() | ||||
|         self.con = None | ||||
|         self._create_con() | ||||
|      | ||||
|     def __contains__(self, key): | ||||
|         sql = "select count(*) from pictures where path = ?" | ||||
| @ -108,9 +85,36 @@ class Cache(object): | ||||
|         except sqlite.DatabaseError, e: | ||||
|             logging.warning('DatabaseError while setting %r for key %r: %s', value, key, str(e)) | ||||
|      | ||||
|     def _create_con(self, second_try=False): | ||||
|         def create_tables(): | ||||
|             sql = "create table pictures(path TEXT, blocks TEXT)" | ||||
|             self.con.execute(sql); | ||||
|             sql = "create index idx_path on pictures (path)" | ||||
|             self.con.execute(sql) | ||||
|          | ||||
|         self.con = sqlite.connect(self.dbname, isolation_level=None) | ||||
|         try: | ||||
|             self.con.execute("select * from pictures where 1=2") | ||||
|         except sqlite.OperationalError: # new db | ||||
|             create_tables() | ||||
|         except sqlite.DatabaseError, e: # corrupted db | ||||
|             if second_try: | ||||
|                 raise # Something really strange is happening | ||||
|             logging.warning('Could not create picture cache because of an error: %s', str(e)) | ||||
|             self.con.close() | ||||
|             os.remove(self.dbname) | ||||
|             self._create_con(second_try=True) | ||||
|      | ||||
|     def clear(self): | ||||
|         sql = "delete from pictures" | ||||
|         self.con.execute(sql) | ||||
|         self.close() | ||||
|         if self.dbname != ':memory:': | ||||
|             os.remove(self.dbname) | ||||
|         self._create_con() | ||||
|      | ||||
|     def close(self): | ||||
|         if self.con is not None: | ||||
|             self.con.close() | ||||
|         self.con = None | ||||
|      | ||||
|     def filter(self, func): | ||||
|         to_delete = [key for key in self if not func(key)] | ||||
|  | ||||
| @ -26,20 +26,21 @@ BLOCK_COUNT_PER_SIDE = 15 | ||||
| # collection made by the main process. | ||||
| RESULTS_QUEUE_LIMIT = multiprocessing.cpu_count() * 2 | ||||
| 
 | ||||
| def prepare_pictures(pictures, cached_blocks, j=job.nulljob): | ||||
| def prepare_pictures(pictures, cache_path, j=job.nulljob): | ||||
|     # The MemoryError handlers in there use logging without first caring about whether or not | ||||
|     # there is enough memory left to carry on the operation because it is assumed that the | ||||
|     # MemoryError happens when trying to read an image file, which is freed from memory by the | ||||
|     # time that MemoryError is raised. | ||||
|     cache = Cache(cache_path) | ||||
|     prepared = [] # only pictures for which there was no error getting blocks | ||||
|     try: | ||||
|         for picture in j.iter_with_progress(pictures, 'Analyzed %d/%d pictures'): | ||||
|             picture.dimensions | ||||
|             picture.unicode_path = unicode(picture.path) | ||||
|             try: | ||||
|                 if picture.unicode_path not in cached_blocks: | ||||
|                 if picture.unicode_path not in cache: | ||||
|                     blocks = picture.get_blocks(BLOCK_COUNT_PER_SIDE) | ||||
|                     cached_blocks[picture.unicode_path] = blocks | ||||
|                     cache[picture.unicode_path] = blocks | ||||
|                 prepared.append(picture) | ||||
|             except IOError as e: | ||||
|                 logging.warning(unicode(e)) | ||||
| @ -49,6 +50,7 @@ def prepare_pictures(pictures, cached_blocks, j=job.nulljob): | ||||
|                     raise | ||||
|     except MemoryError: | ||||
|         logging.warning('Ran out of memory while preparing pictures') | ||||
|     cache.close() | ||||
|     return prepared | ||||
| 
 | ||||
| def get_match(first, second, percentage): | ||||
| @ -57,7 +59,7 @@ def get_match(first, second, percentage): | ||||
|     return Match(first, second, percentage) | ||||
| 
 | ||||
| def async_compare(ref_id, other_ids, dbname, threshold): | ||||
|     cache = Cache(dbname, threaded=False) | ||||
|     cache = Cache(dbname) | ||||
|     limit = 100 - threshold | ||||
|     ref_blocks = cache[ref_id] | ||||
|     pairs = cache.get_multiple(other_ids) | ||||
| @ -70,10 +72,10 @@ def async_compare(ref_id, other_ids, dbname, threshold): | ||||
|             percentage = 0 | ||||
|         if percentage >= threshold: | ||||
|             results.append((ref_id, other_id, percentage)) | ||||
|     cache.con.close() | ||||
|     cache.close() | ||||
|     return results | ||||
|      | ||||
| def getmatches(pictures, cached_blocks, threshold=75, match_scaled=False, j=job.nulljob): | ||||
| def getmatches(pictures, cache_path, threshold=75, match_scaled=False, j=job.nulljob): | ||||
|     def empty_out_queue(queue, into): | ||||
|         try: | ||||
|             while True: | ||||
| @ -82,9 +84,9 @@ def getmatches(pictures, cached_blocks, threshold=75, match_scaled=False, j=job. | ||||
|             pass | ||||
|      | ||||
|     j = j.start_subjob([3, 7]) | ||||
|     pictures = prepare_pictures(pictures, cached_blocks, j) | ||||
|     pictures = prepare_pictures(pictures, cache_path, j) | ||||
|     j = j.start_subjob([9, 1], 'Preparing for matching') | ||||
|     cache = cached_blocks | ||||
|     cache = Cache(cache_path) | ||||
|     id2picture = {} | ||||
|     dimensions2pictures = defaultdict(set) | ||||
|     for picture in pictures: | ||||
| @ -95,6 +97,7 @@ def getmatches(pictures, cached_blocks, threshold=75, match_scaled=False, j=job. | ||||
|                 dimensions2pictures[picture.dimensions].add(picture) | ||||
|         except ValueError: | ||||
|             pass | ||||
|     cache.close() | ||||
|     pictures = [p for p in pictures if hasattr(p, 'cache_id')] | ||||
|     pool = multiprocessing.Pool() | ||||
|     async_results = [] | ||||
| @ -108,7 +111,7 @@ def getmatches(pictures, cached_blocks, threshold=75, match_scaled=False, j=job. | ||||
|             others = [pic for pic in others if not pic.is_ref] | ||||
|         if others: | ||||
|             cache_ids = [f.cache_id for f in others] | ||||
|             args = (ref.cache_id, cache_ids, cached_blocks.dbname, threshold) | ||||
|             args = (ref.cache_id, cache_ids, cache_path, threshold) | ||||
|             async_results.append(pool.apply_async(async_compare, args)) | ||||
|         if len(async_results) > RESULTS_QUEUE_LIMIT: | ||||
|             result = async_results.pop(0) | ||||
|  | ||||
| @ -10,12 +10,18 @@ | ||||
| from core.scanner import Scanner | ||||
| 
 | ||||
| from . import matchbase | ||||
| from .cache import Cache | ||||
| 
 | ||||
| class ScannerPE(Scanner): | ||||
|     cached_blocks = None | ||||
|     cache_path = None | ||||
|     match_scaled = False | ||||
|     threshold = 75 | ||||
|      | ||||
|     def _getmatches(self, files, j): | ||||
|         return matchbase.getmatches(files, self.cached_blocks, self.threshold, self.match_scaled, j) | ||||
|         return matchbase.getmatches(files, self.cache_path, self.threshold, self.match_scaled, j) | ||||
|      | ||||
|     def clear_picture_cache(self): | ||||
|         cache = Cache(self.cache_path) | ||||
|         cache.clear() | ||||
|         cache.close() | ||||
|      | ||||
|  | ||||
| @ -65,7 +65,7 @@ class DupeGuru(DupeGuruBase): | ||||
|     def _setup(self): | ||||
|         self.scanner = ScannerPE() | ||||
|         self.directories.fileclasses = [File] | ||||
|         self.scanner.cached_blocks = Cache(op.join(self.appdata, 'cached_pictures.db')) | ||||
|         self.scanner.cache_path = op.join(self.appdata, 'cached_pictures.db') | ||||
|         DupeGuruBase._setup(self) | ||||
|      | ||||
|     def _update_options(self): | ||||
|  | ||||
| @ -22,6 +22,6 @@ class MainWindow(MainWindowBase): | ||||
|         title = "Clear Picture Cache" | ||||
|         msg = "Do you really want to remove all your cached picture analysis?" | ||||
|         if self._confirm(title, msg, QMessageBox.No): | ||||
|             self.app.scanner.cached_blocks.clear() | ||||
|             self.app.scanner.clear_picture_cache() | ||||
|             QMessageBox.information(self, title, "Picture cache cleared.") | ||||
|      | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user