diff --git a/.hgignore b/.hgignore index 2de004e5..53c6f972 100644 --- a/.hgignore +++ b/.hgignore @@ -15,7 +15,8 @@ cocoa/*/build cocoa/*/dg_cocoa.plugin qt/base/*_rc.py qt/base/*_ui.py -qt/se/*_ui.py +qt/*/*_ui.py +qt/pe/modules/block/block.c help_se/dupeguru_help help_me/dupeguru_me_help help_pe/dupeguru_pe_help \ No newline at end of file diff --git a/cocoa/pe/dg_cocoa.py b/cocoa/pe/dg_cocoa.py index 49634f13..6d39ba92 100644 --- a/cocoa/pe/dg_cocoa.py +++ b/cocoa/pe/dg_cocoa.py @@ -38,7 +38,7 @@ class PyDupeGuru(PyApp): self.app.scanner.ignore_list.Clear() def clearPictureCache(self): - self.app.scanner.cached_blocks.clear() + self.app.scanner.clear_picture_cache() def doScan(self): return self.app.start_scanning() diff --git a/core_pe/app_cocoa.py b/core_pe/app_cocoa.py index 08c3525f..14febe30 100644 --- a/core_pe/app_cocoa.py +++ b/core_pe/app_cocoa.py @@ -136,8 +136,7 @@ class DupeGuruPE(app_cocoa.DupeGuru): app_cocoa.DupeGuru.__init__(self, data, 'dupeGuru Picture Edition', appid=5) self.scanner = ScannerPE() self.directories = Directories() - p = op.join(self.appdata, 'cached_pictures.db') - self.scanner.cached_blocks = Cache(p) + self.scanner.cache_path = op.join(self.appdata, 'cached_pictures.db') def _do_delete(self, j): def op(dupe): diff --git a/core_pe/cache.py b/core_pe/cache.py index d642b52b..d44ecccd 100644 --- a/core_pe/cache.py +++ b/core_pe/cache.py @@ -10,8 +10,6 @@ import os import logging import sqlite3 as sqlite -import hsutil.sqlite - from _cache import string_to_colors def colors_to_string(colors): @@ -35,31 +33,10 @@ def colors_to_string(colors): class Cache(object): """A class to cache picture blocks. """ - def __init__(self, db=':memory:', threaded=True): - def create_tables(): - sql = "create table pictures(path TEXT, blocks TEXT)" - self.con.execute(sql); - sql = "create index idx_path on pictures (path)" - self.con.execute(sql) - + def __init__(self, db=':memory:'): self.dbname = db - if threaded: - self.con = hsutil.sqlite.ThreadedConn(db, True) - else: - self.con = sqlite.connect(db, isolation_level=None) - try: - self.con.execute("select * from pictures where 1=2") - except sqlite.OperationalError: # new db - create_tables() - except sqlite.DatabaseError, e: # corrupted db - logging.warning('Could not create picture cache because of an error: %s', str(e)) - self.con.close() - os.remove(db) - if threaded: - self.con = hsutil.sqlite.ThreadedConn(db, True) - else: - self.con = sqlite.connect(db, isolation_level=None) - create_tables() + self.con = None + self._create_con() def __contains__(self, key): sql = "select count(*) from pictures where path = ?" @@ -108,9 +85,36 @@ class Cache(object): except sqlite.DatabaseError, e: logging.warning('DatabaseError while setting %r for key %r: %s', value, key, str(e)) + def _create_con(self, second_try=False): + def create_tables(): + sql = "create table pictures(path TEXT, blocks TEXT)" + self.con.execute(sql); + sql = "create index idx_path on pictures (path)" + self.con.execute(sql) + + self.con = sqlite.connect(self.dbname, isolation_level=None) + try: + self.con.execute("select * from pictures where 1=2") + except sqlite.OperationalError: # new db + create_tables() + except sqlite.DatabaseError, e: # corrupted db + if second_try: + raise # Something really strange is happening + logging.warning('Could not create picture cache because of an error: %s', str(e)) + self.con.close() + os.remove(self.dbname) + self._create_con(second_try=True) + def clear(self): - sql = "delete from pictures" - self.con.execute(sql) + self.close() + if self.dbname != ':memory:': + os.remove(self.dbname) + self._create_con() + + def close(self): + if self.con is not None: + self.con.close() + self.con = None def filter(self, func): to_delete = [key for key in self if not func(key)] diff --git a/core_pe/matchbase.py b/core_pe/matchbase.py index 678455e8..a988e2bb 100644 --- a/core_pe/matchbase.py +++ b/core_pe/matchbase.py @@ -26,20 +26,21 @@ BLOCK_COUNT_PER_SIDE = 15 # collection made by the main process. RESULTS_QUEUE_LIMIT = multiprocessing.cpu_count() * 2 -def prepare_pictures(pictures, cached_blocks, j=job.nulljob): +def prepare_pictures(pictures, cache_path, j=job.nulljob): # The MemoryError handlers in there use logging without first caring about whether or not # there is enough memory left to carry on the operation because it is assumed that the # MemoryError happens when trying to read an image file, which is freed from memory by the # time that MemoryError is raised. + cache = Cache(cache_path) prepared = [] # only pictures for which there was no error getting blocks try: for picture in j.iter_with_progress(pictures, 'Analyzed %d/%d pictures'): picture.dimensions picture.unicode_path = unicode(picture.path) try: - if picture.unicode_path not in cached_blocks: + if picture.unicode_path not in cache: blocks = picture.get_blocks(BLOCK_COUNT_PER_SIDE) - cached_blocks[picture.unicode_path] = blocks + cache[picture.unicode_path] = blocks prepared.append(picture) except IOError as e: logging.warning(unicode(e)) @@ -49,6 +50,7 @@ def prepare_pictures(pictures, cached_blocks, j=job.nulljob): raise except MemoryError: logging.warning('Ran out of memory while preparing pictures') + cache.close() return prepared def get_match(first, second, percentage): @@ -57,7 +59,7 @@ def get_match(first, second, percentage): return Match(first, second, percentage) def async_compare(ref_id, other_ids, dbname, threshold): - cache = Cache(dbname, threaded=False) + cache = Cache(dbname) limit = 100 - threshold ref_blocks = cache[ref_id] pairs = cache.get_multiple(other_ids) @@ -70,10 +72,10 @@ def async_compare(ref_id, other_ids, dbname, threshold): percentage = 0 if percentage >= threshold: results.append((ref_id, other_id, percentage)) - cache.con.close() + cache.close() return results -def getmatches(pictures, cached_blocks, threshold=75, match_scaled=False, j=job.nulljob): +def getmatches(pictures, cache_path, threshold=75, match_scaled=False, j=job.nulljob): def empty_out_queue(queue, into): try: while True: @@ -82,9 +84,9 @@ def getmatches(pictures, cached_blocks, threshold=75, match_scaled=False, j=job. pass j = j.start_subjob([3, 7]) - pictures = prepare_pictures(pictures, cached_blocks, j) + pictures = prepare_pictures(pictures, cache_path, j) j = j.start_subjob([9, 1], 'Preparing for matching') - cache = cached_blocks + cache = Cache(cache_path) id2picture = {} dimensions2pictures = defaultdict(set) for picture in pictures: @@ -95,6 +97,7 @@ def getmatches(pictures, cached_blocks, threshold=75, match_scaled=False, j=job. dimensions2pictures[picture.dimensions].add(picture) except ValueError: pass + cache.close() pictures = [p for p in pictures if hasattr(p, 'cache_id')] pool = multiprocessing.Pool() async_results = [] @@ -108,7 +111,7 @@ def getmatches(pictures, cached_blocks, threshold=75, match_scaled=False, j=job. others = [pic for pic in others if not pic.is_ref] if others: cache_ids = [f.cache_id for f in others] - args = (ref.cache_id, cache_ids, cached_blocks.dbname, threshold) + args = (ref.cache_id, cache_ids, cache_path, threshold) async_results.append(pool.apply_async(async_compare, args)) if len(async_results) > RESULTS_QUEUE_LIMIT: result = async_results.pop(0) diff --git a/core_pe/scanner.py b/core_pe/scanner.py index cf57a5f4..1234fa85 100644 --- a/core_pe/scanner.py +++ b/core_pe/scanner.py @@ -10,12 +10,18 @@ from core.scanner import Scanner from . import matchbase +from .cache import Cache class ScannerPE(Scanner): - cached_blocks = None + cache_path = None match_scaled = False threshold = 75 def _getmatches(self, files, j): - return matchbase.getmatches(files, self.cached_blocks, self.threshold, self.match_scaled, j) + return matchbase.getmatches(files, self.cache_path, self.threshold, self.match_scaled, j) + + def clear_picture_cache(self): + cache = Cache(self.cache_path) + cache.clear() + cache.close() diff --git a/qt/pe/app.py b/qt/pe/app.py index ceaddca2..ac2c437e 100644 --- a/qt/pe/app.py +++ b/qt/pe/app.py @@ -65,7 +65,7 @@ class DupeGuru(DupeGuruBase): def _setup(self): self.scanner = ScannerPE() self.directories.fileclasses = [File] - self.scanner.cached_blocks = Cache(op.join(self.appdata, 'cached_pictures.db')) + self.scanner.cache_path = op.join(self.appdata, 'cached_pictures.db') DupeGuruBase._setup(self) def _update_options(self): diff --git a/qt/pe/main_window.py b/qt/pe/main_window.py index c5cfe1f8..3dc3cfaa 100644 --- a/qt/pe/main_window.py +++ b/qt/pe/main_window.py @@ -22,6 +22,6 @@ class MainWindow(MainWindowBase): title = "Clear Picture Cache" msg = "Do you really want to remove all your cached picture analysis?" if self._confirm(title, msg, QMessageBox.No): - self.app.scanner.cached_blocks.clear() + self.app.scanner.clear_picture_cache() QMessageBox.information(self, title, "Picture cache cleared.") \ No newline at end of file