mirror of https://github.com/arsenetar/dupeguru.git synced 2025-03-10 05:34:36 +00:00

Straightened out the blocks cache. Instead of having a single global threaded block cache in the app, there's just a cache path, and non-threaded caches are created when needed. Also, made Cache.clear() more robust (it will clear the cache even if the db is corrupted).

Virgil Dupras 2010-01-14 16:14:26 +01:00
parent 06607aabb2
commit fc76a843d5
8 changed files with 58 additions and 45 deletions
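
In short: where the old code built one global, threaded Cache at startup and shared it everywhere, the new code stores only a path and opens a short-lived, non-threaded Cache wherever one is needed. A minimal sketch of the new open-use-close pattern, assuming the post-commit Cache class from the diffs below (the path, key, and block values are illustrative):

    # Hedged sketch: one short-lived Cache per operation, keyed by picture path.
    from cache import Cache  # core_pe's cache module; exact import path may differ

    cache_path = '/tmp/cached_pictures.db'  # e.g. op.join(appdata, 'cached_pictures.db')
    blocks = [(12, 34, 56), (78, 90, 12)]   # example block colors, normally from get_blocks()

    cache = Cache(cache_path)        # plain sqlite3 connection, no threading layer
    if u'/pics/a.jpg' not in cache:  # keys are unicode picture paths
        cache[u'/pics/a.jpg'] = blocks
    cache.close()                    # each consumer closes its own connection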

View File

@@ -15,7 +15,8 @@ cocoa/*/build
 cocoa/*/dg_cocoa.plugin
 qt/base/*_rc.py
 qt/base/*_ui.py
-qt/se/*_ui.py
+qt/*/*_ui.py
+qt/pe/modules/block/block.c
 help_se/dupeguru_help
 help_me/dupeguru_me_help
 help_pe/dupeguru_pe_help

View File

@@ -38,7 +38,7 @@ class PyDupeGuru(PyApp):
         self.app.scanner.ignore_list.Clear()
     
     def clearPictureCache(self):
-        self.app.scanner.cached_blocks.clear()
+        self.app.scanner.clear_picture_cache()
     
     def doScan(self):
         return self.app.start_scanning()

View File

@@ -136,8 +136,7 @@ class DupeGuruPE(app_cocoa.DupeGuru):
         app_cocoa.DupeGuru.__init__(self, data, 'dupeGuru Picture Edition', appid=5)
         self.scanner = ScannerPE()
         self.directories = Directories()
-        p = op.join(self.appdata, 'cached_pictures.db')
-        self.scanner.cached_blocks = Cache(p)
+        self.scanner.cache_path = op.join(self.appdata, 'cached_pictures.db')
     
     def _do_delete(self, j):
         def op(dupe):

View File

@@ -10,8 +10,6 @@ import os
 import logging
 import sqlite3 as sqlite
 
-import hsutil.sqlite
-
 from _cache import string_to_colors
 
 def colors_to_string(colors):
@@ -35,31 +33,10 @@ def colors_to_string(colors):
 class Cache(object):
     """A class to cache picture blocks.
     """
-    def __init__(self, db=':memory:', threaded=True):
-        def create_tables():
-            sql = "create table pictures(path TEXT, blocks TEXT)"
-            self.con.execute(sql);
-            sql = "create index idx_path on pictures (path)"
-            self.con.execute(sql)
-        
+    def __init__(self, db=':memory:'):
         self.dbname = db
-        if threaded:
-            self.con = hsutil.sqlite.ThreadedConn(db, True)
-        else:
-            self.con = sqlite.connect(db, isolation_level=None)
-        try:
-            self.con.execute("select * from pictures where 1=2")
-        except sqlite.OperationalError: # new db
-            create_tables()
-        except sqlite.DatabaseError, e: # corrupted db
-            logging.warning('Could not create picture cache because of an error: %s', str(e))
-            self.con.close()
-            os.remove(db)
-            if threaded:
-                self.con = hsutil.sqlite.ThreadedConn(db, True)
-            else:
-                self.con = sqlite.connect(db, isolation_level=None)
-            create_tables()
+        self.con = None
+        self._create_con()
     
     def __contains__(self, key):
         sql = "select count(*) from pictures where path = ?"
@@ -108,9 +85,36 @@ class Cache(object):
         except sqlite.DatabaseError, e:
             logging.warning('DatabaseError while setting %r for key %r: %s', value, key, str(e))
     
+    def _create_con(self, second_try=False):
+        def create_tables():
+            sql = "create table pictures(path TEXT, blocks TEXT)"
+            self.con.execute(sql);
+            sql = "create index idx_path on pictures (path)"
+            self.con.execute(sql)
+        
+        self.con = sqlite.connect(self.dbname, isolation_level=None)
+        try:
+            self.con.execute("select * from pictures where 1=2")
+        except sqlite.OperationalError: # new db
+            create_tables()
+        except sqlite.DatabaseError, e: # corrupted db
+            if second_try:
+                raise # Something really strange is happening
+            logging.warning('Could not create picture cache because of an error: %s', str(e))
+            self.con.close()
+            os.remove(self.dbname)
+            self._create_con(second_try=True)
+    
     def clear(self):
-        sql = "delete from pictures"
-        self.con.execute(sql)
+        self.close()
+        if self.dbname != ':memory:':
+            os.remove(self.dbname)
+        self._create_con()
+    
+    def close(self):
+        if self.con is not None:
+            self.con.close()
+            self.con = None
    
     def filter(self, func):
         to_delete = [key for key in self if not func(key)]
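
The payoff of the restructuring is that clear() no longer issues SQL at all: it closes the connection, deletes the file, and reopens, so it succeeds even when the db is corrupted, and _create_con() retries exactly once before giving up. A hedged sketch of that recovery path (the file name is a throwaway example):

    # Sketch: a corrupted cache file is detected and rebuilt on open.
    # Assumes the post-commit Cache class above; 'junk.db' is illustrative.
    import os

    with open('junk.db', 'w') as fp:
        fp.write('this is not a sqlite database')  # corrupt it on purpose

    cache = Cache('junk.db')  # _create_con() hits DatabaseError, logs a warning,
                              # removes the file and retries with second_try=True
    cache.clear()             # close -> os.remove -> _create_con, no SQL involved
    cache.close()
    os.remove('junk.db')      # cleanup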

View File

@@ -26,20 +26,21 @@ BLOCK_COUNT_PER_SIDE = 15
 # collection made by the main process.
 RESULTS_QUEUE_LIMIT = multiprocessing.cpu_count() * 2
 
-def prepare_pictures(pictures, cached_blocks, j=job.nulljob):
+def prepare_pictures(pictures, cache_path, j=job.nulljob):
     # The MemoryError handlers in there use logging without first caring about whether or not
     # there is enough memory left to carry on the operation because it is assumed that the
     # MemoryError happens when trying to read an image file, which is freed from memory by the
     # time that MemoryError is raised.
+    cache = Cache(cache_path)
     prepared = [] # only pictures for which there was no error getting blocks
     try:
         for picture in j.iter_with_progress(pictures, 'Analyzed %d/%d pictures'):
             picture.dimensions
             picture.unicode_path = unicode(picture.path)
             try:
-                if picture.unicode_path not in cached_blocks:
+                if picture.unicode_path not in cache:
                     blocks = picture.get_blocks(BLOCK_COUNT_PER_SIDE)
-                    cached_blocks[picture.unicode_path] = blocks
+                    cache[picture.unicode_path] = blocks
                 prepared.append(picture)
             except IOError as e:
                 logging.warning(unicode(e))
@@ -49,6 +50,7 @@ def prepare_pictures(pictures, cached_blocks, j=job.nulljob):
                     raise
     except MemoryError:
         logging.warning('Ran out of memory while preparing pictures')
+    cache.close()
     return prepared
 
 def get_match(first, second, percentage):
@@ -57,7 +59,7 @@ def get_match(first, second, percentage):
     return Match(first, second, percentage)
 
 def async_compare(ref_id, other_ids, dbname, threshold):
-    cache = Cache(dbname, threaded=False)
+    cache = Cache(dbname)
     limit = 100 - threshold
     ref_blocks = cache[ref_id]
     pairs = cache.get_multiple(other_ids)
@@ -70,10 +72,10 @@ def async_compare(ref_id, other_ids, dbname, threshold):
             percentage = 0
         if percentage >= threshold:
             results.append((ref_id, other_id, percentage))
-    cache.con.close()
+    cache.close()
     return results
 
-def getmatches(pictures, cached_blocks, threshold=75, match_scaled=False, j=job.nulljob):
+def getmatches(pictures, cache_path, threshold=75, match_scaled=False, j=job.nulljob):
     def empty_out_queue(queue, into):
         try:
             while True:
@@ -82,9 +84,9 @@ def getmatches(pictures, cached_blocks, threshold=75, match_scaled=False, j=job.nulljob):
             pass
     
     j = j.start_subjob([3, 7])
-    pictures = prepare_pictures(pictures, cached_blocks, j)
+    pictures = prepare_pictures(pictures, cache_path, j)
     j = j.start_subjob([9, 1], 'Preparing for matching')
-    cache = cached_blocks
+    cache = Cache(cache_path)
     id2picture = {}
     dimensions2pictures = defaultdict(set)
     for picture in pictures:
@@ -95,6 +97,7 @@ def getmatches(pictures, cached_blocks, threshold=75, match_scaled=False, j=job.nulljob):
             dimensions2pictures[picture.dimensions].add(picture)
         except ValueError:
             pass
+    cache.close()
     pictures = [p for p in pictures if hasattr(p, 'cache_id')]
     pool = multiprocessing.Pool()
     async_results = []
@@ -108,7 +111,7 @@ def getmatches(pictures, cached_blocks, threshold=75, match_scaled=False, j=job.nulljob):
         others = [pic for pic in others if not pic.is_ref]
         if others:
             cache_ids = [f.cache_id for f in others]
-            args = (ref.cache_id, cache_ids, cached_blocks.dbname, threshold)
+            args = (ref.cache_id, cache_ids, cache_path, threshold)
             async_results.append(pool.apply_async(async_compare, args))
             if len(async_results) > RESULTS_QUEUE_LIMIT:
                 result = async_results.pop(0)
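
Note the shape of the multiprocessing change: a sqlite connection cannot be pickled and shipped to a pool worker, so getmatches() now hands each async_compare call the cache_path string and lets every worker process open and close its own Cache. A hedged sketch isolating that pattern (worker() is an invented stand-in for async_compare, and the db name is an example):

    # Sketch: pass a path across the process boundary, not a connection.
    import multiprocessing

    def worker(dbname):        # hypothetical stand-in for async_compare
        cache = Cache(dbname)  # every process opens its own connection
        count = len([key for key in cache])  # any read-only work
        cache.close()
        return count

    if __name__ == '__main__':
        pool = multiprocessing.Pool()
        async_result = pool.apply_async(worker, ('cached_pictures.db',))
        print async_result.get()  # Python 2 print, matching the codebase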

View File

@@ -10,12 +10,18 @@
 from core.scanner import Scanner
 
 from . import matchbase
+from .cache import Cache
 
 class ScannerPE(Scanner):
-    cached_blocks = None
+    cache_path = None
     match_scaled = False
     threshold = 75
     
     def _getmatches(self, files, j):
-        return matchbase.getmatches(files, self.cached_blocks, self.threshold, self.match_scaled, j)
+        return matchbase.getmatches(files, self.cache_path, self.threshold, self.match_scaled, j)
+    
+    def clear_picture_cache(self):
+        cache = Cache(self.cache_path)
+        cache.clear()
+        cache.close()
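
With clear_picture_cache() living on the scanner, the cocoa and Qt layers (see the other hunks in this commit) no longer construct Cache objects at all; they assign a path and call one method. A hedged usage sketch (the path is an example):

    # Sketch: how a front end drives ScannerPE after this commit.
    scanner = ScannerPE()
    scanner.cache_path = '/tmp/cached_pictures.db'  # e.g. under appdata
    scanner.clear_picture_cache()  # opens a Cache, clears it, closes it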

View File

@@ -65,7 +65,7 @@ class DupeGuru(DupeGuruBase):
     def _setup(self):
         self.scanner = ScannerPE()
         self.directories.fileclasses = [File]
-        self.scanner.cached_blocks = Cache(op.join(self.appdata, 'cached_pictures.db'))
+        self.scanner.cache_path = op.join(self.appdata, 'cached_pictures.db')
         DupeGuruBase._setup(self)
     
     def _update_options(self):

View File

@@ -22,6 +22,6 @@ class MainWindow(MainWindowBase):
         title = "Clear Picture Cache"
         msg = "Do you really want to remove all your cached picture analysis?"
         if self._confirm(title, msg, QMessageBox.No):
-            self.app.scanner.cached_blocks.clear()
+            self.app.scanner.clear_picture_cache()
             QMessageBox.information(self, title, "Picture cache cleared.")