mirror of
https://github.com/arsenetar/dupeguru.git
synced 2024-11-18 21:19:01 +00:00
Move pe.cache.Cache into its own unit, cache_sqlite
This prepares us for an upcoming alternative cache implementation.
This commit is contained in:
parent
1c20e5c770
commit
cc01e8eb09
@ -405,7 +405,7 @@ class DupeGuru(Broadcaster):
|
||||
path = path.parent()
|
||||
|
||||
def clear_picture_cache(self):
|
||||
cache = pe.cache.Cache(self.options['cache_path'])
|
||||
cache = pe.cache_sqlite.SqliteCache(self.options['cache_path'])
|
||||
cache.clear()
|
||||
cache.close()
|
||||
|
||||
|
@ -1 +1 @@
|
||||
from . import block, cache, exif, iphoto_plist, matchblock, matchexif, photo, prioritize, result_table, scanner # noqa
|
||||
from . import block, cache, cache_sqlite, exif, iphoto_plist, matchblock, matchexif, photo, prioritize, result_table, scanner # noqa
|
||||
|
142
core/pe/cache.py
142
core/pe/cache.py
@ -1,17 +1,10 @@
|
||||
# Created By: Virgil Dupras
|
||||
# Created On: 2006/09/14
|
||||
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
|
||||
# Copyright 2016 Virgil Dupras
|
||||
#
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||
|
||||
import os
|
||||
import os.path as op
|
||||
import logging
|
||||
import sqlite3 as sqlite
|
||||
|
||||
from ._cache import string_to_colors
|
||||
from ._cache import string_to_colors # noqa
|
||||
|
||||
def colors_to_string(colors):
|
||||
"""Transform the 3 sized tuples 'colors' into a hex string.
|
||||
@ -19,7 +12,7 @@ def colors_to_string(colors):
|
||||
[(0,100,255)] --> 0064ff
|
||||
[(1,2,3),(4,5,6)] --> 010203040506
|
||||
"""
|
||||
return ''.join(['%02x%02x%02x' % (r, g, b) for r, g, b in colors])
|
||||
return ''.join('%02x%02x%02x' % (r, g, b) for r, g, b in colors)
|
||||
|
||||
# This function is an important bottleneck of dupeGuru PE. It has been converted to C.
|
||||
# def string_to_colors(s):
|
||||
@ -31,132 +24,3 @@ def colors_to_string(colors):
|
||||
# result.append((number >> 16, (number >> 8) & 0xff, number & 0xff))
|
||||
# return result
|
||||
|
||||
class Cache:
|
||||
"""A class to cache picture blocks.
|
||||
"""
|
||||
def __init__(self, db=':memory:'):
|
||||
self.dbname = db
|
||||
self.con = None
|
||||
self._create_con()
|
||||
|
||||
def __contains__(self, key):
|
||||
sql = "select count(*) from pictures where path = ?"
|
||||
result = self.con.execute(sql, [key]).fetchall()
|
||||
return result[0][0] > 0
|
||||
|
||||
def __delitem__(self, key):
|
||||
if key not in self:
|
||||
raise KeyError(key)
|
||||
sql = "delete from pictures where path = ?"
|
||||
self.con.execute(sql, [key])
|
||||
|
||||
# Optimized
|
||||
def __getitem__(self, key):
|
||||
if isinstance(key, int):
|
||||
sql = "select blocks from pictures where rowid = ?"
|
||||
else:
|
||||
sql = "select blocks from pictures where path = ?"
|
||||
result = self.con.execute(sql, [key]).fetchone()
|
||||
if result:
|
||||
result = string_to_colors(result[0])
|
||||
return result
|
||||
else:
|
||||
raise KeyError(key)
|
||||
|
||||
def __iter__(self):
|
||||
sql = "select path from pictures"
|
||||
result = self.con.execute(sql)
|
||||
return (row[0] for row in result)
|
||||
|
||||
def __len__(self):
|
||||
sql = "select count(*) from pictures"
|
||||
result = self.con.execute(sql).fetchall()
|
||||
return result[0][0]
|
||||
|
||||
def __setitem__(self, path_str, blocks):
|
||||
blocks = colors_to_string(blocks)
|
||||
if op.exists(path_str):
|
||||
mtime = int(os.stat(path_str).st_mtime)
|
||||
else:
|
||||
mtime = 0
|
||||
if path_str in self:
|
||||
sql = "update pictures set blocks = ?, mtime = ? where path = ?"
|
||||
else:
|
||||
sql = "insert into pictures(blocks,mtime,path) values(?,?,?)"
|
||||
try:
|
||||
self.con.execute(sql, [blocks, mtime, path_str])
|
||||
except sqlite.OperationalError:
|
||||
logging.warning('Picture cache could not set value for key %r', path_str)
|
||||
except sqlite.DatabaseError as e:
|
||||
logging.warning('DatabaseError while setting value for key %r: %s', path_str, str(e))
|
||||
|
||||
def _create_con(self, second_try=False):
|
||||
def create_tables():
|
||||
logging.debug("Creating picture cache tables.")
|
||||
self.con.execute("drop table if exists pictures")
|
||||
self.con.execute("drop index if exists idx_path")
|
||||
self.con.execute("create table pictures(path TEXT, mtime INTEGER, blocks TEXT)")
|
||||
self.con.execute("create index idx_path on pictures (path)")
|
||||
|
||||
self.con = sqlite.connect(self.dbname, isolation_level=None)
|
||||
try:
|
||||
self.con.execute("select path, mtime, blocks from pictures where 1=2")
|
||||
except sqlite.OperationalError: # new db
|
||||
create_tables()
|
||||
except sqlite.DatabaseError as e: # corrupted db
|
||||
if second_try:
|
||||
raise # Something really strange is happening
|
||||
logging.warning('Could not create picture cache because of an error: %s', str(e))
|
||||
self.con.close()
|
||||
os.remove(self.dbname)
|
||||
self._create_con(second_try=True)
|
||||
|
||||
def clear(self):
|
||||
self.close()
|
||||
if self.dbname != ':memory:':
|
||||
os.remove(self.dbname)
|
||||
self._create_con()
|
||||
|
||||
def close(self):
|
||||
if self.con is not None:
|
||||
self.con.close()
|
||||
self.con = None
|
||||
|
||||
def filter(self, func):
|
||||
to_delete = [key for key in self if not func(key)]
|
||||
for key in to_delete:
|
||||
del self[key]
|
||||
|
||||
def get_id(self, path):
|
||||
sql = "select rowid from pictures where path = ?"
|
||||
result = self.con.execute(sql, [path]).fetchone()
|
||||
if result:
|
||||
return result[0]
|
||||
else:
|
||||
raise ValueError(path)
|
||||
|
||||
def get_multiple(self, rowids):
|
||||
sql = "select rowid, blocks from pictures where rowid in (%s)" % ','.join(map(str, rowids))
|
||||
cur = self.con.execute(sql)
|
||||
return ((rowid, string_to_colors(blocks)) for rowid, blocks in cur)
|
||||
|
||||
def purge_outdated(self):
|
||||
"""Go through the cache and purge outdated records.
|
||||
|
||||
A record is outdated if the picture doesn't exist or if its mtime is greater than the one in
|
||||
the db.
|
||||
"""
|
||||
todelete = []
|
||||
sql = "select rowid, path, mtime from pictures"
|
||||
cur = self.con.execute(sql)
|
||||
for rowid, path_str, mtime in cur:
|
||||
if mtime and op.exists(path_str):
|
||||
picture_mtime = os.stat(path_str).st_mtime
|
||||
if int(picture_mtime) <= mtime:
|
||||
# not outdated
|
||||
continue
|
||||
todelete.append(rowid)
|
||||
if todelete:
|
||||
sql = "delete from pictures where rowid in (%s)" % ','.join(map(str, todelete))
|
||||
self.con.execute(sql)
|
||||
|
||||
|
142
core/pe/cache_sqlite.py
Normal file
142
core/pe/cache_sqlite.py
Normal file
@ -0,0 +1,142 @@
|
||||
# Copyright 2016 Virgil Dupras
|
||||
#
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||
|
||||
import os
|
||||
import os.path as op
|
||||
import logging
|
||||
import sqlite3 as sqlite
|
||||
|
||||
from .cache import string_to_colors, colors_to_string
|
||||
|
||||
class SqliteCache:
|
||||
"""A class to cache picture blocks.
|
||||
"""
|
||||
def __init__(self, db=':memory:'):
|
||||
self.dbname = db
|
||||
self.con = None
|
||||
self._create_con()
|
||||
|
||||
def __contains__(self, key):
|
||||
sql = "select count(*) from pictures where path = ?"
|
||||
result = self.con.execute(sql, [key]).fetchall()
|
||||
return result[0][0] > 0
|
||||
|
||||
def __delitem__(self, key):
|
||||
if key not in self:
|
||||
raise KeyError(key)
|
||||
sql = "delete from pictures where path = ?"
|
||||
self.con.execute(sql, [key])
|
||||
|
||||
# Optimized
|
||||
def __getitem__(self, key):
|
||||
if isinstance(key, int):
|
||||
sql = "select blocks from pictures where rowid = ?"
|
||||
else:
|
||||
sql = "select blocks from pictures where path = ?"
|
||||
result = self.con.execute(sql, [key]).fetchone()
|
||||
if result:
|
||||
result = string_to_colors(result[0])
|
||||
return result
|
||||
else:
|
||||
raise KeyError(key)
|
||||
|
||||
def __iter__(self):
|
||||
sql = "select path from pictures"
|
||||
result = self.con.execute(sql)
|
||||
return (row[0] for row in result)
|
||||
|
||||
def __len__(self):
|
||||
sql = "select count(*) from pictures"
|
||||
result = self.con.execute(sql).fetchall()
|
||||
return result[0][0]
|
||||
|
||||
def __setitem__(self, path_str, blocks):
|
||||
blocks = colors_to_string(blocks)
|
||||
if op.exists(path_str):
|
||||
mtime = int(os.stat(path_str).st_mtime)
|
||||
else:
|
||||
mtime = 0
|
||||
if path_str in self:
|
||||
sql = "update pictures set blocks = ?, mtime = ? where path = ?"
|
||||
else:
|
||||
sql = "insert into pictures(blocks,mtime,path) values(?,?,?)"
|
||||
try:
|
||||
self.con.execute(sql, [blocks, mtime, path_str])
|
||||
except sqlite.OperationalError:
|
||||
logging.warning('Picture cache could not set value for key %r', path_str)
|
||||
except sqlite.DatabaseError as e:
|
||||
logging.warning('DatabaseError while setting value for key %r: %s', path_str, str(e))
|
||||
|
||||
def _create_con(self, second_try=False):
|
||||
def create_tables():
|
||||
logging.debug("Creating picture cache tables.")
|
||||
self.con.execute("drop table if exists pictures")
|
||||
self.con.execute("drop index if exists idx_path")
|
||||
self.con.execute("create table pictures(path TEXT, mtime INTEGER, blocks TEXT)")
|
||||
self.con.execute("create index idx_path on pictures (path)")
|
||||
|
||||
self.con = sqlite.connect(self.dbname, isolation_level=None)
|
||||
try:
|
||||
self.con.execute("select path, mtime, blocks from pictures where 1=2")
|
||||
except sqlite.OperationalError: # new db
|
||||
create_tables()
|
||||
except sqlite.DatabaseError as e: # corrupted db
|
||||
if second_try:
|
||||
raise # Something really strange is happening
|
||||
logging.warning('Could not create picture cache because of an error: %s', str(e))
|
||||
self.con.close()
|
||||
os.remove(self.dbname)
|
||||
self._create_con(second_try=True)
|
||||
|
||||
def clear(self):
|
||||
self.close()
|
||||
if self.dbname != ':memory:':
|
||||
os.remove(self.dbname)
|
||||
self._create_con()
|
||||
|
||||
def close(self):
|
||||
if self.con is not None:
|
||||
self.con.close()
|
||||
self.con = None
|
||||
|
||||
def filter(self, func):
|
||||
to_delete = [key for key in self if not func(key)]
|
||||
for key in to_delete:
|
||||
del self[key]
|
||||
|
||||
def get_id(self, path):
|
||||
sql = "select rowid from pictures where path = ?"
|
||||
result = self.con.execute(sql, [path]).fetchone()
|
||||
if result:
|
||||
return result[0]
|
||||
else:
|
||||
raise ValueError(path)
|
||||
|
||||
def get_multiple(self, rowids):
|
||||
sql = "select rowid, blocks from pictures where rowid in (%s)" % ','.join(map(str, rowids))
|
||||
cur = self.con.execute(sql)
|
||||
return ((rowid, string_to_colors(blocks)) for rowid, blocks in cur)
|
||||
|
||||
def purge_outdated(self):
|
||||
"""Go through the cache and purge outdated records.
|
||||
|
||||
A record is outdated if the picture doesn't exist or if its mtime is greater than the one in
|
||||
the db.
|
||||
"""
|
||||
todelete = []
|
||||
sql = "select rowid, path, mtime from pictures"
|
||||
cur = self.con.execute(sql)
|
||||
for rowid, path_str, mtime in cur:
|
||||
if mtime and op.exists(path_str):
|
||||
picture_mtime = os.stat(path_str).st_mtime
|
||||
if int(picture_mtime) <= mtime:
|
||||
# not outdated
|
||||
continue
|
||||
todelete.append(rowid)
|
||||
if todelete:
|
||||
sql = "delete from pictures where rowid in (%s)" % ','.join(map(str, todelete))
|
||||
self.con.execute(sql)
|
||||
|
@ -16,7 +16,7 @@ from hscommon.jobprogress import job
|
||||
|
||||
from core.engine import Match
|
||||
from .block import avgdiff, DifferentBlockCountError, NoBlocksError
|
||||
from .cache import Cache
|
||||
from .cache_sqlite import SqliteCache
|
||||
|
||||
# OPTIMIZATION NOTES:
|
||||
# The bottleneck of the matching phase is CPU, which is why we use multiprocessing. However, another
|
||||
@ -54,7 +54,7 @@ def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
|
||||
# there is enough memory left to carry on the operation because it is assumed that the
|
||||
# MemoryError happens when trying to read an image file, which is freed from memory by the
|
||||
# time that MemoryError is raised.
|
||||
cache = Cache(cache_path)
|
||||
cache = SqliteCache(cache_path)
|
||||
cache.purge_outdated()
|
||||
prepared = [] # only pictures for which there was no error getting blocks
|
||||
try:
|
||||
@ -109,7 +109,7 @@ def async_compare(ref_ids, other_ids, dbname, threshold, picinfo):
|
||||
# The list of ids in ref_ids have to be compared to the list of ids in other_ids. other_ids
|
||||
# can be None. In this case, ref_ids has to be compared with itself
|
||||
# picinfo is a dictionary {pic_id: (dimensions, is_ref)}
|
||||
cache = Cache(dbname)
|
||||
cache = SqliteCache(dbname)
|
||||
limit = 100 - threshold
|
||||
ref_pairs = list(cache.get_multiple(ref_ids))
|
||||
if other_ids is not None:
|
||||
@ -159,7 +159,7 @@ def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljo
|
||||
j = j.start_subjob([3, 7])
|
||||
pictures = prepare_pictures(pictures, cache_path, with_dimensions=not match_scaled, j=j)
|
||||
j = j.start_subjob([9, 1], tr("Preparing for matching"))
|
||||
cache = Cache(cache_path)
|
||||
cache = SqliteCache(cache_path)
|
||||
id2picture = {}
|
||||
for picture in pictures:
|
||||
try:
|
||||
|
@ -1,4 +1,4 @@
|
||||
# Copyright 2016 Hardcoded Software (http://www.hardcoded.net)
|
||||
# Copyright 2016 Virgil Dupras
|
||||
#
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
@ -10,7 +10,8 @@ from pytest import raises, skip
|
||||
from hscommon.testutil import eq_
|
||||
|
||||
try:
|
||||
from ..pe.cache import Cache, colors_to_string, string_to_colors
|
||||
from ..pe.cache import colors_to_string, string_to_colors
|
||||
from ..pe.cache_sqlite import SqliteCache
|
||||
except ImportError:
|
||||
skip("Can't import the cache module, probably hasn't been compiled.")
|
||||
|
||||
@ -44,21 +45,24 @@ class TestCasestring_to_colors:
|
||||
eq_([], string_to_colors('102'))
|
||||
|
||||
|
||||
class TestCaseCache:
|
||||
class BaseTestCaseCache:
|
||||
def get_cache(self, dbname=None):
|
||||
raise NotImplementedError()
|
||||
|
||||
def test_empty(self):
|
||||
c = Cache()
|
||||
c = self.get_cache()
|
||||
eq_(0, len(c))
|
||||
with raises(KeyError):
|
||||
c['foo']
|
||||
|
||||
def test_set_then_retrieve_blocks(self):
|
||||
c = Cache()
|
||||
c = self.get_cache()
|
||||
b = [(0, 0, 0), (1, 2, 3)]
|
||||
c['foo'] = b
|
||||
eq_(b, c['foo'])
|
||||
|
||||
def test_delitem(self):
|
||||
c = Cache()
|
||||
c = self.get_cache()
|
||||
c['foo'] = ''
|
||||
del c['foo']
|
||||
assert 'foo' not in c
|
||||
@ -67,14 +71,14 @@ class TestCaseCache:
|
||||
|
||||
def test_persistance(self, tmpdir):
|
||||
DBNAME = tmpdir.join('hstest.db')
|
||||
c = Cache(str(DBNAME))
|
||||
c = self.get_cache(str(DBNAME))
|
||||
c['foo'] = [(1, 2, 3)]
|
||||
del c
|
||||
c = Cache(str(DBNAME))
|
||||
c = self.get_cache(str(DBNAME))
|
||||
eq_([(1, 2, 3)], c['foo'])
|
||||
|
||||
def test_filter(self):
|
||||
c = Cache()
|
||||
c = self.get_cache()
|
||||
c['foo'] = ''
|
||||
c['bar'] = ''
|
||||
c['baz'] = ''
|
||||
@ -85,7 +89,7 @@ class TestCaseCache:
|
||||
assert 'bar' not in c
|
||||
|
||||
def test_clear(self):
|
||||
c = Cache()
|
||||
c = self.get_cache()
|
||||
c['foo'] = ''
|
||||
c['bar'] = ''
|
||||
c['baz'] = ''
|
||||
@ -95,6 +99,22 @@ class TestCaseCache:
|
||||
assert 'baz' not in c
|
||||
assert 'bar' not in c
|
||||
|
||||
def test_by_id(self):
|
||||
# it's possible to use the cache by referring to the files by their row_id
|
||||
c = self.get_cache()
|
||||
b = [(0, 0, 0), (1, 2, 3)]
|
||||
c['foo'] = b
|
||||
foo_id = c.get_id('foo')
|
||||
eq_(c[foo_id], b)
|
||||
|
||||
|
||||
class TestCaseSqliteCache(BaseTestCaseCache):
|
||||
def get_cache(self, dbname=None):
|
||||
if dbname:
|
||||
return SqliteCache(dbname)
|
||||
else:
|
||||
return SqliteCache()
|
||||
|
||||
def test_corrupted_db(self, tmpdir, monkeypatch):
|
||||
# If we don't do this monkeypatching, we get a weird exception about trying to flush a
|
||||
# closed file. I've tried setting logging level and stuff, but nothing worked. So, there we
|
||||
@ -104,37 +124,32 @@ class TestCaseCache:
|
||||
fp = open(dbname, 'w')
|
||||
fp.write('invalid sqlite content')
|
||||
fp.close()
|
||||
c = Cache(dbname) # should not raise a DatabaseError
|
||||
c = self.get_cache(dbname) # should not raise a DatabaseError
|
||||
c['foo'] = [(1, 2, 3)]
|
||||
del c
|
||||
c = Cache(dbname)
|
||||
c = self.get_cache(dbname)
|
||||
eq_(c['foo'], [(1, 2, 3)])
|
||||
|
||||
def test_by_id(self):
|
||||
# it's possible to use the cache by referring to the files by their row_id
|
||||
c = Cache()
|
||||
b = [(0, 0, 0), (1, 2, 3)]
|
||||
c['foo'] = b
|
||||
foo_id = c.get_id('foo')
|
||||
eq_(c[foo_id], b)
|
||||
|
||||
|
||||
class TestCaseCacheSQLEscape:
|
||||
def get_cache(self):
|
||||
return SqliteCache()
|
||||
|
||||
def test_contains(self):
|
||||
c = Cache()
|
||||
c = self.get_cache()
|
||||
assert "foo'bar" not in c
|
||||
|
||||
def test_getitem(self):
|
||||
c = Cache()
|
||||
c = self.get_cache()
|
||||
with raises(KeyError):
|
||||
c["foo'bar"]
|
||||
|
||||
def test_setitem(self):
|
||||
c = Cache()
|
||||
c = self.get_cache()
|
||||
c["foo'bar"] = []
|
||||
|
||||
def test_delitem(self):
|
||||
c = Cache()
|
||||
c = self.get_cache()
|
||||
c["foo'bar"] = []
|
||||
try:
|
||||
del c["foo'bar"]
|
||||
|
Loading…
Reference in New Issue
Block a user