mirror of
https://github.com/arsenetar/dupeguru.git
synced 2025-03-10 05:34:36 +00:00
Add shelve-based picture cache implementation
Hopefully, this will fix #394 for real this time, that is, without the need to ship a messy Python executable in the app.
This commit is contained in:
parent
f7adb5f11e
commit
c58a4817ca
@ -10,6 +10,8 @@ from .directories import Directories, Bundle
|
|||||||
from .photo import Photo
|
from .photo import Photo
|
||||||
|
|
||||||
class DupeGuru(DupeGuruBase):
|
class DupeGuru(DupeGuruBase):
|
||||||
|
PICTURE_CACHE_TYPE = 'shelve'
|
||||||
|
|
||||||
def __init__(self, view):
|
def __init__(self, view):
|
||||||
DupeGuruBase.__init__(self, view)
|
DupeGuruBase.__init__(self, view)
|
||||||
self.directories = Directories()
|
self.directories = Directories()
|
||||||
|
12
core/app.py
12
core/app.py
@ -116,6 +116,8 @@ class DupeGuru(Broadcaster):
|
|||||||
|
|
||||||
NAME = PROMPT_NAME = "dupeGuru"
|
NAME = PROMPT_NAME = "dupeGuru"
|
||||||
|
|
||||||
|
PICTURE_CACHE_TYPE = 'sqlite' # set to 'shelve' for a ShelveCache
|
||||||
|
|
||||||
def __init__(self, view):
|
def __init__(self, view):
|
||||||
if view.get_default(DEBUG_MODE_PREFERENCE):
|
if view.get_default(DEBUG_MODE_PREFERENCE):
|
||||||
logging.getLogger().setLevel(logging.DEBUG)
|
logging.getLogger().setLevel(logging.DEBUG)
|
||||||
@ -133,12 +135,13 @@ class DupeGuru(Broadcaster):
|
|||||||
# In addition to "app-level" options, this dictionary also holds options that will be
|
# In addition to "app-level" options, this dictionary also holds options that will be
|
||||||
# sent to the scanner. They don't have default values because those defaults values are
|
# sent to the scanner. They don't have default values because those defaults values are
|
||||||
# defined in the scanner class.
|
# defined in the scanner class.
|
||||||
|
picture_cache_name = 'cached_pictures.shelve' if self.PICTURE_CACHE_TYPE == 'shelve' else 'cached_pictures.db'
|
||||||
self.options = {
|
self.options = {
|
||||||
'escape_filter_regexp': True,
|
'escape_filter_regexp': True,
|
||||||
'clean_empty_dirs': False,
|
'clean_empty_dirs': False,
|
||||||
'ignore_hardlink_matches': False,
|
'ignore_hardlink_matches': False,
|
||||||
'copymove_dest_type': DestType.Relative,
|
'copymove_dest_type': DestType.Relative,
|
||||||
'cache_path': op.join(self.appdata, 'cached_pictures.db'),
|
'cache_path': op.join(self.appdata, picture_cache_name),
|
||||||
}
|
}
|
||||||
self.selected_dupes = []
|
self.selected_dupes = []
|
||||||
self.details_panel = DetailsPanel(self)
|
self.details_panel = DetailsPanel(self)
|
||||||
@ -405,9 +408,10 @@ class DupeGuru(Broadcaster):
|
|||||||
path = path.parent()
|
path = path.parent()
|
||||||
|
|
||||||
def clear_picture_cache(self):
|
def clear_picture_cache(self):
|
||||||
cache = pe.cache_sqlite.SqliteCache(self.options['cache_path'])
|
try:
|
||||||
cache.clear()
|
os.remove(self.options['cache_path'])
|
||||||
cache.close()
|
except FileNotFoundError:
|
||||||
|
pass # we don't care
|
||||||
|
|
||||||
def copy_or_move(self, dupe, copy: bool, destination: str, dest_type: DestType):
|
def copy_or_move(self, dupe, copy: bool, destination: str, dest_type: DestType):
|
||||||
source_path = dupe.path
|
source_path = dupe.path
|
||||||
|
@ -1 +1 @@
|
|||||||
from . import block, cache, cache_sqlite, exif, iphoto_plist, matchblock, matchexif, photo, prioritize, result_table, scanner # noqa
|
from . import block, cache, exif, iphoto_plist, matchblock, matchexif, photo, prioritize, result_table, scanner # noqa
|
||||||
|
131
core/pe/cache_shelve.py
Normal file
131
core/pe/cache_shelve.py
Normal file
@ -0,0 +1,131 @@
|
|||||||
|
# Copyright 2016 Virgil Dupras
|
||||||
|
#
|
||||||
|
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||||
|
# which should be included with this package. The terms are also available at
|
||||||
|
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||||
|
|
||||||
|
import os
|
||||||
|
import os.path as op
|
||||||
|
import shelve
|
||||||
|
import tempfile
|
||||||
|
from collections import namedtuple
|
||||||
|
|
||||||
|
from .cache import string_to_colors, colors_to_string
|
||||||
|
|
||||||
|
def wrap_path(path):
    """Return the shelve key under which the row for picture ``path`` is stored."""
    prefix = 'path:'
    return prefix + format(path)
|
||||||
|
|
||||||
|
def unwrap_path(key):
    """Strip the leading 'path:' marker from a shelve key and return the remaining path."""
    prefix_length = len('path:')
    return key[prefix_length:]
|
||||||
|
|
||||||
|
def wrap_id(path):
    """Return the shelve key for a row id.

    Despite its name, the ``path`` argument is the value placed after the 'id:' marker (callers
    in this module pass an integer row id).
    """
    prefix = 'id:'
    return prefix + format(path)
|
||||||
|
|
||||||
|
def unwrap_id(key):
    """Strip the leading 'id:' marker from a shelve key and return the rest as an int."""
    prefix_length = len('id:')
    digits = key[prefix_length:]
    return int(digits)
|
||||||
|
|
||||||
|
# Record stored in the shelf for every cached picture.
CacheRow = namedtuple('CacheRow', ['id', 'path', 'blocks', 'mtime'])
|
||||||
|
|
||||||
|
class ShelveCache:
    """A class to cache picture blocks in a shelve backend.

    Every cached picture is stored under two keys: ``wrap_path(path)`` maps to a ``CacheRow`` and
    ``wrap_id(rowid)`` maps back to that wrapped path key, so that an entry can be reached either
    through its path or through its integer id.
    """

    def __init__(self, db=None, readonly=False):
        """Open the shelf located at ``db``.

        When ``db`` is None, the shelf lives in a freshly created temporary directory which is
        removed again by ``close()``. With ``readonly`` the shelf is opened with flag 'r',
        otherwise with flag 'c'.
        """
        self.istmp = db is None
        if self.istmp:
            self.dtmp = tempfile.mkdtemp()
            self.ftmp = db = op.join(self.dtmp, 'tmpdb')
        flag = 'r' if readonly else 'c'
        self.shelve = shelve.open(db, flag)
        self.maxid = self._compute_maxid()

    def __contains__(self, key):
        """Return whether there is a cached row for the path ``key``."""
        return wrap_path(key) in self.shelve

    def __delitem__(self, key):
        """Remove the row for path ``key`` along with its id entry. Raises KeyError if absent."""
        path_key = wrap_path(key)
        row = self.shelve[path_key]
        del self.shelve[path_key]
        del self.shelve[wrap_id(row.id)]

    def __getitem__(self, key):
        """Return the blocks cached for ``key``, which is either an int row id or a path."""
        if isinstance(key, int):
            # The id entry holds the wrapped path key of the actual row.
            skey = self.shelve[wrap_id(key)]
        else:
            skey = wrap_path(key)
        return string_to_colors(self.shelve[skey].blocks)

    def __iter__(self):
        """Iterate over the paths of all cached pictures."""
        return (unwrap_path(k) for k in self.shelve if k.startswith('path:'))

    def __len__(self):
        """Return the number of cached pictures (id entries are not counted)."""
        return sum(1 for k in self.shelve if k.startswith('path:'))

    def __setitem__(self, path_str, blocks):
        """Store ``blocks`` for the picture at ``path_str``.

        The file's current mtime is recorded with the row, or 0 when the file can't be stat'ed.
        An already cached path keeps its row id, a new path gets a new one.
        """
        blocks = colors_to_string(blocks)
        try:
            # Stat directly rather than checking for existence first: the file could disappear
            # between the two calls.
            mtime = int(os.stat(path_str).st_mtime)
        except (OSError, ValueError):
            mtime = 0
        path_key = wrap_path(path_str)
        try:
            rowid = self.shelve[path_key].id
        except KeyError:
            rowid = self._get_new_id()
        self.shelve[path_key] = CacheRow(rowid, path_str, blocks, mtime)
        self.shelve[wrap_id(rowid)] = path_key

    def _compute_maxid(self):
        """Return the highest row id present in the shelf, or 1 when there is none."""
        return max((unwrap_id(k) for k in self.shelve if k.startswith('id:')), default=1)

    def _get_new_id(self):
        """Increment ``maxid`` and return it as the id for a new row."""
        self.maxid += 1
        return self.maxid

    def clear(self):
        """Remove every entry from the shelf."""
        self.shelve.clear()

    def close(self):
        """Close the shelf and, for a temporary cache, delete its temporary directory.

        Calling ``close()`` again afterwards does nothing.
        """
        if self.shelve is not None:
            self.shelve.close()
            if self.istmp:
                # The dbm backend used by shelve may add an extension to the file name or
                # create more than one file, so delete whatever ended up in our private temp
                # directory instead of assuming a single file named exactly ``ftmp``.
                for name in os.listdir(self.dtmp):
                    os.remove(op.join(self.dtmp, name))
                os.rmdir(self.dtmp)
        self.shelve = None

    def filter(self, func):
        """Delete every cached path for which ``func(path)`` is false."""
        # Collect first: the shelf must not be modified while it is being iterated.
        to_delete = [key for key in self if not func(key)]
        for key in to_delete:
            del self[key]

    def get_id(self, path):
        """Return the row id of ``path``. Raises ValueError if the path is not cached."""
        try:
            return self.shelve[wrap_path(path)].id
        except KeyError:
            raise ValueError(path) from None

    def get_multiple(self, rowids):
        """Yield ``(rowid, blocks)`` for each of ``rowids``, skipping ids that aren't cached."""
        for rowid in rowids:
            try:
                skey = self.shelve[wrap_id(rowid)]
            except KeyError:
                continue
            yield (rowid, string_to_colors(self.shelve[skey].blocks))

    def purge_outdated(self):
        """Go through the cache and purge outdated records.

        A record is outdated if the picture doesn't exist or if its mtime is greater than the one in
        the db. Records stored with an mtime of 0 are always purged.
        """
        todelete = []
        for path in self:
            row = self.shelve[wrap_path(path)]
            if row.mtime:
                try:
                    picture_mtime = os.stat(path).st_mtime
                except (OSError, ValueError):
                    # The picture is gone or unreachable: treat the record as outdated.
                    pass
                else:
                    if int(picture_mtime) <= row.mtime:
                        # not outdated
                        continue
            todelete.append(path)
        for path in todelete:
            del self[path]
|
||||||
|
|
||||||
|
|
@ -12,9 +12,10 @@ import sqlite3 as sqlite
|
|||||||
from .cache import string_to_colors, colors_to_string
|
from .cache import string_to_colors, colors_to_string
|
||||||
|
|
||||||
class SqliteCache:
|
class SqliteCache:
|
||||||
"""A class to cache picture blocks.
|
"""A class to cache picture blocks in a sqlite backend.
|
||||||
"""
|
"""
|
||||||
def __init__(self, db=':memory:'):
|
def __init__(self, db=':memory:', readonly=False):
|
||||||
|
# readonly is not used in the sqlite version of the cache
|
||||||
self.dbname = db
|
self.dbname = db
|
||||||
self.con = None
|
self.con = None
|
||||||
self._create_con()
|
self._create_con()
|
||||||
|
@ -16,7 +16,6 @@ from hscommon.jobprogress import job
|
|||||||
|
|
||||||
from core.engine import Match
|
from core.engine import Match
|
||||||
from .block import avgdiff, DifferentBlockCountError, NoBlocksError
|
from .block import avgdiff, DifferentBlockCountError, NoBlocksError
|
||||||
from .cache_sqlite import SqliteCache
|
|
||||||
|
|
||||||
# OPTIMIZATION NOTES:
|
# OPTIMIZATION NOTES:
|
||||||
# The bottleneck of the matching phase is CPU, which is why we use multiprocessing. However, another
|
# The bottleneck of the matching phase is CPU, which is why we use multiprocessing. However, another
|
||||||
@ -49,12 +48,20 @@ except Exception:
|
|||||||
logging.warning("Had problems to determine cpu count on launch.")
|
logging.warning("Had problems to determine cpu count on launch.")
|
||||||
RESULTS_QUEUE_LIMIT = 8
|
RESULTS_QUEUE_LIMIT = 8
|
||||||
|
|
||||||
|
def get_cache(cache_path, readonly=False):
    """Return a picture cache opened on ``cache_path``.

    The backend is picked from the path itself: a path ending with 'shelve' gets a ShelveCache,
    any other path a SqliteCache. Only the module of the selected backend is imported.
    """
    wants_shelve = cache_path.endswith('shelve')
    if not wants_shelve:
        from .cache_sqlite import SqliteCache
        return SqliteCache(cache_path, readonly=readonly)
    from .cache_shelve import ShelveCache
    return ShelveCache(cache_path, readonly=readonly)
|
||||||
|
|
||||||
def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
|
def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
|
||||||
# The MemoryError handlers in there use logging without first caring about whether or not
|
# The MemoryError handlers in there use logging without first caring about whether or not
|
||||||
# there is enough memory left to carry on the operation because it is assumed that the
|
# there is enough memory left to carry on the operation because it is assumed that the
|
||||||
# MemoryError happens when trying to read an image file, which is freed from memory by the
|
# MemoryError happens when trying to read an image file, which is freed from memory by the
|
||||||
# time that MemoryError is raised.
|
# time that MemoryError is raised.
|
||||||
cache = SqliteCache(cache_path)
|
cache = get_cache(cache_path)
|
||||||
cache.purge_outdated()
|
cache.purge_outdated()
|
||||||
prepared = [] # only pictures for which there was no error getting blocks
|
prepared = [] # only pictures for which there was no error getting blocks
|
||||||
try:
|
try:
|
||||||
@ -109,7 +116,7 @@ def async_compare(ref_ids, other_ids, dbname, threshold, picinfo):
|
|||||||
# The list of ids in ref_ids have to be compared to the list of ids in other_ids. other_ids
|
# The list of ids in ref_ids have to be compared to the list of ids in other_ids. other_ids
|
||||||
# can be None. In this case, ref_ids has to be compared with itself
|
# can be None. In this case, ref_ids has to be compared with itself
|
||||||
# picinfo is a dictionary {pic_id: (dimensions, is_ref)}
|
# picinfo is a dictionary {pic_id: (dimensions, is_ref)}
|
||||||
cache = SqliteCache(dbname)
|
cache = get_cache(dbname, readonly=True)
|
||||||
limit = 100 - threshold
|
limit = 100 - threshold
|
||||||
ref_pairs = list(cache.get_multiple(ref_ids))
|
ref_pairs = list(cache.get_multiple(ref_ids))
|
||||||
if other_ids is not None:
|
if other_ids is not None:
|
||||||
@ -159,7 +166,7 @@ def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljo
|
|||||||
j = j.start_subjob([3, 7])
|
j = j.start_subjob([3, 7])
|
||||||
pictures = prepare_pictures(pictures, cache_path, with_dimensions=not match_scaled, j=j)
|
pictures = prepare_pictures(pictures, cache_path, with_dimensions=not match_scaled, j=j)
|
||||||
j = j.start_subjob([9, 1], tr("Preparing for matching"))
|
j = j.start_subjob([9, 1], tr("Preparing for matching"))
|
||||||
cache = SqliteCache(cache_path)
|
cache = get_cache(cache_path)
|
||||||
id2picture = {}
|
id2picture = {}
|
||||||
for picture in pictures:
|
for picture in pictures:
|
||||||
try:
|
try:
|
||||||
|
@ -12,6 +12,7 @@ from hscommon.testutil import eq_
|
|||||||
try:
|
try:
|
||||||
from ..pe.cache import colors_to_string, string_to_colors
|
from ..pe.cache import colors_to_string, string_to_colors
|
||||||
from ..pe.cache_sqlite import SqliteCache
|
from ..pe.cache_sqlite import SqliteCache
|
||||||
|
from ..pe.cache_shelve import ShelveCache
|
||||||
except ImportError:
|
except ImportError:
|
||||||
skip("Can't import the cache module, probably hasn't been compiled.")
|
skip("Can't import the cache module, probably hasn't been compiled.")
|
||||||
|
|
||||||
@ -131,6 +132,11 @@ class TestCaseSqliteCache(BaseTestCaseCache):
|
|||||||
eq_(c['foo'], [(1, 2, 3)])
|
eq_(c['foo'], [(1, 2, 3)])
|
||||||
|
|
||||||
|
|
||||||
|
class TestCaseShelveCache(BaseTestCaseCache):
    """Test case inherited from BaseTestCaseCache that builds its caches with ShelveCache."""

    def get_cache(self, dbname=None):
        """Return a ShelveCache opened on ``dbname``."""
        cache = ShelveCache(dbname)
        return cache
|
||||||
|
|
||||||
|
|
||||||
class TestCaseCacheSQLEscape:
|
class TestCaseCacheSQLEscape:
|
||||||
def get_cache(self):
|
def get_cache(self):
|
||||||
return SqliteCache()
|
return SqliteCache()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user