Merge pull request #1088 from arsenetar/as/remove-shelve

feat: Remove shelve picture cache
Andrew Senetar 11 months ago committed by GitHub
commit a5e31f15f0
No known key found for this signature in database

@ -126,8 +126,6 @@ class DupeGuru(Broadcaster):
NAME = PROMPT_NAME = "dupeGuru"
PICTURE_CACHE_TYPE = "sqlite" # set to 'shelve' for a ShelveCache
def __init__(self, view, portable=False):
if view.get_default(DEBUG_MODE_PREFERENCE):
@ -153,7 +151,6 @@ class DupeGuru(Broadcaster):
"clean_empty_dirs": False,
"ignore_hardlink_matches": False,
"copymove_dest_type": DestType.RELATIVE,
"picture_cache_type": self.PICTURE_CACHE_TYPE,
"include_exists_check": True,
"rehash_ignore_mtime": False,
@ -185,8 +182,7 @@ class DupeGuru(Broadcaster):
def _get_picture_cache_path(self):
    """Return the full path of the picture cache database.

    The cache file is always named ``cached_pictures.db`` and lives directly in
    ``self.appdata``; the ``picture_cache_type`` option is no longer consulted.
    """
    return op.join(self.appdata, "cached_pictures.db")
def _get_dupe_sort_key(self, dupe, get_group, key, delta):

@ -1,141 +0,0 @@
# Copyright 2016 Virgil Dupras
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.gnu.org/licenses/gpl-3.0.html
import os
import os.path as op
import shelve
import tempfile
from collections import namedtuple
from import string_to_colors, colors_to_string
def wrap_path(path):
    """Return the shelve key for a picture path: the path prefixed with ``path:``."""
    return f"path:{path}"
def unwrap_path(key):
    """Return the picture path from a ``path:``-prefixed shelve key.

    The first five characters are dropped without checking that they are
    actually ``path:``.
    """
    return key[len("path:"):]
def wrap_id(path):
    """Return the shelve key for a row id: the value prefixed with ``id:``.

    The parameter is named ``path`` for historical reasons; callers pass a row id.
    """
    return f"id:{path}"
def unwrap_id(key):
    """Return the integer row id from an ``id:``-prefixed shelve key.

    Raises ``ValueError`` when the text after the three-character prefix is not
    an integer.
    """
    return int(key[len("id:"):])
# Record stored by ShelveCache under each "path:" key: the row id, the picture path, the blocks
# as returned by colors_to_string(), and the picture's mtime as an int (0 if the file is missing).
CacheRow = namedtuple("CacheRow", "id path blocks mtime")
class ShelveCache:
    """A class to cache picture blocks in a shelve backend.

    Each cached picture occupies two shelve entries: ``path:<path>`` holds a
    ``CacheRow`` and ``id:<rowid>`` holds the matching ``path:<path>`` key, so a
    record can be reached either by picture path or by numeric row id.
    """

    def __init__(self, db=None, readonly=False):
        """Open the shelve at ``db``, or a temporary one when ``db`` is None.

        ``readonly`` opens the shelve with flag ``"r"`` instead of ``"c"``.
        """
        self.istmp = db is None
        if self.istmp:
            self.dtmp = tempfile.mkdtemp()
            self.ftmp = db = op.join(self.dtmp, "tmpdb")
        flag = "r" if readonly else "c"
        self.shelve = shelve.open(db, flag)
        self.maxid = self._compute_maxid()

    def __contains__(self, key):
        return wrap_path(key) in self.shelve

    def __delitem__(self, key):
        # Read the row first: its id is needed to drop the reverse "id:" entry.
        row = self.shelve[wrap_path(key)]
        del self.shelve[wrap_path(key)]
        del self.shelve[wrap_id(row.id)]

    def __getitem__(self, key):
        """Return the blocks for ``key``, which is either a row id (int) or a path."""
        if isinstance(key, int):
            skey = self.shelve[wrap_id(key)]
        else:
            skey = wrap_path(key)
        return string_to_colors(self.shelve[skey].blocks)

    def __iter__(self):
        return (unwrap_path(k) for k in self.shelve if k.startswith("path:"))

    def __len__(self):
        return sum(1 for k in self.shelve if k.startswith("path:"))

    def __setitem__(self, path_str, blocks):
        """Store ``blocks`` for ``path_str``, reusing the row id if one already exists."""
        blocks = colors_to_string(blocks)
        if op.exists(path_str):
            mtime = int(os.stat(path_str).st_mtime)
        else:
            # A zero mtime marks the record as outdated for purge_outdated().
            mtime = 0
        if path_str in self:
            rowid = self.shelve[wrap_path(path_str)].id
        else:
            rowid = self._get_new_id()
        row = CacheRow(rowid, path_str, blocks, mtime)
        self.shelve[wrap_path(path_str)] = row
        self.shelve[wrap_id(rowid)] = wrap_path(path_str)

    def _compute_maxid(self):
        return max((unwrap_id(k) for k in self.shelve if k.startswith("id:")), default=1)

    def _get_new_id(self):
        self.maxid += 1
        return self.maxid

    def clear(self):
        """Remove every record from the shelve."""
        self.shelve.clear()

    def close(self):
        """Close the shelve and, for a temporary cache, delete its directory.

        Safe to call more than once.
        """
        if self.shelve is not None:
            self.shelve.close()
            if self.istmp:
                # dbm backends may add suffixes or create several files, so remove
                # the whole temporary directory rather than a single file name.
                import shutil

                shutil.rmtree(self.dtmp, ignore_errors=True)
        self.shelve = None

    def filter(self, func):
        """Delete every cached path for which ``func(path)`` is falsy."""
        to_delete = [key for key in self if not func(key)]
        for key in to_delete:
            del self[key]

    def get_id(self, path):
        """Return the row id of ``path``; raise ``ValueError`` if it is not cached."""
        if path in self:
            return self.shelve[wrap_path(path)].id
        raise ValueError(path)

    def get_multiple(self, rowids):
        """Yield ``(rowid, blocks)`` for each id in ``rowids``, skipping unknown ids."""
        for rowid in rowids:
            try:
                skey = self.shelve[wrap_id(rowid)]
            except KeyError:
                continue
            yield (rowid, string_to_colors(self.shelve[skey].blocks))

    def purge_outdated(self):
        """Go through the cache and purge outdated records.

        A record is outdated if the picture doesn't exist or if its mtime is greater than the one in
        the db. Records stored with an mtime of 0 are always purged.
        """
        todelete = []
        for path in self:
            row = self.shelve[wrap_path(path)]
            if row.mtime and op.exists(path):
                picture_mtime = os.stat(path).st_mtime
                if int(picture_mtime) <= row.mtime:
                    # not outdated
                    continue
            todelete.append(path)
        for path in todelete:
            try:
                del self[path]
            except KeyError:
                # I have no idea why a KeyError sometimes happen, but it does, as we can see in
                # #402 and #439. I don't think it hurts to silently ignore the error, so that's
                # what we do
                pass

@ -16,6 +16,7 @@ from hscommon.jobprogress import job
from core.engine import Match
from import avgdiff, DifferentBlockCountError, NoBlocksError
from import SqliteCache
# The bottleneck of the matching phase is CPU, which is why we use multiprocessing. However, another
@ -50,14 +51,7 @@ except Exception:
def get_cache(cache_path, readonly=False):
    """Open the picture cache stored at ``cache_path``.

    The cache is always SQLite-backed; paths ending in ``shelve`` no longer get
    special treatment. ``readonly`` is forwarded unchanged to ``SqliteCache``.
    """
    return SqliteCache(cache_path, readonly=readonly)
def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):

@ -12,7 +12,6 @@ from hscommon.testutil import eq_
from import colors_to_string, string_to_colors
from import SqliteCache
from import ShelveCache
except ImportError:
skip("Can't import the cache module, probably hasn't been compiled.")
@ -133,11 +132,6 @@ class TestCaseSqliteCache(BaseTestCaseCache):
eq_(c["foo"], [(1, 2, 3)])
class TestCaseShelveCache(BaseTestCaseCache):
    """Cache test case bound to the shelve backend.

    Presumably ``BaseTestCaseCache`` calls ``get_cache`` to obtain the cache
    under test; confirm against the base class.
    """

    def get_cache(self, dbname=None):
        """Return a ``ShelveCache`` opened on ``dbname`` (temporary when None)."""
        return ShelveCache(dbname)
class TestCaseCacheSQLEscape:
def get_cache(self):
return SqliteCache()

@ -192,7 +192,6 @@ class DupeGuru(QObject):
self.model.options["scanned_tags"] = scanned_tags
self.model.options["match_scaled"] = self.prefs.match_scaled
self.model.options["picture_cache_type"] = self.prefs.picture_cache_type
self.model.options["include_exists_check"] = self.prefs.include_exists_check
self.model.options["rehash_ignore_mtime"] = self.prefs.rehash_ignore_mtime

@ -4,11 +4,9 @@
# which should be included with this package. The terms are also available at
# http://www.gnu.org/licenses/gpl-3.0.html
from PyQt5.QtWidgets import QFormLayout
from PyQt5.QtCore import Qt
from hscommon.trans import trget
from hscommon.plat import ISLINUX
from qt.radio_box import RadioBox
from core.scanner import ScanType
from import AppMode
@ -35,11 +33,6 @@ class PreferencesDialog(PreferencesDialogBase):
self.cacheTypeRadio = RadioBox(self, items=["Sqlite", "Shelve"], spread=False)
cache_form = QFormLayout()
cache_form.addRow(tr("Picture cache mode:"), self.cacheTypeRadio)
def _setupDisplayPage(self):
@ -64,7 +57,6 @@ show scrollbars to span the view around"
def _load(self, prefs, setchecked, section):
setchecked(self.matchScaledBox, prefs.match_scaled)
self.cacheTypeRadio.selected_index = 1 if prefs.picture_cache_type == "shelve" else 0
# Update UI state based on selected scan type
scan_type = prefs.get_scan_type(AppMode.PICTURE)
@ -75,6 +67,5 @@ show scrollbars to span the view around"
def _save(self, prefs, ischecked):
prefs.match_scaled = ischecked(self.matchScaledBox)
prefs.picture_cache_type = "shelve" if self.cacheTypeRadio.selected_index == 1 else "sqlite"
prefs.details_dialog_override_theme_icons = ischecked(self.details_dialog_override_theme_icons)
prefs.details_dialog_viewers_show_scrollbars = ischecked(self.details_dialog_viewers_show_scrollbars)

@ -225,7 +225,6 @@ class Preferences(PreferencesBase):
self.scan_tag_genre = get("ScanTagGenre", self.scan_tag_genre)
self.scan_tag_year = get("ScanTagYear", self.scan_tag_year)
self.match_scaled = get("MatchScaled", self.match_scaled)
self.picture_cache_type = get("PictureCacheType", self.picture_cache_type)
def reset(self):
self.filter_hardness = 95
@ -278,7 +277,6 @@ class Preferences(PreferencesBase):
self.scan_tag_genre = False
self.scan_tag_year = False
self.match_scaled = False
self.picture_cache_type = "sqlite"
def _save_values(self, settings):
set_ = self.set_value
@ -332,7 +330,6 @@ class Preferences(PreferencesBase):
set_("ScanTagGenre", self.scan_tag_genre)
set_("ScanTagYear", self.scan_tag_year)
set_("MatchScaled", self.match_scaled)
set_("PictureCacheType", self.picture_cache_type)
# scan_type is special because we save it immediately when we set it.
def get_scan_type(self, app_mode):