Merge pull request #1088 from arsenetar/as/remove-shelve

feat: Remove shelve picture cache
This commit is contained in:
Andrew Senetar 2023-01-09 22:48:37 -06:00 committed by GitHub
commit a5e31f15f0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 4 additions and 174 deletions

View File

@ -126,8 +126,6 @@ class DupeGuru(Broadcaster):
NAME = PROMPT_NAME = "dupeGuru"
PICTURE_CACHE_TYPE = "sqlite" # set to 'shelve' for a ShelveCache
def __init__(self, view, portable=False):
if view.get_default(DEBUG_MODE_PREFERENCE):
logging.getLogger().setLevel(logging.DEBUG)
@ -153,7 +151,6 @@ class DupeGuru(Broadcaster):
"clean_empty_dirs": False,
"ignore_hardlink_matches": False,
"copymove_dest_type": DestType.RELATIVE,
"picture_cache_type": self.PICTURE_CACHE_TYPE,
"include_exists_check": True,
"rehash_ignore_mtime": False,
}
@ -185,8 +182,7 @@ class DupeGuru(Broadcaster):
self.view.create_results_window()
def _get_picture_cache_path(self):
cache_type = self.options["picture_cache_type"]
cache_name = "cached_pictures.shelve" if cache_type == "shelve" else "cached_pictures.db"
cache_name = "cached_pictures.db"
return op.join(self.appdata, cache_name)
def _get_dupe_sort_key(self, dupe, get_group, key, delta):

View File

@ -1,141 +0,0 @@
# Copyright 2016 Virgil Dupras
#
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.gnu.org/licenses/gpl-3.0.html
import os
import os.path as op
import shelve
import tempfile
from collections import namedtuple
from core.pe.cache import string_to_colors, colors_to_string
def wrap_path(path):
    """Return the shelve key for *path*: the path prefixed with ``"path:"``."""
    return "path:{}".format(path)
def unwrap_path(key):
    """Return *key* with its first five characters (the ``"path:"`` prefix) dropped.

    The prefix is not checked; the caller is expected to pass a path key.
    """
    prefix_length = len("path:")
    return key[prefix_length:]
def wrap_id(path):
    """Return the shelve key for a row id: the value prefixed with ``"id:"``."""
    return "id:{}".format(path)
def unwrap_id(key):
    """Return the integer row id held in *key* after its three-character ``"id:"`` prefix.

    Raises ValueError when the remainder of the key is not a valid integer.
    """
    prefix_length = len("id:")
    digits = key[prefix_length:]
    return int(digits)
# One cached picture record: its numeric id, its path, the serialized blocks
# and the modification time recorded when the row was written.
CacheRow = namedtuple("CacheRow", ["id", "path", "blocks", "mtime"])
class ShelveCache:
    """A class to cache picture blocks in a shelve backend.

    Each cached picture is stored under two keys: ``"path:<path>"`` maps to a
    ``CacheRow`` and ``"id:<rowid>"`` maps back to that ``"path:<path>"`` key,
    so that rows can be looked up either by path or by integer id.
    """

    def __init__(self, db=None, readonly=False):
        """Open the shelve at *db*, or a temporary one when *db* is None.

        :param db: file name passed to ``shelve.open``; when None, a shelve
            is created inside a fresh temporary directory that ``close()``
            removes.
        :param readonly: open the shelve with flag ``"r"`` instead of ``"c"``.
        """
        self.istmp = db is None
        if self.istmp:
            self.dtmp = tempfile.mkdtemp()
            self.ftmp = db = op.join(self.dtmp, "tmpdb")
        flag = "r" if readonly else "c"
        self.shelve = shelve.open(db, flag)
        self.maxid = self._compute_maxid()

    def __contains__(self, key):
        """Return whether a row is cached for the path *key*."""
        return wrap_path(key) in self.shelve

    def __delitem__(self, key):
        """Delete the row for path *key* together with its id entry.

        Raises KeyError when either entry is missing.
        """
        row = self.shelve[wrap_path(key)]
        del self.shelve[wrap_path(key)]
        del self.shelve[wrap_id(row.id)]

    def __getitem__(self, key):
        """Return the decoded blocks for *key*, an int row id or a path."""
        if isinstance(key, int):
            # The id entry stores the wrapped path key of the actual row.
            skey = self.shelve[wrap_id(key)]
        else:
            skey = wrap_path(key)
        return string_to_colors(self.shelve[skey].blocks)

    def __iter__(self):
        """Iterate over the cached paths (id entries are skipped)."""
        return (unwrap_path(k) for k in self.shelve if k.startswith("path:"))

    def __len__(self):
        """Return the number of cached paths."""
        return sum(1 for k in self.shelve if k.startswith("path:"))

    def __setitem__(self, path_str, blocks):
        """Store *blocks* for *path_str*, reusing the row id if one exists.

        The file's current mtime (truncated to int) is recorded with the row,
        or 0 when the file does not exist.
        """
        blocks = colors_to_string(blocks)
        if op.exists(path_str):
            mtime = int(os.stat(path_str).st_mtime)
        else:
            mtime = 0
        if path_str in self:
            rowid = self.shelve[wrap_path(path_str)].id
        else:
            rowid = self._get_new_id()
        row = CacheRow(rowid, path_str, blocks, mtime)
        self.shelve[wrap_path(path_str)] = row
        self.shelve[wrap_id(rowid)] = wrap_path(path_str)

    def _compute_maxid(self):
        """Return the highest row id present in the shelve, or 1 if there is none."""
        return max((unwrap_id(k) for k in self.shelve if k.startswith("id:")), default=1)

    def _get_new_id(self):
        """Increment and return the id counter."""
        self.maxid += 1
        return self.maxid

    def clear(self):
        """Remove every entry from the shelve."""
        self.shelve.clear()

    def close(self):
        """Close the shelve and, for a temporary cache, delete its directory.

        Calling ``close()`` again afterwards is a no-op.
        """
        if self.shelve is None:
            return
        self.shelve.close()
        # Drop the handle before touching the filesystem so that a cleanup
        # failure does not leave a closed shelve attached to the instance.
        self.shelve = None
        if self.istmp:
            # The dbm backend behind shelve may append an extension to the
            # file name or create several files, so removing exactly
            # ``self.ftmp`` is not reliable; remove the whole directory.
            import shutil

            shutil.rmtree(self.dtmp)

    def filter(self, func):
        """Delete every cached path for which ``func(path)`` is falsy."""
        # Collect first: the shelve must not be mutated while being iterated.
        to_delete = [key for key in self if not func(key)]
        for key in to_delete:
            del self[key]

    def get_id(self, path):
        """Return the row id cached for *path*.

        Raises ValueError when *path* is not in the cache.
        """
        if path in self:
            return self.shelve[wrap_path(path)].id
        else:
            raise ValueError(path)

    def get_multiple(self, rowids):
        """Yield ``(rowid, blocks)`` for each id in *rowids* that has an id entry.

        Ids without an id entry are silently skipped.
        """
        for rowid in rowids:
            try:
                skey = self.shelve[wrap_id(rowid)]
            except KeyError:
                continue
            yield (rowid, string_to_colors(self.shelve[skey].blocks))

    def purge_outdated(self):
        """Go through the cache and purge outdated records.

        A record is outdated if the picture doesn't exist or if its mtime is greater than the one in
        the db. A record whose stored mtime is 0 is always purged.
        """
        todelete = []
        for path in self:
            row = self.shelve[wrap_path(path)]
            if row.mtime and op.exists(path):
                picture_mtime = os.stat(path).st_mtime
                if int(picture_mtime) <= row.mtime:
                    # not outdated
                    continue
            todelete.append(path)
        for path in todelete:
            try:
                del self[path]
            except KeyError:
                # I have no idea why a KeyError sometimes happens, but it does, as we can see in
                # #402 and #439. I don't think it hurts to silently ignore the error, so that's
                # what we do
                pass

View File

@ -16,6 +16,7 @@ from hscommon.jobprogress import job
from core.engine import Match
from core.pe.block import avgdiff, DifferentBlockCountError, NoBlocksError
from core.pe.cache_sqlite import SqliteCache
# OPTIMIZATION NOTES:
# The bottleneck of the matching phase is CPU, which is why we use multiprocessing. However, another
@ -50,14 +51,7 @@ except Exception:
def get_cache(cache_path, readonly=False):
if cache_path.endswith("shelve"):
from core.pe.cache_shelve import ShelveCache
return ShelveCache(cache_path, readonly=readonly)
else:
from core.pe.cache_sqlite import SqliteCache
return SqliteCache(cache_path, readonly=readonly)
return SqliteCache(cache_path, readonly=readonly)
def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):

View File

@ -12,7 +12,6 @@ from hscommon.testutil import eq_
try:
from core.pe.cache import colors_to_string, string_to_colors
from core.pe.cache_sqlite import SqliteCache
from core.pe.cache_shelve import ShelveCache
except ImportError:
skip("Can't import the cache module, probably hasn't been compiled.")
@ -133,11 +132,6 @@ class TestCaseSqliteCache(BaseTestCaseCache):
eq_(c["foo"], [(1, 2, 3)])
class TestCaseShelveCache(BaseTestCaseCache):
def get_cache(self, dbname=None):
return ShelveCache(dbname)
class TestCaseCacheSQLEscape:
def get_cache(self):
return SqliteCache()

View File

@ -192,7 +192,6 @@ class DupeGuru(QObject):
scanned_tags.add("year")
self.model.options["scanned_tags"] = scanned_tags
self.model.options["match_scaled"] = self.prefs.match_scaled
self.model.options["picture_cache_type"] = self.prefs.picture_cache_type
self.model.options["include_exists_check"] = self.prefs.include_exists_check
self.model.options["rehash_ignore_mtime"] = self.prefs.rehash_ignore_mtime

View File

@ -4,11 +4,9 @@
# which should be included with this package. The terms are also available at
# http://www.gnu.org/licenses/gpl-3.0.html
from PyQt5.QtWidgets import QFormLayout
from PyQt5.QtCore import Qt
from hscommon.trans import trget
from hscommon.plat import ISLINUX
from qt.radio_box import RadioBox
from core.scanner import ScanType
from core.app import AppMode
@ -35,11 +33,6 @@ class PreferencesDialog(PreferencesDialogBase):
)
self.widgetsVLayout.addWidget(self.ignoreHardlinkMatches)
self.cacheTypeRadio = RadioBox(self, items=["Sqlite", "Shelve"], spread=False)
cache_form = QFormLayout()
cache_form.setLabelAlignment(Qt.AlignLeft)
cache_form.addRow(tr("Picture cache mode:"), self.cacheTypeRadio)
self.widgetsVLayout.addLayout(cache_form)
self._setupBottomPart()
def _setupDisplayPage(self):
@ -64,7 +57,6 @@ show scrollbars to span the view around"
def _load(self, prefs, setchecked, section):
setchecked(self.matchScaledBox, prefs.match_scaled)
self.cacheTypeRadio.selected_index = 1 if prefs.picture_cache_type == "shelve" else 0
# Update UI state based on selected scan type
scan_type = prefs.get_scan_type(AppMode.PICTURE)
@ -75,6 +67,5 @@ show scrollbars to span the view around"
def _save(self, prefs, ischecked):
prefs.match_scaled = ischecked(self.matchScaledBox)
prefs.picture_cache_type = "shelve" if self.cacheTypeRadio.selected_index == 1 else "sqlite"
prefs.details_dialog_override_theme_icons = ischecked(self.details_dialog_override_theme_icons)
prefs.details_dialog_viewers_show_scrollbars = ischecked(self.details_dialog_viewers_show_scrollbars)

View File

@ -225,7 +225,6 @@ class Preferences(PreferencesBase):
self.scan_tag_genre = get("ScanTagGenre", self.scan_tag_genre)
self.scan_tag_year = get("ScanTagYear", self.scan_tag_year)
self.match_scaled = get("MatchScaled", self.match_scaled)
self.picture_cache_type = get("PictureCacheType", self.picture_cache_type)
def reset(self):
self.filter_hardness = 95
@ -278,7 +277,6 @@ class Preferences(PreferencesBase):
self.scan_tag_genre = False
self.scan_tag_year = False
self.match_scaled = False
self.picture_cache_type = "sqlite"
def _save_values(self, settings):
set_ = self.set_value
@ -332,7 +330,6 @@ class Preferences(PreferencesBase):
set_("ScanTagGenre", self.scan_tag_genre)
set_("ScanTagYear", self.scan_tag_year)
set_("MatchScaled", self.match_scaled)
set_("PictureCacheType", self.picture_cache_type)
# scan_type is special because we save it immediately when we set it.
def get_scan_type(self, app_mode):