From 83f5e8042794ed48c7170c6e0362c3c4baaa3131 Mon Sep 17 00:00:00 2001 From: Andrew Senetar Date: Fri, 6 Jan 2023 00:35:23 -0600 Subject: [PATCH] feat: Remove shelve picture cache - Remove shelve picture cache as it has had a fair number of historical issues. Original issue for which it was added should be long resolved. Additionally this allows additional consolidation of the various cache code and potentially dbs in the future. - Remove all related preferences and related code for changing cache backend between sqlite and shelve. --- core/app.py | 6 +- core/pe/cache_shelve.py | 141 ------------------------------------ core/pe/matchblock.py | 10 +-- core/tests/cache_test.py | 6 -- qt/app.py | 1 - qt/pe/preferences_dialog.py | 11 +-- qt/preferences.py | 3 - 7 files changed, 4 insertions(+), 174 deletions(-) delete mode 100644 core/pe/cache_shelve.py diff --git a/core/app.py b/core/app.py index 9a907aed..7cde6317 100644 --- a/core/app.py +++ b/core/app.py @@ -126,8 +126,6 @@ class DupeGuru(Broadcaster): NAME = PROMPT_NAME = "dupeGuru" - PICTURE_CACHE_TYPE = "sqlite" # set to 'shelve' for a ShelveCache - def __init__(self, view, portable=False): if view.get_default(DEBUG_MODE_PREFERENCE): logging.getLogger().setLevel(logging.DEBUG) @@ -153,7 +151,6 @@ class DupeGuru(Broadcaster): "clean_empty_dirs": False, "ignore_hardlink_matches": False, "copymove_dest_type": DestType.RELATIVE, - "picture_cache_type": self.PICTURE_CACHE_TYPE, "include_exists_check": True, "rehash_ignore_mtime": False, } @@ -185,8 +182,7 @@ class DupeGuru(Broadcaster): self.view.create_results_window() def _get_picture_cache_path(self): - cache_type = self.options["picture_cache_type"] - cache_name = "cached_pictures.shelve" if cache_type == "shelve" else "cached_pictures.db" + cache_name = "cached_pictures.db" return op.join(self.appdata, cache_name) def _get_dupe_sort_key(self, dupe, get_group, key, delta): diff --git a/core/pe/cache_shelve.py b/core/pe/cache_shelve.py deleted file mode 100644 index 57f42775..00000000 --- a/core/pe/cache_shelve.py +++ /dev/null @@ -1,141 +0,0 @@ -# Copyright 2016 Virgil Dupras -# -# This software is licensed under the "GPLv3" License as described in the "LICENSE" file, -# which should be included with this package. The terms are also available at -# http://www.gnu.org/licenses/gpl-3.0.html - -import os -import os.path as op -import shelve -import tempfile -from collections import namedtuple - -from core.pe.cache import string_to_colors, colors_to_string - - -def wrap_path(path): - return f"path:{path}" - - -def unwrap_path(key): - return key[5:] - - -def wrap_id(path): - return f"id:{path}" - - -def unwrap_id(key): - return int(key[3:]) - - -CacheRow = namedtuple("CacheRow", "id path blocks mtime") - - -class ShelveCache: - """A class to cache picture blocks in a shelve backend.""" - - def __init__(self, db=None, readonly=False): - self.istmp = db is None - if self.istmp: - self.dtmp = tempfile.mkdtemp() - self.ftmp = db = op.join(self.dtmp, "tmpdb") - flag = "r" if readonly else "c" - self.shelve = shelve.open(db, flag) - self.maxid = self._compute_maxid() - - def __contains__(self, key): - return wrap_path(key) in self.shelve - - def __delitem__(self, key): - row = self.shelve[wrap_path(key)] - del self.shelve[wrap_path(key)] - del self.shelve[wrap_id(row.id)] - - def __getitem__(self, key): - if isinstance(key, int): - skey = self.shelve[wrap_id(key)] - else: - skey = wrap_path(key) - return string_to_colors(self.shelve[skey].blocks) - - def __iter__(self): - return (unwrap_path(k) for k in self.shelve if k.startswith("path:")) - - def __len__(self): - return sum(1 for k in self.shelve if k.startswith("path:")) - - def __setitem__(self, path_str, blocks): - blocks = colors_to_string(blocks) - if op.exists(path_str): - mtime = int(os.stat(path_str).st_mtime) - else: - mtime = 0 - if path_str in self: - rowid = self.shelve[wrap_path(path_str)].id - else: - rowid = self._get_new_id() - row = CacheRow(rowid, path_str, blocks, mtime) - self.shelve[wrap_path(path_str)] = row - self.shelve[wrap_id(rowid)] = wrap_path(path_str) - - def _compute_maxid(self): - return max((unwrap_id(k) for k in self.shelve if k.startswith("id:")), default=1) - - def _get_new_id(self): - self.maxid += 1 - return self.maxid - - def clear(self): - self.shelve.clear() - - def close(self): - if self.shelve is not None: - self.shelve.close() - if self.istmp: - os.remove(self.ftmp) - os.rmdir(self.dtmp) - self.shelve = None - - def filter(self, func): - to_delete = [key for key in self if not func(key)] - for key in to_delete: - del self[key] - - def get_id(self, path): - if path in self: - return self.shelve[wrap_path(path)].id - else: - raise ValueError(path) - - def get_multiple(self, rowids): - for rowid in rowids: - try: - skey = self.shelve[wrap_id(rowid)] - except KeyError: - continue - yield (rowid, string_to_colors(self.shelve[skey].blocks)) - - def purge_outdated(self): - """Go through the cache and purge outdated records. - - A record is outdated if the picture doesn't exist or if its mtime is greater than the one in - the db. - """ - todelete = [] - for path in self: - row = self.shelve[wrap_path(path)] - if row.mtime and op.exists(path): - picture_mtime = os.stat(path).st_mtime - if int(picture_mtime) <= row.mtime: - # not outdated - continue - todelete.append(path) - for path in todelete: - try: - del self[path] - except KeyError: - # I have no idea why a KeyError sometimes happen, but it does, as we can see in - # #402 and #439. I don't think it hurts to silently ignore the error, so that's - # what we do - pass diff --git a/core/pe/matchblock.py b/core/pe/matchblock.py index 447d8ae7..bc203175 100644 --- a/core/pe/matchblock.py +++ b/core/pe/matchblock.py @@ -16,6 +16,7 @@ from hscommon.jobprogress import job from core.engine import Match from core.pe.block import avgdiff, DifferentBlockCountError, NoBlocksError +from core.pe.cache_sqlite import SqliteCache # OPTIMIZATION NOTES: # The bottleneck of the matching phase is CPU, which is why we use multiprocessing. However, another @@ -50,14 +51,7 @@ except Exception: def get_cache(cache_path, readonly=False): - if cache_path.endswith("shelve"): - from core.pe.cache_shelve import ShelveCache - - return ShelveCache(cache_path, readonly=readonly) - else: - from core.pe.cache_sqlite import SqliteCache - - return SqliteCache(cache_path, readonly=readonly) + return SqliteCache(cache_path, readonly=readonly) def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob): diff --git a/core/tests/cache_test.py b/core/tests/cache_test.py index 7b11c02c..b47cb998 100644 --- a/core/tests/cache_test.py +++ b/core/tests/cache_test.py @@ -12,7 +12,6 @@ from hscommon.testutil import eq_ try: from core.pe.cache import colors_to_string, string_to_colors from core.pe.cache_sqlite import SqliteCache - from core.pe.cache_shelve import ShelveCache except ImportError: skip("Can't import the cache module, probably hasn't been compiled.") @@ -133,11 +132,6 @@ class TestCaseSqliteCache(BaseTestCaseCache): eq_(c["foo"], [(1, 2, 3)]) -class TestCaseShelveCache(BaseTestCaseCache): - def get_cache(self, dbname=None): - return ShelveCache(dbname) - - class TestCaseCacheSQLEscape: def get_cache(self): return SqliteCache() diff --git a/qt/app.py b/qt/app.py index d0533743..5e6271c0 100644 --- a/qt/app.py +++ b/qt/app.py @@ -192,7 +192,6 @@ class DupeGuru(QObject): scanned_tags.add("year") self.model.options["scanned_tags"] = scanned_tags self.model.options["match_scaled"] = self.prefs.match_scaled - self.model.options["picture_cache_type"] = self.prefs.picture_cache_type self.model.options["include_exists_check"] = self.prefs.include_exists_check self.model.options["rehash_ignore_mtime"] = self.prefs.rehash_ignore_mtime diff --git a/qt/pe/preferences_dialog.py b/qt/pe/preferences_dialog.py index 2ded026d..375cc779 100644 --- a/qt/pe/preferences_dialog.py +++ b/qt/pe/preferences_dialog.py @@ -4,11 +4,9 @@ # which should be included with this package. The terms are also available at # http://www.gnu.org/licenses/gpl-3.0.html -from PyQt5.QtWidgets import QFormLayout -from PyQt5.QtCore import Qt + from hscommon.trans import trget from hscommon.plat import ISLINUX -from qt.radio_box import RadioBox from core.scanner import ScanType from core.app import AppMode @@ -35,11 +33,6 @@ class PreferencesDialog(PreferencesDialogBase): ) self.widgetsVLayout.addWidget(self.ignoreHardlinkMatches) - self.cacheTypeRadio = RadioBox(self, items=["Sqlite", "Shelve"], spread=False) - cache_form = QFormLayout() - cache_form.setLabelAlignment(Qt.AlignLeft) - cache_form.addRow(tr("Picture cache mode:"), self.cacheTypeRadio) - self.widgetsVLayout.addLayout(cache_form) self._setupBottomPart() def _setupDisplayPage(self): @@ -64,7 +57,6 @@ show scrollbars to span the view around" def _load(self, prefs, setchecked, section): setchecked(self.matchScaledBox, prefs.match_scaled) - self.cacheTypeRadio.selected_index = 1 if prefs.picture_cache_type == "shelve" else 0 # Update UI state based on selected scan type scan_type = prefs.get_scan_type(AppMode.PICTURE) @@ -75,6 +67,5 @@ show scrollbars to span the view around" def _save(self, prefs, ischecked): prefs.match_scaled = ischecked(self.matchScaledBox) - prefs.picture_cache_type = "shelve" if self.cacheTypeRadio.selected_index == 1 else "sqlite" prefs.details_dialog_override_theme_icons = ischecked(self.details_dialog_override_theme_icons) prefs.details_dialog_viewers_show_scrollbars = ischecked(self.details_dialog_viewers_show_scrollbars) diff --git a/qt/preferences.py b/qt/preferences.py index 0c4cb651..17ae3bf9 100644 --- a/qt/preferences.py +++ b/qt/preferences.py @@ -225,7 +225,6 @@ class Preferences(PreferencesBase): self.scan_tag_genre = get("ScanTagGenre", self.scan_tag_genre) self.scan_tag_year = get("ScanTagYear", self.scan_tag_year) self.match_scaled = get("MatchScaled", self.match_scaled) - self.picture_cache_type = get("PictureCacheType", self.picture_cache_type) def reset(self): self.filter_hardness = 95 @@ -278,7 +277,6 @@ class Preferences(PreferencesBase): self.scan_tag_genre = False self.scan_tag_year = False self.match_scaled = False - self.picture_cache_type = "sqlite" def _save_values(self, settings): set_ = self.set_value @@ -332,7 +330,6 @@ class Preferences(PreferencesBase): set_("ScanTagGenre", self.scan_tag_genre) set_("ScanTagYear", self.scan_tag_year) set_("MatchScaled", self.match_scaled) - set_("PictureCacheType", self.picture_cache_type) # scan_type is special because we save it immediately when we set it. def get_scan_type(self, app_mode):