feat: Remove shelve picture cache

- Remove the shelve picture cache, as it has had a fair number of historical issues. The original issue for which it was added should be long resolved. Additionally, this allows further consolidation of the various cache code, and potentially of the dbs, in the future.
- Remove all related preferences and related code for changing the cache backend between sqlite and shelve.
2026-06-19 13:37:52 +00:00 · 2023-01-06 00:35:23 -06:00
parent 091cae0cc6
commit 83f5e80427
7 changed files with 4 additions and 174 deletions
--- a/core/pe/cache_shelve.py
+++ b/core/pe/cache_shelve.py
@@ -1,141 +0,0 @@
-# Copyright 2016 Virgil Dupras
-#
-# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
-# which should be included with this package. The terms are also available at
-# http://www.gnu.org/licenses/gpl-3.0.html
-
-import os
-import os.path as op
-import shelve
-import tempfile
-from collections import namedtuple
-
-from core.pe.cache import string_to_colors, colors_to_string
-
-
-def wrap_path(path):
-    return f"path:{path}"
-
-
-def unwrap_path(key):
-    return key[5:]
-
-
-def wrap_id(path):
-    return f"id:{path}"
-
-
-def unwrap_id(key):
-    return int(key[3:])
-
-
-CacheRow = namedtuple("CacheRow", "id path blocks mtime")
-
-
-class ShelveCache:
-    """A class to cache picture blocks in a shelve backend."""
-
-    def __init__(self, db=None, readonly=False):
-        self.istmp = db is None
-        if self.istmp:
-            self.dtmp = tempfile.mkdtemp()
-            self.ftmp = db = op.join(self.dtmp, "tmpdb")
-        flag = "r" if readonly else "c"
-        self.shelve = shelve.open(db, flag)
-        self.maxid = self._compute_maxid()
-
-    def __contains__(self, key):
-        return wrap_path(key) in self.shelve
-
-    def __delitem__(self, key):
-        row = self.shelve[wrap_path(key)]
-        del self.shelve[wrap_path(key)]
-        del self.shelve[wrap_id(row.id)]
-
-    def __getitem__(self, key):
-        if isinstance(key, int):
-            skey = self.shelve[wrap_id(key)]
-        else:
-            skey = wrap_path(key)
-        return string_to_colors(self.shelve[skey].blocks)
-
-    def __iter__(self):
-        return (unwrap_path(k) for k in self.shelve if k.startswith("path:"))
-
-    def __len__(self):
-        return sum(1 for k in self.shelve if k.startswith("path:"))
-
-    def __setitem__(self, path_str, blocks):
-        blocks = colors_to_string(blocks)
-        if op.exists(path_str):
-            mtime = int(os.stat(path_str).st_mtime)
-        else:
-            mtime = 0
-        if path_str in self:
-            rowid = self.shelve[wrap_path(path_str)].id
-        else:
-            rowid = self._get_new_id()
-        row = CacheRow(rowid, path_str, blocks, mtime)
-        self.shelve[wrap_path(path_str)] = row
-        self.shelve[wrap_id(rowid)] = wrap_path(path_str)
-
-    def _compute_maxid(self):
-        return max((unwrap_id(k) for k in self.shelve if k.startswith("id:")), default=1)
-
-    def _get_new_id(self):
-        self.maxid += 1
-        return self.maxid
-
-    def clear(self):
-        self.shelve.clear()
-
-    def close(self):
-        if self.shelve is not None:
-            self.shelve.close()
-            if self.istmp:
-                os.remove(self.ftmp)
-                os.rmdir(self.dtmp)
-        self.shelve = None
-
-    def filter(self, func):
-        to_delete = [key for key in self if not func(key)]
-        for key in to_delete:
-            del self[key]
-
-    def get_id(self, path):
-        if path in self:
-            return self.shelve[wrap_path(path)].id
-        else:
-            raise ValueError(path)
-
-    def get_multiple(self, rowids):
-        for rowid in rowids:
-            try:
-                skey = self.shelve[wrap_id(rowid)]
-            except KeyError:
-                continue
-            yield (rowid, string_to_colors(self.shelve[skey].blocks))
-
-    def purge_outdated(self):
-        """Go through the cache and purge outdated records.
-
-        A record is outdated if the picture doesn't exist or if its mtime is greater than the one in
-        the db.
-        """
-        todelete = []
-        for path in self:
-            row = self.shelve[wrap_path(path)]
-            if row.mtime and op.exists(path):
-                picture_mtime = os.stat(path).st_mtime
-                if int(picture_mtime) <= row.mtime:
-                    # not outdated
-                    continue
-            todelete.append(path)
-        for path in todelete:
-            try:
-                del self[path]
-            except KeyError:
-                # I have no idea why a KeyError sometimes happen, but it does, as we can see in
-                # #402 and #439. I don't think it hurts to silently ignore the error, so that's
-                # what we do
-                pass
--- a/core/pe/matchblock.py
+++ b/core/pe/matchblock.py
@@ -16,6 +16,7 @@ from hscommon.jobprogress import job

 from core.engine import Match
 from core.pe.block import avgdiff, DifferentBlockCountError, NoBlocksError
+from core.pe.cache_sqlite import SqliteCache

 # OPTIMIZATION NOTES:
 # The bottleneck of the matching phase is CPU, which is why we use multiprocessing. However, another
@@ -50,14 +51,7 @@ except Exception:


 def get_cache(cache_path, readonly=False):
-    if cache_path.endswith("shelve"):
-        from core.pe.cache_shelve import ShelveCache
-
-        return ShelveCache(cache_path, readonly=readonly)
-    else:
-        from core.pe.cache_sqlite import SqliteCache
-
-        return SqliteCache(cache_path, readonly=readonly)
+    return SqliteCache(cache_path, readonly=readonly)


 def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):